Skip to content

Commit

Permalink
Remove to/from bits
Browse files: browse the repository at this point in the history
  • Loading branch information
martindurant committed Sep 19, 2024
1 parent cce780c commit af226d8
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 37 deletions.
5 changes: 3 additions & 2 deletions python/akimbo_ip/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def parse_address4(str_arr):
Output will be fixed length 4 bytestring array
"""
out = lib.parse4(str_arr.offsets.data.astype("uint32"), str_arr.content.data)
return utils.u8_to_ip4(out.view("uint8"))
out, valid = lib.parse4(str_arr.offsets.data.astype("uint32"), str_arr.content.data)
return ak.contents.ByteMaskedArray(ak.index.Index8(valid), utils.u8_to_ip4(out.view("uint8")), True)


def parse_address6(str_arr):
Expand Down Expand Up @@ -216,6 +216,7 @@ def to_bytestring(arr):

def to_ip4(arr):
if arr.is_leaf:
# any 4-byte type like uint32
return arr.data.view("uint32"),
else:
# bytestring or 4 * uint8 regular
Expand Down
117 changes: 86 additions & 31 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ fn to_text4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>)
let mut data: Vec<u8> = Vec::new();
for out in x.as_array().iter()
{
data.extend(Ipv4Addr::from_bits(*out).to_string().as_bytes());
let (a, b, c, d) = out.to_be_bytes().into();
data.extend(Ipv4Addr::new(a, b, c, d).to_string().as_bytes());
offsets.push(data.len() as u32);
};
Ok((data.into_pyarray_bound(py), offsets.into_pyarray_bound(py)))
Expand All @@ -35,17 +36,20 @@ fn to_text4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>)
#[pyfunction]
fn parse4<'py>(py: Python<'py>, offsets: PyReadonlyArray1<'py, u32>,
data : PyReadonlyArray1<'py, u8>
) -> PyResult<Bound<'py, PyArray1<u32>>> {
) -> PyResult<(Bound<'py, PyArray1<u32>>, Bound<'py, PyArray1<u8>>)> {
let ar = offsets.as_array();
let sl = ar.as_slice().unwrap();
let ar2 = data.as_array();
let by = ar2.as_slice().unwrap();
let out: Vec<u32> = sl.windows(2).map(
let (out, valid): (Vec<u32>, Vec<u8>) = sl.windows(2).map(
|w| {
Ipv4Addr::parse_ascii(&by[w[0] as usize..w[1] as usize]).unwrap().to_bits()
match Ipv4Addr::parse_ascii(&by[w[0] as usize..w[1] as usize]) {
Ok(x) => (u32::from_ne_bytes(x.octets()), 1u8),
Err(_) => (0u32, 0u8)
}
}
).collect();
Ok(out.into_pyarray_bound(py))
).unzip();
Ok((out.into_pyarray_bound(py), valid.into_pyarray_bound(py)))
}

#[pyfunction]
Expand Down Expand Up @@ -91,7 +95,7 @@ fn parsenet4<'py>(py: Python<'py>,
for w in sl.windows(2) {
let net = Ipv4Net::from_str(
&str::from_utf8(&by[w[0] as usize..w[1] as usize]).unwrap()).unwrap();
outaddr.push(net.addr().to_bits());
outaddr.push(u32::from_ne_bytes(net.addr().octets()));
outpref.push(net.prefix_len());
};
Ok((outaddr.into_pyarray_bound(py), outpref.into_pyarray_bound(py)))
Expand Down Expand Up @@ -121,8 +125,11 @@ fn hosts4<'py>(py: Python<'py>,
let mut out: Vec<u32> = Vec::new();
let mut offsets: Vec<u64> = Vec::from([0]);
for (&add, &pre) in addr.as_array().iter().zip(pref.as_array()) {
let hosts = Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().hosts();
out.extend(hosts.map(|ip|ip.to_bits()));
let hosts = Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().hosts();
out.extend(hosts.map(|ip|u32::from_ne_bytes(ip.octets())));
offsets.push(out.len() as u64);
};
Ok((out.into_pyarray_bound(py), offsets.into_pyarray_bound(py)))
Expand All @@ -134,7 +141,7 @@ fn hostmask4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
let out: Vec<u32> = pref.as_array().iter().map(
|x| u32::max_value() >> x
|x| u32::from_ne_bytes(Ipv4Net::new(Ipv4Addr::new(0, 0, 0, 0), *x).unwrap().hostmask().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -145,9 +152,8 @@ fn hostmask4<'py>(py: Python<'py>,
fn netmask4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
// TODO: check for prefix >= 128 .checked_shl(prefix).unwrap_or(0)
let out: Vec<u32> = pref.as_array().iter().map(
|x| u32::max_value() << (32 - x)
|x| u32::from_ne_bytes(Ipv4Net::new(Ipv4Addr::new(0, 0, 0, 0), *x).unwrap().netmask().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -159,7 +165,10 @@ fn network4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
let out: Vec<u32> = addr.as_array().iter().zip(pref.as_array().iter()).map(
| (&add, &pre) | Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().network().to_bits()
| (&add, &pre) | u32::from_ne_bytes(Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().network().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -172,7 +181,10 @@ fn broadcast4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
let out: Vec<u32> = addr.as_array().iter().zip(pref.as_array().iter()).map(
| (&add, &pre) | Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().broadcast().to_bits()
| (&add, &pre) | u32::from_ne_bytes(Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().broadcast().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -183,7 +195,10 @@ fn trunc4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
let out: Vec<u32> = addr.as_array().iter().zip(pref.as_array().iter()).map(
| (&add, &pre) | Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().trunc().addr().to_bits()
| (&add, &pre) | u32::from_ne_bytes(Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().trunc().addr().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -194,7 +209,10 @@ fn supernet4<'py>(py: Python<'py>,
pref: PyReadonlyArray1<'py, u8>,
) -> PyResult<Bound<'py, PyArray1<u32>>> {
let out: Vec<u32> = addr.as_array().iter().zip(pref.as_array().iter()).map(
| (&add, &pre) | Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().supernet().unwrap().addr().to_bits()
| (&add, &pre) | u32::from_ne_bytes(Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().supernet().unwrap().addr().octets())
).collect();
Ok(out.into_pyarray_bound(py))
}
Expand All @@ -211,10 +229,13 @@ fn subnets4<'py>(py: Python<'py>,
counts.push(0);
addr.as_array().iter().zip(pref.as_array().iter()).for_each(
| (&add, &pre) | {
Ipv4Net::new(Ipv4Addr::from_bits(add), pre).unwrap().subnets(new_pref).unwrap().for_each(
Ipv4Net::new({
let (a, b, c, d) = add.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d)
}, pre).unwrap().subnets(new_pref).unwrap().for_each(
|x|{
count += 1;
out.push(x.addr().to_bits())
out.push(u32::from_ne_bytes(x.addr().octets()))
}
);
counts.push(count);
Expand Down Expand Up @@ -247,12 +268,13 @@ fn aggregate4<'py>(py: Python<'py>,
for w in offs {
networks.clear();
while count_in < *w {
networks.push(Ipv4Net::new(Ipv4Addr::from_bits(*ad_slice.next().unwrap()), *pr_slice.next().unwrap()).unwrap());
let (a, b, c, d): (u8, u8, u8, u8) = ad_slice.next().unwrap().to_ne_bytes().into();
networks.push(Ipv4Net::new(Ipv4Addr::new(a, b, c, d), *pr_slice.next().unwrap()).unwrap());
count_in += 1;
};
Ipv4Net::aggregate(&networks).iter().for_each(
|x| {
out_addr.push(x.addr().to_bits());
out_addr.push(u32::from_ne_bytes(x.addr().octets()));
out_pref.push(x.prefix_len());
count += 1;
});
Expand All @@ -264,67 +286,100 @@ fn aggregate4<'py>(py: Python<'py>,

#[pyfunction]
fn is_broadcast4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_broadcast()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_broadcast()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_global4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_global()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_global()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_unspecified4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_unspecified()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_unspecified()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_loopback4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_loopback()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_loopback()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_private4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_private()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_private()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_link_local4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_link_local()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_link_local()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_shared4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_shared()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_shared()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_benchmarking4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_benchmarking()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_benchmarking()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_reserved4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_reserved()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_reserved()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_multicast4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_multicast()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_multicast()
}).collect();
Ok(out.into_pyarray_bound(py))
}

#[pyfunction]
fn is_documentation4<'py>(py: Python<'py>, x: PyReadonlyArray1<'py, u32>) -> PyResult<Bound<'py, PyArray1<bool>>> {
let out: Vec<bool> = x.as_array().iter().map(|&x|Ipv4Addr::from_bits(x).is_documentation()).collect();
let out: Vec<bool> = x.as_array().iter().map(|&x|{
let (a, b, c, d) = x.to_ne_bytes().into();
Ipv4Addr::new(a, b, c, d).is_documentation()
}).collect();
Ok(out.into_pyarray_bound(py))
}

Expand Down
6 changes: 3 additions & 3 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ def test_rename():

def test_inner_list_hosts():
# note: both addresses are rounded down
s = pd.DataFrame({"address": pd.Series([1, 2], dtype="u4"),
s = pd.DataFrame({"address": pd.Series([b"\x00\x00\x00\x00", b"\x01\x00\x00\x00"], dtype=bytestring4),
"prefix": pd.Series([31, 29], dtype="u1")}).ak.merge()
out = s.ak.ip.hosts4()
assert out.to_list() == [
# includes gateway/broadcast
[b'\x00\x00\x00\x00', b'\x01\x00\x00\x00'],
[b'\x00\x00\x00\x00', b'\x00\x00\x00\x01'],
# does not include gateway/broadcast
[b'\x01\x00\x00\x00', b'\x02\x00\x00\x00', b'\x03\x00\x00\x00', b'\x04\x00\x00\x00', b'\x05\x00\x00\x00', b'\x06\x00\x00\x00']
[b'\x01\x00\x00\x01', b'\x01\x00\x00\x02', b'\x01\x00\x00\x03', b'\x01\x00\x00\x04', b'\x01\x00\x00\x05', b'\x01\x00\x00\x06']
]


Expand Down
10 changes: 9 additions & 1 deletion tests/test_std.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_network4():
assert (out == expected).all()


def test_hostmast4():
def test_hostmask4():
s = pd.Series(["10.1.0.0/20"]).ak.ip.parse_net4()
expected = pd.Series("0.0.15.255").ak.ip.parse_address4()
out = s.ak.ip.hostmask4()
Expand Down Expand Up @@ -78,3 +78,11 @@ def test_aggregate4():
]]).ak.ip.parse_net4()
out = s.ak.ip.aggregate4()
assert out.ak.to_list() == expected.ak.to_list()


def test_parse4():
s = pd.Series(["127.0.0.1", "broken"])
out = s.ak.ip.parse_address4().ak.ip.to_int_list()
assert out.tolist() == [[127, 0, 0, 1], pd.NA]


0 comments on commit af226d8

Please sign in to comment.