From cc644233a6b9596d8a0c312ce37cce13f9026a4b Mon Sep 17 00:00:00 2001 From: cohaereo Date: Sat, 23 Mar 2024 23:25:26 +0100 Subject: [PATCH] scanner: Handle files between 4-7 bytes Fixes an edge case where the scanner encounters a file that is only 4-7 bytes long and has a hash in it, as chunks_exact(8) normally skips over such files entirely. --- src/scanner.rs | 111 +++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 45 deletions(-) diff --git a/src/scanner.rs b/src/scanner.rs index c2a40ed..87c5d2d 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -105,58 +105,79 @@ pub fn scan_file(context: &ScannerContext, data: &[u8]) -> ScanResult { let mut r = ScanResult::default(); - for (i, v) in data.chunks_exact(8).enumerate() { - let m: [u8; 8] = v.try_into().unwrap(); - let m32_1: [u8; 4] = v[0..4].try_into().unwrap(); - let m32_2: [u8; 4] = v[4..8].try_into().unwrap(); - let value64 = u64_from_endian(context.endian, m); - let value_hi = u32_from_endian(context.endian, m32_1); - let value_lo = u32_from_endian(context.endian, m32_2); - let offset_u64 = (i * 8) as u64; - - let hash = TagHash64(value64); - { - profiling::scope!("check 64 bit hash"); - if context.valid_file_hashes64.binary_search(&hash).is_ok() { - profiling::scope!("insert 64 bit hash"); - r.file_hashes64.push(ScannedHash { - offset: offset_u64, - hash, - }); + if data.len() >= 8 { + for (i, v) in data.chunks_exact(8).enumerate() { + let m: [u8; 8] = v.try_into().unwrap(); + let m32_1: [u8; 4] = v[0..4].try_into().unwrap(); + let m32_2: [u8; 4] = v[4..8].try_into().unwrap(); + let value64 = u64_from_endian(context.endian, m); + let value_hi = u32_from_endian(context.endian, m32_1); + let value_lo = u32_from_endian(context.endian, m32_2); + let offset_u64 = (i * 8) as u64; + + let hash = TagHash64(value64); + { + profiling::scope!("check 64 bit hash"); + if context.valid_file_hashes64.binary_search(&hash).is_ok() { + profiling::scope!("insert 64 bit hash"); + r.file_hashes64.push(ScannedHash { + offset:
offset_u64, + hash, + }); + } } - } - profiling::scope!("32 bit chunks"); - for (vi, value) in [value_hi, value_lo].into_iter().enumerate() { - let offset = offset_u64 + (vi * 4) as u64; - let hash = TagHash(value); + profiling::scope!("32 bit chunks"); + for (vi, value) in [value_hi, value_lo].into_iter().enumerate() { + let offset = offset_u64 + (vi * 4) as u64; + let hash = TagHash(value); - if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() { - r.file_hashes.push(ScannedHash { offset, hash }); - } + if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() { + r.file_hashes.push(ScannedHash { offset, hash }); + } - // if hash.is_valid() && !hash.is_pkg_file() { - // r.classes.push(ScannedHash { - // offset, - // hash: value, - // }); - // } - - if value == 0x80800065 { - r.raw_strings.extend( - read_raw_string_blob(data, offset) - .into_iter() - .map(|(_, s)| s), - ); - } + if value == 0x80800065 { + r.raw_strings.extend( + read_raw_string_blob(data, offset) + .into_iter() + .map(|(_, s)| s), + ); + } - if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() { - r.string_hashes.push(ScannedHash { - offset, - hash: value, - }); + if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() + { + r.string_hashes.push(ScannedHash { + offset, + hash: value, + }); + } } } + } else if data.len() >= 4 { + // Handle files shorter than 8 bytes separately + let m: [u8; 4] = data[0..4].try_into().unwrap(); + let value = u32_from_endian(context.endian, m); + let offset = 0; + let hash = TagHash(value); + + if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() { + r.file_hashes.push(ScannedHash { offset, hash }); + } + + if value == 0x80800065 { + r.raw_strings.extend( + read_raw_string_blob(data, offset) + .into_iter() + .map(|(_, s)| s), + ); + } + + if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() { + 
r.string_hashes.push(ScannedHash { + offset, + hash: value, + }); + } } r