From 844fc686dffa641c17143010b451b773e371a62b Mon Sep 17 00:00:00 2001 From: cohaereo Date: Thu, 28 Mar 2024 20:04:51 +0100 Subject: [PATCH] Fix scanning files that aren't aligned to 8 bytes --- src/scanner.rs | 81 +++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 54 deletions(-) diff --git a/src/scanner.rs b/src/scanner.rs index a8968a8..43bf9db 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -105,68 +105,24 @@ pub fn scan_file(context: &ScannerContext, data: &[u8]) -> ScanResult { let mut r = ScanResult::default(); - if data.len() >= 8 { - for (i, v) in data.chunks_exact(8).enumerate() { - let m: [u8; 8] = v.try_into().unwrap(); - let m32_1: [u8; 4] = v[0..4].try_into().unwrap(); - let m32_2: [u8; 4] = v[4..8].try_into().unwrap(); - let value64 = u64_from_endian(context.endian, m); - let value_hi = u32_from_endian(context.endian, m32_1); - let value_lo = u32_from_endian(context.endian, m32_2); - let offset_u64 = (i * 8) as u64; - - let hash = TagHash64(value64); - { - profiling::scope!("check 64 bit hash"); - if context.valid_file_hashes64.binary_search(&hash).is_ok() { - profiling::scope!("insert 64 bit hash"); - r.file_hashes64.push(ScannedHash { - offset: offset_u64, - hash, - }); - } - } - - profiling::scope!("32 bit chunks"); - for (vi, value) in [value_hi, value_lo].into_iter().enumerate() { - let offset = offset_u64 + (vi * 4) as u64; - let hash = TagHash(value); - - if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() { - r.file_hashes.push(ScannedHash { offset, hash }); - } - - if value == 0x80800065 { - r.raw_strings.extend( - read_raw_string_blob(data, offset) - .into_iter() - .map(|(_, s)| s), - ); - } - - if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() - { - r.string_hashes.push(ScannedHash { - offset, - hash: value, - }); - } - } + for offset in (0..data.len()).step_by(4) { + if offset + 4 > data.len() { + break; } - } else if data.len() >= 4 { - // Handle files shorter than 8 bytes separately - let m: [u8; 4] = data[0..4].try_into().unwrap(); + let m: [u8; 4] = data[offset..offset + 4].try_into().unwrap(); let value = u32_from_endian(context.endian, m); - let offset = 0; let hash = TagHash(value); if hash.is_pkg_file() && context.valid_file_hashes.binary_search(&hash).is_ok() { - r.file_hashes.push(ScannedHash { offset, hash }); + r.file_hashes.push(ScannedHash { + offset: offset as u64, + hash, + }); } if value == 0x80800065 { r.raw_strings.extend( - read_raw_string_blob(data, offset) + read_raw_string_blob(data, offset as u64) .into_iter() .map(|(_, s)| s), ); @@ -174,10 +130,27 @@ pub fn scan_file(context: &ScannerContext, data: &[u8]) -> ScanResult { if value != 0x811c9dc5 && context.known_string_hashes.binary_search(&value).is_ok() { r.string_hashes.push(ScannedHash { - offset, + offset: offset as u64, hash: value, }); } + + if (offset % 8) == 0 && offset + 8 <= data.len() { + let m: [u8; 8] = data[offset..offset + 8].try_into().unwrap(); + let value64 = u64_from_endian(context.endian, m); + + let hash = TagHash64(value64); + { + profiling::scope!("check 64 bit hash"); + if context.valid_file_hashes64.binary_search(&hash).is_ok() { + profiling::scope!("insert 64 bit hash"); + r.file_hashes64.push(ScannedHash { + offset: offset as u64, + hash, + }); + } + } + } } r