correction (broken rules - duplicates + parenthesis errors)

mthcht · Sep 9, 2024 · 43c3bce · 43c3bce
1 parent c56aac6
commit 43c3bce
Show file tree

Hide file tree

Showing 11 changed files with 139,124 additions and 139,913 deletions.
diff --git a/_utils/create_yara_rules.py b/_utils/create_yara_rules.py
@@ -51,7 +51,7 @@ def safe_tool_name(tool):
     sanitized_tool = tool
     if tool[0].isdigit() or tool.lower() in yara_reserved_keywords:
         sanitized_tool = f"_{tool}"
-    return sanitized_tool.replace('-','_').replace(' ','_').replace('.','_').replace('&','_and_').replace('$','')
+    return sanitized_tool.replace('-','_').replace(' ','_').replace('.','_').replace('&','_and_').replace('$','').replace('(', '_').replace(')', '_')
 
 def generate_yara_rules(output_directory):
     script_directory = os.path.dirname(os.path.realpath(__file__))

diff --git a/_utils/scan.py b/_utils/scan.py
@@ -67,6 +67,8 @@ def get_yara_files(yara_path):
         yara_files.append(yara_path)
     return yara_files
 
+
+
 def scan_and_output(yara_rule_file, file_path, rules, patterns, out_f=None):
     results = []
     try:
@@ -77,26 +79,37 @@ def scan_and_output(yara_rule_file, file_path, rules, patterns, out_f=None):
                 if matches:
                     for match in matches:  # Loop through each match object
                         for string_match in match.strings:  # Loop through each string match inside the match object
-                            string_pattern = patterns.get(string_match[1], 'unknown (error)')
-                            rule_name = str(yara_rule_file)
-                            try:
-                                matched_string_UTF16 = string_match[2].decode('utf-16')
-                            except UnicodeDecodeError:
-                                matched_string_UTF16 = "<Undecodable data>"
-
-                            result_dict = {
-                                'rule_name': rule_name,
-                                'file_path': str(file_path),
-                                'offset': string_match[0],
-                                'string_id': string_match[1],
-                                'string_pattern': string_pattern,
-                                'matched_string_UTF8': string_match[2].decode('utf-8', 'ignore'),
-                                'matched_string_UTF16': matched_string_UTF16
-                            }
-                            results.append(result_dict)
-
-                            key = (string_pattern, rule_name, str(file_path))
-                            summary_count[key] = summary_count.get(key, 0) + 1
+                            # Access the identifier (string ID)
+                            print(f"String ID: {string_match.identifier}")
+
+                            # Loop through the instances for this string_match to get the data
+                            for instance in string_match.instances:
+                                print(f"Offset: {instance.offset}")
+                                print(f"Matched data (raw bytes): {instance.matched_data}")
+                                print(f"Matched data (UTF-8 decoded): {instance.matched_data.decode('utf-8', 'ignore')}")
+
+                                try:
+                                    utf16_decoded = instance.matched_data.decode('utf-16')
+                                    print(f"Matched data (UTF-16 decoded): {utf16_decoded}")
+                                except UnicodeDecodeError:
+                                    print("UTF-16 decoding failed")
+
+                                string_pattern = patterns.get(string_match.identifier, 'unknown (error)')
+                                rule_name = str(yara_rule_file)
+
+                                result_dict = {
+                                    'rule_name': rule_name,
+                                    'file_path': str(file_path),
+                                    'offset': instance.offset,
+                                    'string_id': string_match.identifier,
+                                    'string_pattern': string_pattern,
+                                    'matched_string_UTF8': instance.matched_data.decode('utf-8', 'ignore'),
+                                    'matched_string_UTF16': utf16_decoded if 'utf16_decoded' in locals() else "<Undecodable data>"
+                                }
+                                results.append(result_dict)
+
+                                key = (string_pattern, rule_name, str(file_path))
+                                summary_count[key] = summary_count.get(key, 0) + 1
 
                     formatted_results = json.dumps(results, indent=4)
                     print(formatted_results)
@@ -108,7 +121,8 @@ def scan_and_output(yara_rule_file, file_path, rules, patterns, out_f=None):
             print(f"Skipping {file_path}. File size exceeds 64MB.")
     except PermissionError:
         print(f"Permission denied for {file_path}. Skipping.")
-
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Scan a file or directory with one or multiple YARA rules")
     parser.add_argument("-y", "--yara", required=True, help="Path to the YARA rule file(s) or directory containing them")