From bbb1556dba43e0452aad6f2ce55381a0da3b446b Mon Sep 17 00:00:00 2001 From: Abhi Date: Thu, 21 Mar 2024 00:02:52 +0530 Subject: [PATCH 1/8] Remove `egg` depreciation with support for >=python3.6 --- .gitignore | 0 README.md | 7 +- pyproject.toml | 38 +++ pyxamstore/__init__.py | 729 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 6 +- setup.py | 38 --- 6 files changed, 773 insertions(+), 45 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 README.md create mode 100755 pyproject.toml mode change 100644 => 100755 requirements.txt delete mode 100644 setup.py diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 854a924..dc1612d --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ This is an alpha release of an `assemblies.blob` AssemblyStore parser written in Python. The tool is capable of unpack and repackaging `assemblies.blob` and `assemblies.manifest` Xamarin files from an APK. ## Installing -Run the installer script: - - python setup.py install +```shell + pip3 install build && python3 -m build && pip install --force-reinstall dist/pyxamstore-1.0.0-py3-none-any.whl +``` You can then use the tool by calling `pyxamstore` @@ -28,5 +28,4 @@ From here you'll need to copy the new manifest and blobs as well as repackage/si Additional file format details can be found on my [personal website](https://www.thecobraden.com/posts/unpacking_xamarin_assembly_stores/). # Known Limitations -* Python3 support (working on it!) * DLLs that have debug/config data associated with them diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 0000000..bca2d87 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[tool.poetry] +name = "pyxamstore" +version = "1.0.0" +description = "Python utility for parsing Xamarin AssemblyStore blob files" +authors = ["jakev", "AbhiTheModder"] +keywords = ["android", "device", "security", "mobile", "reverse-engineering", "Xamarin", "AssemblyStore", "reverse", "hacking"] + +classifiers = [ + 'Development Status :: 3 - Alpha', + 'Natural Language :: English', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', +] +license = "MIT" +readme = "README.md" +homepage = "https://github.com/jakev/pyxamstore" +repository = "https://github.com/jakev/pyxamstore" +documentation = "https://github.com/jakev/pyxamstore" + +[tool.poetry.dependencies] +python = "^3.6" +docopt = "^0.6.2" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +pyxamstore = "pyxamstore:main" \ No newline at end of file diff --git a/pyxamstore/__init__.py b/pyxamstore/__init__.py index e69de29..17252d0 100644 --- a/pyxamstore/__init__.py +++ b/pyxamstore/__init__.py @@ -0,0 +1,729 @@ +"""Pack and unpack Xamarin AssemblyStore files""" + +from __future__ import print_function +from builtins import object +import struct +import argparse +import os +import os.path +import sys +import json +import shutil + +import lz4.block +import xxhash + +from . import constants + +# Enable debugging here. +DEBUG = False + +def debug(message): + + """Print a debuggable message""" + + if DEBUG: + print("[debug] %s" % message) + + +class ManifestEntry(object): + + """Element in Manifest""" + + hash32 = "" + hash64 = "" + blob_id = 0 + blob_idx = 0 + name = "" + + def __init__(self, hash32, hash64, blob_id, blob_idx, name): + + """Initialize item""" + + self.hash32 = hash32 + self.hash64 = hash64 + self.blob_id = int(blob_id) + self.blob_idx = int(blob_idx) + self.name = name + + +class ManifestList(list): + + """List of manifest entries""" + + def get_idx(self, blob_id, blob_idx): + + """Find entry by ID""" + + for entry in self: + if entry.blob_idx == blob_idx and entry.blob_id == blob_id: + return entry + return None + + +class AssemblyStoreAssembly(object): + + """Assembly Details""" + + data_offset = 0 + data_size = 0 + debug_data_offset = 0 + debug_data_size = 0 + config_data_offset = 0 + config_data_size = 0 + + def __init__(self): + pass + + +class AssemblyStoreHashEntry(object): + + """Hash Details""" + + hash_val = "" + mapping_index = 0 + local_store_index = 0 + store_id = 0 + + def __init__(self): + pass + + +class AssemblyStore(object): + + """AssemblyStore object""" + + raw = "" + + file_name = "" + + manifest_entries = None + + hdr_magic = "" + hdr_version = 0 + hdr_lec = 0 + hdr_gec = 0 + hdr_store_id = 0 + + assembly_list = None + global_hash32 = None + global_hash64 = None + + def __init__(self, in_file_name, manifest_entries, primary=True): + + """Parse and read store""" + + self.manifest_entries = manifest_entries + self.file_name = os.path.basename(in_file_name) + + blob_file = open(in_file_name, "rb") + + self.raw = blob_file.read() + + blob_file.seek(0) + + # Header Section + # + # 0 - 3: Magic + # 4 - 7: Version + # 8 - 11: LocalEntryCount + # 12 - 15: GlobalEntryCount + # 16 - 19: StoreID + + magic = blob_file.read(4) + if magic != constants.ASSEMBLY_STORE_MAGIC: + raise Exception("Invalid Magic: %s" % magic) + + version = struct.unpack("I", blob_file.read(4))[0] + if version > constants.ASSEMBLY_STORE_FORMAT_VERSION: + raise Exception("This version is higher than expected! Max = %d, got %d" + % constants.ASSEMBLY_STORE_FORMAT_VERSION, version) + + self.hdr_version = version + + self.hdr_lec = struct.unpack("I", blob_file.read(4))[0] + self.hdr_gec = struct.unpack("I", blob_file.read(4))[0] + self.hdr_store_id = struct.unpack("I", blob_file.read(4))[0] + + debug("Local entry count: %d" % self.hdr_lec) + debug("Global entry count: %d" % self.hdr_gec) + + self.assemblies_list = list() + + debug("Entries start at: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) + + i = 0 + while i < self.hdr_lec: + + # 0 - 3: DataOffset + # 4 - 7: DataSize + # 8 - 11: DebugDataOffset + # 12 - 15: DebugDataSize + # 16 - 19: ConfigDataOffset + # 20 - 23: ConfigDataSize + + debug("Extracting Assembly: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) + entry = blob_file.read(24) + + assembly = AssemblyStoreAssembly() + + assembly.data_offset = struct.unpack("I", entry[0:4])[0] + assembly.data_size = struct.unpack("I", entry[4:8])[0] + assembly.debug_data_offset = struct.unpack("I", entry[8:12])[0] + assembly.debug_data_size = struct.unpack("I", entry[12:16])[0] + assembly.config_data_offset = struct.unpack("I", entry[16:20])[0] + assembly.config_data_size = struct.unpack("I", entry[20:24])[0] + + self.assemblies_list.append(assembly) + + debug(" Data Offset: %d (0x%x)" % (assembly.data_offset, assembly.data_offset)) + debug(" Data Size: %d (0x%x)" % (assembly.data_size, assembly.data_size)) + debug(" Config Offset: %d (0x%x)" % (assembly.config_data_offset, assembly.config_data_offset)) + debug(" Config Size: %d (0x%x)" % (assembly.config_data_size, assembly.config_data_size)) + debug(" Debug Offset: %d (0x%x)" % (assembly.debug_data_offset, assembly.debug_data_offset)) + debug(" Debug Size: %d (0x%x)" % (assembly.debug_data_size, assembly.debug_data_size)) + + i += 1 + + if not primary: + debug("Skipping hash sections in non-primary store") + return + + # Parse Hash data + # + # The following 2 sections are _required_ to be in order from + # lowest to highest (e.g. 0x00000000 to 0xffffffff). + # Since you're very likely not going to be adding assemblies + # (or renaming) to the store, I'm going to store the hashes with the + # assemblies.json to make sorting easier when packing. + + debug("Hash32 start at: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) + self.global_hash32 = list() + + i = 0 + while i < self.hdr_lec: + + entry = blob_file.read(20) + + hash_entry = AssemblyStoreHashEntry() + + hash_entry.hash_val = "0x%08x" % struct.unpack("") + print("") + print(" MODES:") + print("\tunpack Unpack assembly blobs.") + print("\tpack Repackage assembly blobs.") + print("\thash file_name Generate xxHash values.") + print("\thelp Print this message.") + + return 0 + + +def do_unpack(in_directory, in_arch, force): + + """Unpack a assemblies.blob/manifest""" + + arch_assemblies = False + + if force and os.path.isdir("out/"): + shutil.rmtree("out/") + + # First check if all files exist. + if os.path.isdir("out/"): + print("Out directory already exists!") + return 3 + + manifest_path = os.path.join(in_directory, constants.FILE_ASSEMBLIES_MANIFEST) + assemblies_path = os.path.join(in_directory, constants.FILE_ASSEMBLIES_BLOB) + + if not os.path.isfile(manifest_path): + print("Manifest file '%s' does not exist!" % manifest_path) + return 4 + elif not os.path.isfile(assemblies_path): + print("Main assemblies blob '%s' does not exist!" % assemblies_path) + return 4 + + # The manifest will have all entries (regardless of which + # *.blob they're found in. Parse this first, and then handle + # each blob. + + manifest_entries = read_manifest(manifest_path) + if manifest_entries is None: + print("Unable to parse assemblies.manifest file!") + return 5 + + json_data = dict() + json_data['stores'] = list() + json_data['assemblies'] = list() + + os.mkdir("out/") + + assembly_store = AssemblyStore(assemblies_path, manifest_entries) + + if assembly_store.hdr_lec != assembly_store.hdr_gec: + arch_assemblies = True + debug("There are more assemblies to unpack here!") + + # Do extraction. + json_data = assembly_store.extract_all(json_data) + + # What about architecture assemblies? + if arch_assemblies: + arch_assemblies_path = os.path.join(in_directory, + constants.ARCHITECTURE_MAP[in_arch]) + + arch_assembly_store = AssemblyStore(arch_assemblies_path, + manifest_entries, + primary=False) + json_data = arch_assembly_store.extract_all(json_data) + + # Save the large config out. + with open(constants.FILE_ASSEMBLIES_JSON, 'w') as assembly_file: + assembly_file.write(json.dumps(json_data, indent=4)) + +def do_pack(in_json_config): + + """Create new assemblies.blob/manifest""" + + if not os.path.isfile(in_json_config): + print("Config file '%s' does not exist!" % in_json_config) + return -1 + + if os.path.isfile("assemblies.manifest.new"): + print("Output manifest exists!") + return -2 + + + if os.path.isfile("assemblies.blob.new"): + print("Output blob exists!") + return -3 + + json_data = None + with open(in_json_config, "r") as json_f: + json_data = json.load(json_f) + + # Write new assemblies.manifest + print("Writing 'assemblies.manifest.new'...") + assemblies_manifest_f = open("assemblies.manifest.new", "w") + + assemblies_manifest_f.write("Hash 32 Hash 64 ") + assemblies_manifest_f.write("Blob ID Blob idx Name\r\n") + + #for _, store_json in json_data['stores'].items(): + for assembly in json_data['assemblies']: + hash32, hash64 = gen_xxhash(assembly['name']) + + line = ("0x%08s 0x%016s %03d %04d %s\r\n" + % (hash32, hash64, assembly['store_id'], + assembly['blob_idx'], assembly['name'])) + + assemblies_manifest_f.write(line) + + assemblies_manifest_f.close() + + # This is hacky, but we need the lec/gec if there are multiple stores. + store_zero_lec = 0 + for assembly_store in json_data['stores']: + for store_name, store_data in list(assembly_store.items()): + if store_name == "assemblies.blob": + store_zero_lec = store_data['header']['lec'] + + # Next do the blobs. + for assembly_store in json_data['stores']: + for store_name, store_data in list(assembly_store.items()): + + out_store_name = "%s.new" % store_name + + # Pack the new AssemblyStore structure + print("Writing '%s'..." % out_store_name) + assemblies_blob_f = open(out_store_name, "wb") + + # Write header + json_hdr = store_data['header'] + assemblies_blob_f.write(struct.pack("4sIIII", + constants.ASSEMBLY_STORE_MAGIC, + json_hdr['version'], + json_hdr['lec'], + json_hdr['gec'], + json_hdr['store_id'])) + + # Offsets are weird. + # If this is a primary store, the data is: + # -header + # -ASA header + # -hash32 + # -hash64 + # -ASA data + # But a non-primary does not have hashes. Best to determine early + # if this is primary and act accordingly throughout. + primary = bool(json_hdr['store_id'] == 0) + + next_entry_offset = 20 + next_data_offset = 20 + (json_hdr['lec'] * 24) + (json_hdr['gec'] * 40) + + if not primary: + next_data_offset = 20 + (json_hdr['lec'] * 24) + + # First pass: Write the entries + DLL content. + for assembly in json_data['assemblies']: + + if assembly['store_id'] != json_hdr['store_id']: + debug("Skipping assembly for another store") + continue + + assembly_data = open(assembly['file'], "rb").read() + if assembly['lz4']: + assembly_data = lz4_compress(assembly_data, + assembly['lz4_desc_idx']) + + data_size = len(assembly_data) + + # Write the entry data + assemblies_blob_f.seek(next_entry_offset) + assemblies_blob_f.write(struct.pack("IIIIII", + next_data_offset, + data_size, + 0, 0, 0, 0)) + + # Write binary data + assemblies_blob_f.seek(next_data_offset) + assemblies_blob_f.write(assembly_data) + + # Move all offsets forward. + next_data_offset += data_size + next_entry_offset += 24 + + # Second + third pass: sort the hashes and write them + # But skip if not primary. + if not primary: + assemblies_blob_f.close() + continue + + next_hash32_offset = 20 + (json_hdr['lec'] * 24) + next_hash64_offset = 20 + (json_hdr['lec'] * 24) + (json_hdr['gec'] * 20) + + assembly_data = json_data["assemblies"] + + # hash32 + for assembly in sorted(assembly_data, key=lambda d: d['hash32']): + + # Hash sections + hash32, hash64 = gen_xxhash(assembly['name'], raw=True) + mapping_id = assembly['blob_idx'] if assembly['store_id'] == 0 else store_zero_lec + assembly['blob_idx'] + + # Write the hash32 + assemblies_blob_f.seek(next_hash32_offset) + assemblies_blob_f.write(struct.pack("4sIIII", + hash32, + 0, + mapping_id, + assembly['blob_idx'], + assembly['store_id'])) + + next_hash32_offset += 20 + + # hash64 + for assembly in sorted(assembly_data, key=lambda d: d['hash64']): + + # Hash sections + hash32, hash64 = gen_xxhash(assembly['name'], raw=True) + mapping_id = assembly['blob_idx'] if assembly['store_id'] == 0 else store_zero_lec + assembly['blob_idx'] + + # Write the hash64 + assemblies_blob_f.seek(next_hash64_offset) + assemblies_blob_f.write(struct.pack("8sIII", + hash64, + mapping_id, + assembly['blob_idx'], + assembly['store_id'])) + + next_hash64_offset += 20 + + # Done! + assemblies_blob_f.close() + + return 0 + + +def unpack_store(args): + + """Unpack an assemblies store""" + + parser = argparse.ArgumentParser(prog='pyxamstore unpack', + description='Unpack DLLs from assemblies.blob store.') + parser.add_argument('--dir', '-d', type=str, metavar='val', + default='./', + dest='directory', + help='Where to load blobs/manifest from.') + parser.add_argument('--arch', '-a', type=str, metavar='val', + default='arm64', + dest='architecture', + help='Which architecture to unpack: arm(64), x86(_64)') + parser.add_argument('--force', '-f', action='store_const', + dest='force', const=True, default=False, + help="Force re-create out/ directory.") + + parsed_args = parser.parse_args(args) + + return do_unpack(parsed_args.directory, + parsed_args.architecture, + parsed_args.force) + + +def pack_store(args): + + """Pack an assemblies store""" + + parser = argparse.ArgumentParser(prog='pyxamstore pack', + description='Repackage DLLs into assemblies.blob.') + parser.add_argument('--config', '-c', type=str, metavar='val', + default='assemblies.json', + dest='config_json', + help='Input assemblies.json file.') + + parsed_args = parser.parse_args(args) + + if not os.path.isfile(parsed_args.config_json): + print("File '%s' doesn't exist!" % parsed_args.config_json) + return -3 + + return do_pack(parsed_args.config_json) + + +def gen_hash(args): + + """Generate xxhashes for a given file path/string, mostly for testing""" + + if len(args) < 1: + print("Need to provide a string to hash!") + return -1 + + file_name = args.pop(0) + hash_name = os.path.splitext(os.path.basename(file_name))[0] + + print("Generating hashes for string '%s' (%s)" % (file_name, hash_name)) + hash32, hash64 = gen_xxhash(hash_name) + + print("Hash32: 0x%s" % hash32) + print("Hash64: 0x%s" % hash64) + + return 0 + + +def main(): + + """Main Loop""" + + if len(sys.argv) < 2: + print("Mode is required!") + usage() + return -1 + + sys.argv.pop(0) + mode = sys.argv.pop(0) + + if mode == "unpack": + return unpack_store(sys.argv) + elif mode == "pack": + return pack_store(sys.argv) + elif mode == "hash": + return gen_hash(sys.argv) + elif mode in ['-h', '--h', 'help']: + return usage() + + print("Unknown mode: '%s'" % mode) + return -2 + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 2180f22..751f205 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -future==0.18.3 -lz4==4.3.1 -xxhash==3.2.0 +future +lz4 +xxhash diff --git a/setup.py b/setup.py deleted file mode 100644 index 5b665db..0000000 --- a/setup.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Setup Script""" - -from __future__ import absolute_import -from setuptools import setup - - -VERSION = '1.0.0' - -setup( - name='pyxamstore', - version=VERSION, - description='Xamarin AssemblyStore Explorer (pyxamstore)', - - download_url='https://github.com/jakev/pyxamstore', - - author='Jake Valletta', - author_email='javallet@gmail.com', - - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Information Technology', - 'Topic :: Security', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7'], - - keywords='android device security mobile reverse-engineering Xamarin AssemblyStore', - - packages=["pyxamstore"], - - install_requires=open("requirements.txt", "rb").read().decode("utf-8").split("\n"), - - entry_points={ - 'console_scripts': [ - 'pyxamstore = pyxamstore.explorer:main', - ], - }, -) From be6df70e416aa4375e24e7b48e8880493fdb8183 Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Sun, 5 May 2024 23:27:39 +0530 Subject: [PATCH 2/8] Update pyproject.toml --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bca2d87..7821932 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,8 @@ documentation = "https://github.com/jakev/pyxamstore" [tool.poetry.dependencies] python = "^3.6" -docopt = "^0.6.2" +lz4 = "^4.3" +xxhash = "^3.4" [tool.poetry.dev-dependencies] From 9bbbd6393f5a7b94811427e4c63253b8f708f406 Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Sun, 5 May 2024 23:28:20 +0530 Subject: [PATCH 3/8] Update pyproject.toml --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7821932..eb19f28 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,9 @@ documentation = "https://github.com/jakev/pyxamstore" [tool.poetry.dependencies] python = "^3.6" -lz4 = "^4.3" -xxhash = "^3.4" +lz4 +xxhash +future [tool.poetry.dev-dependencies] From 118de5758394a850f7c43750cacdf2477fb8b15b Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Sun, 5 May 2024 23:29:30 +0530 Subject: [PATCH 4/8] Update pyproject.toml --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eb19f28..4481d2a 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,9 +26,9 @@ documentation = "https://github.com/jakev/pyxamstore" [tool.poetry.dependencies] python = "^3.6" -lz4 -xxhash -future +lz4 = "^4.3.3" +xxhash = "^3.4.1" +future = "^1.0.0" [tool.poetry.dev-dependencies] From 56c886c462afcf999c042a47d791c1bfd2fec6ff Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Mon, 6 May 2024 20:19:55 +0530 Subject: [PATCH 5/8] Add: Installation and Building steps --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dc1612d..e05c731 100755 --- a/README.md +++ b/README.md @@ -2,8 +2,14 @@ This is an alpha release of an `assemblies.blob` AssemblyStore parser written in Python. The tool is capable of unpack and repackaging `assemblies.blob` and `assemblies.manifest` Xamarin files from an APK. ## Installing +- Using `pip` [recommended]: ```shell - pip3 install build && python3 -m build && pip install --force-reinstall dist/pyxamstore-1.0.0-py3-none-any.whl +pip install -U git+https://github.com/AbhiTheModder/pyxamstore +``` + +## Building Yourself +```shell +pip3 install build && python3 -m build && pip install --force-reinstall dist/pyxamstore-1.0.0-py3-none-any.whl ``` You can then use the tool by calling `pyxamstore` From 46be093eb774f4ab2d57d62e22b675126bcfc9fe Mon Sep 17 00:00:00 2001 From: Abhi Date: Thu, 26 Sep 2024 15:19:18 +0530 Subject: [PATCH 6/8] Refactor extract_all method to accept outpath as a parameter and use absolute paths for unpacking. Also, improve directory handling in do_unpack function. --- pyxamstore/__init__.py | 18 +- pyxamstore/explorer.py | 729 ----------------------------------------- 2 files changed, 10 insertions(+), 737 deletions(-) delete mode 100755 pyxamstore/explorer.py diff --git a/pyxamstore/__init__.py b/pyxamstore/__init__.py index 17252d0..462d9f2 100644 --- a/pyxamstore/__init__.py +++ b/pyxamstore/__init__.py @@ -247,7 +247,7 @@ def __init__(self, in_file_name, manifest_entries, primary=True): i += 1 - def extract_all(self, json_config, outpath="out"): + def extract_all(self, json_config, outpath): """Extract everything""" @@ -414,12 +414,14 @@ def do_unpack(in_directory, in_arch, force): """Unpack a assemblies.blob/manifest""" arch_assemblies = False + in_directory = os.path.abspath(in_directory) + out_directory = os.path.join(in_directory, "out") - if force and os.path.isdir("out/"): - shutil.rmtree("out/") + if force and os.path.isdir(out_directory): + shutil.rmtree(out_directory) # First check if all files exist. - if os.path.isdir("out/"): + if os.path.isdir(out_directory): print("Out directory already exists!") return 3 @@ -446,7 +448,7 @@ def do_unpack(in_directory, in_arch, force): json_data['stores'] = list() json_data['assemblies'] = list() - os.mkdir("out/") + os.mkdir(out_directory) assembly_store = AssemblyStore(assemblies_path, manifest_entries) @@ -455,7 +457,7 @@ def do_unpack(in_directory, in_arch, force): debug("There are more assemblies to unpack here!") # Do extraction. - json_data = assembly_store.extract_all(json_data) + json_data = assembly_store.extract_all(json_data, out_directory) # What about architecture assemblies? if arch_assemblies: @@ -465,10 +467,10 @@ def do_unpack(in_directory, in_arch, force): arch_assembly_store = AssemblyStore(arch_assemblies_path, manifest_entries, primary=False) - json_data = arch_assembly_store.extract_all(json_data) + json_data = arch_assembly_store.extract_all(json_data, out_directory) # Save the large config out. - with open(constants.FILE_ASSEMBLIES_JSON, 'w') as assembly_file: + with open(os.path.join(in_directory, constants.FILE_ASSEMBLIES_JSON), 'w') as assembly_file: assembly_file.write(json.dumps(json_data, indent=4)) def do_pack(in_json_config): diff --git a/pyxamstore/explorer.py b/pyxamstore/explorer.py deleted file mode 100755 index 17252d0..0000000 --- a/pyxamstore/explorer.py +++ /dev/null @@ -1,729 +0,0 @@ -"""Pack and unpack Xamarin AssemblyStore files""" - -from __future__ import print_function -from builtins import object -import struct -import argparse -import os -import os.path -import sys -import json -import shutil - -import lz4.block -import xxhash - -from . import constants - -# Enable debugging here. -DEBUG = False - -def debug(message): - - """Print a debuggable message""" - - if DEBUG: - print("[debug] %s" % message) - - -class ManifestEntry(object): - - """Element in Manifest""" - - hash32 = "" - hash64 = "" - blob_id = 0 - blob_idx = 0 - name = "" - - def __init__(self, hash32, hash64, blob_id, blob_idx, name): - - """Initialize item""" - - self.hash32 = hash32 - self.hash64 = hash64 - self.blob_id = int(blob_id) - self.blob_idx = int(blob_idx) - self.name = name - - -class ManifestList(list): - - """List of manifest entries""" - - def get_idx(self, blob_id, blob_idx): - - """Find entry by ID""" - - for entry in self: - if entry.blob_idx == blob_idx and entry.blob_id == blob_id: - return entry - return None - - -class AssemblyStoreAssembly(object): - - """Assembly Details""" - - data_offset = 0 - data_size = 0 - debug_data_offset = 0 - debug_data_size = 0 - config_data_offset = 0 - config_data_size = 0 - - def __init__(self): - pass - - -class AssemblyStoreHashEntry(object): - - """Hash Details""" - - hash_val = "" - mapping_index = 0 - local_store_index = 0 - store_id = 0 - - def __init__(self): - pass - - -class AssemblyStore(object): - - """AssemblyStore object""" - - raw = "" - - file_name = "" - - manifest_entries = None - - hdr_magic = "" - hdr_version = 0 - hdr_lec = 0 - hdr_gec = 0 - hdr_store_id = 0 - - assembly_list = None - global_hash32 = None - global_hash64 = None - - def __init__(self, in_file_name, manifest_entries, primary=True): - - """Parse and read store""" - - self.manifest_entries = manifest_entries - self.file_name = os.path.basename(in_file_name) - - blob_file = open(in_file_name, "rb") - - self.raw = blob_file.read() - - blob_file.seek(0) - - # Header Section - # - # 0 - 3: Magic - # 4 - 7: Version - # 8 - 11: LocalEntryCount - # 12 - 15: GlobalEntryCount - # 16 - 19: StoreID - - magic = blob_file.read(4) - if magic != constants.ASSEMBLY_STORE_MAGIC: - raise Exception("Invalid Magic: %s" % magic) - - version = struct.unpack("I", blob_file.read(4))[0] - if version > constants.ASSEMBLY_STORE_FORMAT_VERSION: - raise Exception("This version is higher than expected! Max = %d, got %d" - % constants.ASSEMBLY_STORE_FORMAT_VERSION, version) - - self.hdr_version = version - - self.hdr_lec = struct.unpack("I", blob_file.read(4))[0] - self.hdr_gec = struct.unpack("I", blob_file.read(4))[0] - self.hdr_store_id = struct.unpack("I", blob_file.read(4))[0] - - debug("Local entry count: %d" % self.hdr_lec) - debug("Global entry count: %d" % self.hdr_gec) - - self.assemblies_list = list() - - debug("Entries start at: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) - - i = 0 - while i < self.hdr_lec: - - # 0 - 3: DataOffset - # 4 - 7: DataSize - # 8 - 11: DebugDataOffset - # 12 - 15: DebugDataSize - # 16 - 19: ConfigDataOffset - # 20 - 23: ConfigDataSize - - debug("Extracting Assembly: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) - entry = blob_file.read(24) - - assembly = AssemblyStoreAssembly() - - assembly.data_offset = struct.unpack("I", entry[0:4])[0] - assembly.data_size = struct.unpack("I", entry[4:8])[0] - assembly.debug_data_offset = struct.unpack("I", entry[8:12])[0] - assembly.debug_data_size = struct.unpack("I", entry[12:16])[0] - assembly.config_data_offset = struct.unpack("I", entry[16:20])[0] - assembly.config_data_size = struct.unpack("I", entry[20:24])[0] - - self.assemblies_list.append(assembly) - - debug(" Data Offset: %d (0x%x)" % (assembly.data_offset, assembly.data_offset)) - debug(" Data Size: %d (0x%x)" % (assembly.data_size, assembly.data_size)) - debug(" Config Offset: %d (0x%x)" % (assembly.config_data_offset, assembly.config_data_offset)) - debug(" Config Size: %d (0x%x)" % (assembly.config_data_size, assembly.config_data_size)) - debug(" Debug Offset: %d (0x%x)" % (assembly.debug_data_offset, assembly.debug_data_offset)) - debug(" Debug Size: %d (0x%x)" % (assembly.debug_data_size, assembly.debug_data_size)) - - i += 1 - - if not primary: - debug("Skipping hash sections in non-primary store") - return - - # Parse Hash data - # - # The following 2 sections are _required_ to be in order from - # lowest to highest (e.g. 0x00000000 to 0xffffffff). - # Since you're very likely not going to be adding assemblies - # (or renaming) to the store, I'm going to store the hashes with the - # assemblies.json to make sorting easier when packing. - - debug("Hash32 start at: %d (0x%x)" % (blob_file.tell(), blob_file.tell())) - self.global_hash32 = list() - - i = 0 - while i < self.hdr_lec: - - entry = blob_file.read(20) - - hash_entry = AssemblyStoreHashEntry() - - hash_entry.hash_val = "0x%08x" % struct.unpack("") - print("") - print(" MODES:") - print("\tunpack Unpack assembly blobs.") - print("\tpack Repackage assembly blobs.") - print("\thash file_name Generate xxHash values.") - print("\thelp Print this message.") - - return 0 - - -def do_unpack(in_directory, in_arch, force): - - """Unpack a assemblies.blob/manifest""" - - arch_assemblies = False - - if force and os.path.isdir("out/"): - shutil.rmtree("out/") - - # First check if all files exist. - if os.path.isdir("out/"): - print("Out directory already exists!") - return 3 - - manifest_path = os.path.join(in_directory, constants.FILE_ASSEMBLIES_MANIFEST) - assemblies_path = os.path.join(in_directory, constants.FILE_ASSEMBLIES_BLOB) - - if not os.path.isfile(manifest_path): - print("Manifest file '%s' does not exist!" % manifest_path) - return 4 - elif not os.path.isfile(assemblies_path): - print("Main assemblies blob '%s' does not exist!" % assemblies_path) - return 4 - - # The manifest will have all entries (regardless of which - # *.blob they're found in. Parse this first, and then handle - # each blob. - - manifest_entries = read_manifest(manifest_path) - if manifest_entries is None: - print("Unable to parse assemblies.manifest file!") - return 5 - - json_data = dict() - json_data['stores'] = list() - json_data['assemblies'] = list() - - os.mkdir("out/") - - assembly_store = AssemblyStore(assemblies_path, manifest_entries) - - if assembly_store.hdr_lec != assembly_store.hdr_gec: - arch_assemblies = True - debug("There are more assemblies to unpack here!") - - # Do extraction. - json_data = assembly_store.extract_all(json_data) - - # What about architecture assemblies? - if arch_assemblies: - arch_assemblies_path = os.path.join(in_directory, - constants.ARCHITECTURE_MAP[in_arch]) - - arch_assembly_store = AssemblyStore(arch_assemblies_path, - manifest_entries, - primary=False) - json_data = arch_assembly_store.extract_all(json_data) - - # Save the large config out. - with open(constants.FILE_ASSEMBLIES_JSON, 'w') as assembly_file: - assembly_file.write(json.dumps(json_data, indent=4)) - -def do_pack(in_json_config): - - """Create new assemblies.blob/manifest""" - - if not os.path.isfile(in_json_config): - print("Config file '%s' does not exist!" % in_json_config) - return -1 - - if os.path.isfile("assemblies.manifest.new"): - print("Output manifest exists!") - return -2 - - - if os.path.isfile("assemblies.blob.new"): - print("Output blob exists!") - return -3 - - json_data = None - with open(in_json_config, "r") as json_f: - json_data = json.load(json_f) - - # Write new assemblies.manifest - print("Writing 'assemblies.manifest.new'...") - assemblies_manifest_f = open("assemblies.manifest.new", "w") - - assemblies_manifest_f.write("Hash 32 Hash 64 ") - assemblies_manifest_f.write("Blob ID Blob idx Name\r\n") - - #for _, store_json in json_data['stores'].items(): - for assembly in json_data['assemblies']: - hash32, hash64 = gen_xxhash(assembly['name']) - - line = ("0x%08s 0x%016s %03d %04d %s\r\n" - % (hash32, hash64, assembly['store_id'], - assembly['blob_idx'], assembly['name'])) - - assemblies_manifest_f.write(line) - - assemblies_manifest_f.close() - - # This is hacky, but we need the lec/gec if there are multiple stores. - store_zero_lec = 0 - for assembly_store in json_data['stores']: - for store_name, store_data in list(assembly_store.items()): - if store_name == "assemblies.blob": - store_zero_lec = store_data['header']['lec'] - - # Next do the blobs. - for assembly_store in json_data['stores']: - for store_name, store_data in list(assembly_store.items()): - - out_store_name = "%s.new" % store_name - - # Pack the new AssemblyStore structure - print("Writing '%s'..." % out_store_name) - assemblies_blob_f = open(out_store_name, "wb") - - # Write header - json_hdr = store_data['header'] - assemblies_blob_f.write(struct.pack("4sIIII", - constants.ASSEMBLY_STORE_MAGIC, - json_hdr['version'], - json_hdr['lec'], - json_hdr['gec'], - json_hdr['store_id'])) - - # Offsets are weird. - # If this is a primary store, the data is: - # -header - # -ASA header - # -hash32 - # -hash64 - # -ASA data - # But a non-primary does not have hashes. Best to determine early - # if this is primary and act accordingly throughout. - primary = bool(json_hdr['store_id'] == 0) - - next_entry_offset = 20 - next_data_offset = 20 + (json_hdr['lec'] * 24) + (json_hdr['gec'] * 40) - - if not primary: - next_data_offset = 20 + (json_hdr['lec'] * 24) - - # First pass: Write the entries + DLL content. - for assembly in json_data['assemblies']: - - if assembly['store_id'] != json_hdr['store_id']: - debug("Skipping assembly for another store") - continue - - assembly_data = open(assembly['file'], "rb").read() - if assembly['lz4']: - assembly_data = lz4_compress(assembly_data, - assembly['lz4_desc_idx']) - - data_size = len(assembly_data) - - # Write the entry data - assemblies_blob_f.seek(next_entry_offset) - assemblies_blob_f.write(struct.pack("IIIIII", - next_data_offset, - data_size, - 0, 0, 0, 0)) - - # Write binary data - assemblies_blob_f.seek(next_data_offset) - assemblies_blob_f.write(assembly_data) - - # Move all offsets forward. - next_data_offset += data_size - next_entry_offset += 24 - - # Second + third pass: sort the hashes and write them - # But skip if not primary. - if not primary: - assemblies_blob_f.close() - continue - - next_hash32_offset = 20 + (json_hdr['lec'] * 24) - next_hash64_offset = 20 + (json_hdr['lec'] * 24) + (json_hdr['gec'] * 20) - - assembly_data = json_data["assemblies"] - - # hash32 - for assembly in sorted(assembly_data, key=lambda d: d['hash32']): - - # Hash sections - hash32, hash64 = gen_xxhash(assembly['name'], raw=True) - mapping_id = assembly['blob_idx'] if assembly['store_id'] == 0 else store_zero_lec + assembly['blob_idx'] - - # Write the hash32 - assemblies_blob_f.seek(next_hash32_offset) - assemblies_blob_f.write(struct.pack("4sIIII", - hash32, - 0, - mapping_id, - assembly['blob_idx'], - assembly['store_id'])) - - next_hash32_offset += 20 - - # hash64 - for assembly in sorted(assembly_data, key=lambda d: d['hash64']): - - # Hash sections - hash32, hash64 = gen_xxhash(assembly['name'], raw=True) - mapping_id = assembly['blob_idx'] if assembly['store_id'] == 0 else store_zero_lec + assembly['blob_idx'] - - # Write the hash64 - assemblies_blob_f.seek(next_hash64_offset) - assemblies_blob_f.write(struct.pack("8sIII", - hash64, - mapping_id, - assembly['blob_idx'], - assembly['store_id'])) - - next_hash64_offset += 20 - - # Done! - assemblies_blob_f.close() - - return 0 - - -def unpack_store(args): - - """Unpack an assemblies store""" - - parser = argparse.ArgumentParser(prog='pyxamstore unpack', - description='Unpack DLLs from assemblies.blob store.') - parser.add_argument('--dir', '-d', type=str, metavar='val', - default='./', - dest='directory', - help='Where to load blobs/manifest from.') - parser.add_argument('--arch', '-a', type=str, metavar='val', - default='arm64', - dest='architecture', - help='Which architecture to unpack: arm(64), x86(_64)') - parser.add_argument('--force', '-f', action='store_const', - dest='force', const=True, default=False, - help="Force re-create out/ directory.") - - parsed_args = parser.parse_args(args) - - return do_unpack(parsed_args.directory, - parsed_args.architecture, - parsed_args.force) - - -def pack_store(args): - - """Pack an assemblies store""" - - parser = argparse.ArgumentParser(prog='pyxamstore pack', - description='Repackage DLLs into assemblies.blob.') - parser.add_argument('--config', '-c', type=str, metavar='val', - default='assemblies.json', - dest='config_json', - help='Input assemblies.json file.') - - parsed_args = parser.parse_args(args) - - if not os.path.isfile(parsed_args.config_json): - print("File '%s' doesn't exist!" % parsed_args.config_json) - return -3 - - return do_pack(parsed_args.config_json) - - -def gen_hash(args): - - """Generate xxhashes for a given file path/string, mostly for testing""" - - if len(args) < 1: - print("Need to provide a string to hash!") - return -1 - - file_name = args.pop(0) - hash_name = os.path.splitext(os.path.basename(file_name))[0] - - print("Generating hashes for string '%s' (%s)" % (file_name, hash_name)) - hash32, hash64 = gen_xxhash(hash_name) - - print("Hash32: 0x%s" % hash32) - print("Hash64: 0x%s" % hash64) - - return 0 - - -def main(): - - """Main Loop""" - - if len(sys.argv) < 2: - print("Mode is required!") - usage() - return -1 - - sys.argv.pop(0) - mode = sys.argv.pop(0) - - if mode == "unpack": - return unpack_store(sys.argv) - elif mode == "pack": - return pack_store(sys.argv) - elif mode == "hash": - return gen_hash(sys.argv) - elif mode in ['-h', '--h', 'help']: - return usage() - - print("Unknown mode: '%s'" % mode) - return -2 - - -if __name__ == "__main__": - main() From 843b00da86ddee8a05541f63b1fd6855634a77bc Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Sun, 8 Dec 2024 11:20:37 +0530 Subject: [PATCH 7/8] use f-string on exception --- pyxamstore/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyxamstore/__init__.py b/pyxamstore/__init__.py index 462d9f2..5dd30c4 100644 --- a/pyxamstore/__init__.py +++ b/pyxamstore/__init__.py @@ -136,8 +136,7 @@ def __init__(self, in_file_name, manifest_entries, primary=True): version = struct.unpack("I", blob_file.read(4))[0] if version > constants.ASSEMBLY_STORE_FORMAT_VERSION: - raise Exception("This version is higher than expected! Max = %d, got %d" - % constants.ASSEMBLY_STORE_FORMAT_VERSION, version) + raise Exception(f"This version is higher than expected! Max = {constants.ASSEMBLY_STORE_FORMAT_VERSION}, got {version}") self.hdr_version = version From 2d771f8b6aa6aeed6bac108d7a36975ec9aac351 Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Sun, 15 Dec 2024 14:58:15 +0530 Subject: [PATCH 8/8] add tip for new branch --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e05c731..e984914 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # Xamarin AssemblyStore Explorer (pyxamstore) This is an alpha release of an `assemblies.blob` AssemblyStore parser written in Python. The tool is capable of unpack and repackaging `assemblies.blob` and `assemblies.manifest` Xamarin files from an APK. +> [!TIP] +> If you're looking for AssemblyStoreV2 new format support, check out [new](https://github.com/AbhiTheModder/pyxamstore/tree/new) branch. + ## Installing - Using `pip` [recommended]: ```shell