From 45a64788ad941e4de0243be378e82afdc726bd43 Mon Sep 17 00:00:00 2001 From: Andrew Haberlandt Date: Fri, 21 Jun 2024 09:40:15 +0000 Subject: [PATCH] feat: pwndbg compatibility layer, work on fixing multithread support, heap example --- .dockerignore | 1 + Dockerfile | 22 ++- bin/pyda | 2 +- examples/heap.py | 53 +++++++ lib/pyda/__init__.py | 5 +- lib/pyda/arch.py | 24 ++++ lib/pyda/base.py | 30 +++- lib/pyda/hacks/gdb.py | 239 ++++++++++++++++++++++++++++++++ lib/pyda/hacks/pls_no_signal.py | 2 + lib/pyda/hacks/pwndbg_compat.py | 213 ++++++++++++++++++++++++++++ lib/pyda/hacks/signal.py | 0 lib/pyda/process.py | 82 ++++++++++- pyda_core/CMakeLists.txt | 4 +- pyda_core/pyda_core.c | 122 ++++++++++++---- pyda_core/pyda_core.h | 37 +++-- pyda_core/pyda_core_py.c | 198 +++++++++++++++++++++++--- pyda_core/tool.c | 70 ++++++++-- 17 files changed, 1017 insertions(+), 87 deletions(-) create mode 100644 examples/heap.py create mode 100644 lib/pyda/arch.py create mode 100644 lib/pyda/hacks/gdb.py create mode 100644 lib/pyda/hacks/pls_no_signal.py create mode 100644 lib/pyda/hacks/pwndbg_compat.py create mode 100644 lib/pyda/hacks/signal.py diff --git a/.dockerignore b/.dockerignore index 7db294d..6ce227f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,4 @@ build/ cpython/ dynamorio/ +Dockerfile diff --git a/Dockerfile b/Dockerfile index 389f34f..673815d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ RUN cd /opt/custom-python/cpython-3.10.12/ && git apply cpython-3.10.12.patch RUN cd /opt/custom-python/cpython-3.10.12/ && ./configure --prefix=/opt/custom-python-root/ --with-ensurepip=install --enable-shared --with-openssl=/usr/local/ --with-openssl-rpath=auto && \ make install - -ARG PYDA_DEBUG=0 +ARG PYDA_DEBUG=1 # install dynamorio RUN git clone --recurse-submodules -j4 https://github.com/DynamoRIO/dynamorio.git /opt/dynamorio && cd /opt/dynamorio/ && git checkout release_10.0.0 @@ -32,8 +32,6 @@ ENV DYNAMORIO_HOME=/opt/dynamorio/build/ ENV 
PYTHONHOME=/opt/custom-python-root/ ENV PYTHONPATH=/opt/custom-python-root/lib/python3.10/:/opt/pyda/lib -RUN pip3 install pwntools - COPY ./ /opt/pyda/ WORKDIR /opt/pyda RUN mkdir build && cd build && \ @@ -42,6 +40,20 @@ RUN mkdir build && cd build && \ ENV PATH=$PATH:/opt/pyda/bin +RUN pip3 install pwntools +WORKDIR /tmp + +RUN git clone https://github.com/pwndbg/pwndbg.git && \ + cd pwndbg && git checkout cada600b0f2be0e2873465f59cc9c4c31425951a && \ + sed -i 's/signal.signal/__import__("pls_no_signal").signal/' pwndbg/__init__.py && \ + pip3 install -e . -# RUN bash -c "$(wget https://gef.blah.cat/sh -O -)" -# RUN pip3 install pwntools \ No newline at end of file +WORKDIR /opt/pyda + +ARG PYDA_GEF=1 +RUN bash -c 'if [[ "$PYDA_GEF" = "1" ]]; then \ + apt install -y file; \ + PYTHONPATH= PYTHONHOME= bash -c "$(wget https://raw.githubusercontent.com/hugsy/gef/main/scripts/gef.sh -O -)"; \ + fi' + +RUN pip3 install pwntools diff --git a/bin/pyda b/bin/pyda index aeef86f..ce68d3c 100755 --- a/bin/pyda +++ b/bin/pyda @@ -1,4 +1,4 @@ #!/bin/bash ROOT=$(dirname "$0")/../ -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PYTHONHOME/lib/ PYDA_SCRIPT=$1 $DYNAMORIO_HOME/bin64/drrun -stack_size 1024K -c $ROOT/build/pyda_core/libtool.so ${@:2} \ No newline at end of file +LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PYTHONHOME/lib/ PYDA_SCRIPT=$1 PWNLIB_NOTERM=1 $DYNAMORIO_HOME/bin64/drrun -stack_size 1024K -c $ROOT/build/pyda_core/libtool.so ${@:2} diff --git a/examples/heap.py b/examples/heap.py new file mode 100644 index 0000000..bb2e020 --- /dev/null +++ b/examples/heap.py @@ -0,0 +1,53 @@ +from pwn import * +from pyda import * + +import pwndbg # must come after pyda import, i think +from termcolor import colored, cprint + +import string +import sys + +p = process() + +e = ELF(p.exe_path) +e.address = p.maps[p.exe_path].base + +libc = ELF("/lib/x86_64-linux-gnu/libc.so.6") +libc.address = p.maps[libc.path].base + +sym_map = { + libc.symbols["malloc"]: "malloc", + libc.symbols["free"]: "free", + 
"""Static facts about the instrumented target's architecture.

Everything here is currently hard-wired to x86-64 Linux; the helpers exist so
the gdb/pwndbg compatibility shims have one place to ask.
"""

from enum import Enum

ARCH = Enum("Arch", "X86 X64")

# Per-architecture facts live in tables keyed by ARCH so that the gdb name and
# the pointer width can never disagree once arch() stops being a constant.
_GDB_ARCH_NAMES = {
    ARCH.X86: "i386",
    ARCH.X64: "i386:x86-64",
}

_POINTER_SIZES = {
    ARCH.X86: 4,
    ARCH.X64: 8,
}


def arch():
    """Return the ARCH member describing the target (always ``ARCH.X64`` for now)."""
    # todo: arch detection from dynamorio
    return ARCH.X64


def gdb_arch():
    """Return the architecture name in the spelling gdb uses (e.g. ``i386:x86-64``)."""
    return _GDB_ARCH_NAMES[arch()]


def endianness():
    """Return the byte order as accepted by ``int.from_bytes`` (``"little"``)."""
    return "little"


def os():
    """Return the target operating system name (``"linux"``)."""
    return "linux"


def ptrsize():
    """Return the pointer width in bytes for the architecture reported by arch()."""
    return _POINTER_SIZES[arch()]
def execute(s, to_string=False, from_tty=False):
    """Answer the small set of gdb CLI commands that pwndbg issues at import time.

    This is a canned-response stand-in for ``gdb.execute``: nothing is actually
    executed. ``to_string`` and ``from_tty`` are accepted for signature
    compatibility and ignored; the response text is always returned.

    Args:
        s: The gdb command line, matched exactly (or by prefix for ``set`` and
            ``handle``).

    Returns:
        The response text as a ``str``.

    Raises:
        NotImplementedError: if the command has no canned response.
    """
    if s == "show language":
        return 'The current source language is "auto; currently c".'
    if s == "show debug-file-directory":
        return 'The directory where separate debug symbols are searched for is "/usr/lib/debug".'
    if s == "show pagination":
        return 'State of pagination is off.'
    if s == "help all":
        return 'there-are-no-command-why-are-you-asking-me -- Why?\n'
    if s == "show endian":
        # Previously rendered as "currently little little"; gdb's own wording
        # is "currently <order> endian".
        return f'The target endianness is set automatically (currently {pyda.arch.endianness()} endian).'
    if s == "show architecture":
        # A stray trailing comma used to turn this return value into a 1-tuple.
        return f"The target architecture is set automatically (currently {pyda.arch.gdb_arch()})"
    if s == "show osabi":
        return {
            "linux": (
                'The current OS ABI is "auto" (currently "GNU/Linux").\n'
                'The default OS ABI is "GNU/Linux".\n'
            ),
        }[pyda.arch.os()]
    if s == "info win":
        return "No stack."
    if s.startswith(("set ", "handle ")):
        # Settings and signal dispositions are accepted and silently dropped.
        return "The TUI is not active."

    print(f"Failed command: {s}")
    raise NotImplementedError(f"s={s}")
class error(RuntimeError):
    """Stand-in for ``gdb.error`` in the fake ``gdb`` module.

    GDB documents ``gdb.error`` as a subclass of ``RuntimeError``. Deriving from
    ``BaseException`` (as before) meant ``except Exception`` handlers could not
    catch it. Handlers written against ``gdb.error`` or ``BaseException`` keep
    working unchanged.

    Attributes:
        s: The message passed to the constructor.
    """

    def __init__(self, s):
        # Populate ``args`` so repr(), logging and pickling see the message.
        super().__init__(s)
        self.s = s

    def __str__(self):
        # str() guards against a non-string payload, which would otherwise make
        # __str__ itself raise TypeError.
        return str(self.s)
class GDBLibSymbol():
    """Replacement for ``pwndbg.gdblib.symbol`` that resolves nothing.

    Every lookup returns ``None``. ``get`` additionally prints a warning when
    the address falls inside a known module, to flag that symbolisation is not
    implemented.
    """

    def __init__(self):
        pass

    def static_linkage_symbol_address(self, name):
        """Return ``None``: static symbol lookup is not implemented."""
        return None

    def address(self, name):
        """Return ``None``: symbol-to-address lookup is not implemented."""
        return None

    def get(self, addr):
        """Return ``None`` for any address, warning if it lies in a named module.

        ``addr`` may be anything ``int()`` accepts. It is converted once up
        front: the warning used to call ``hex()`` on the raw argument, which
        raises ``TypeError`` for objects that define ``__int__`` but not
        ``__index__``.
        """
        addr = int(addr)
        res = pyda_core.get_module_for_addr(addr)
        if res is not None and res[0] != 'unknown':
            print(f"WARN: Symbol lookup not implemented {hex(addr)} {res}")

        return None
class GDBLibVMMap():
    """Replacement for ``pwndbg.gdblib.vmmap`` backed by ``pyda.xinfo``."""

    def __init__(self, proc):
        # The process handle is accepted for symmetry with the other shims but
        # is not needed: lookups go through the module-level pyda.xinfo().
        pass

    def find(self, addr):
        """Return a ``Page`` describing the mapping that contains ``addr``.

        Returns ``None`` when ``pyda.xinfo`` has no answer. Previously a
        ``Page`` wrapping ``None`` was returned, which failed on the first
        attribute access.
        """
        info = pyda.xinfo(int(addr))
        if info is None:
            return None
        return Page(info)

    def get(self):
        """Return the list of all mappings (enumeration is not implemented, so it is empty)."""
        return []
class GDBLibConfig():
    """Permissive stand-in for ``pwndbg.gdblib.config``.

    Any option that has not been assigned reads as ``0``; assigned options are
    kept in the private ``_d`` dict rather than on the instance.
    """

    def __init__(self):
        self._d = {}

    def __getattr__(self, name):
        """Return the stored option, or ``0`` for an option never set.

        Only called when normal attribute lookup fails. Two cases raise
        ``AttributeError`` instead of defaulting:

        * ``_d`` itself -- it is missing only on an instance whose ``__init__``
          has not run (e.g. mid-unpickle); ``object`` has no ``__getattr__`` to
          delegate to, so the error is raised directly.
        * unset dunder names -- protocol probes such as
          ``getattr(obj, "__deepcopy__", None)`` must see "absent", not ``0``.
        """
        if name == "_d":
            raise AttributeError(name)

        store = self._d
        if name in store:
            return store[name]
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return 0

    def __setattr__(self, name, value):
        """Store ``value`` under ``name`` in ``_d`` (``_d`` itself is set normally)."""
        if name == "_d":
            super().__setattr__(name, value)
        else:
            self._d[name] = value
@dataclass
class Map:
    """One mapped region (or module) of the target's address space.

    ``perms`` is a bitmask: 4 = readable, 2 = writable, 1 = executable. It may
    be ``None`` when the permissions are not known (``ProcessMaps`` builds
    entries that way); the permission properties then report ``False`` instead
    of raising ``TypeError``.
    """

    vaddr: int  # start address of the region
    size: int   # length in bytes (0 when unknown)
    path: str   # backing file / module path
    perms: int  # rwx bitmask described above, or None when unknown

    @property
    def base(self):
        """Start address (alias of ``vaddr``)."""
        return self.vaddr

    @property
    def start(self):
        """Start address (alias of ``vaddr``)."""
        return self.vaddr

    @property
    def end(self):
        """First address past the region."""
        return self.base + self.size

    def _has_perm(self, bit):
        # Shared by the three permission properties; always yields a real bool.
        return self.perms is not None and bool(self.perms & bit)

    @property
    def executable(self):
        """True if the execute bit (1) is set."""
        return self._has_perm(1)

    @property
    def writable(self):
        """True if the write bit (2) is set."""
        return self._has_perm(2)

    @property
    def readable(self):
        """True if the read bit (4) is set."""
        return self._has_perm(4)
ABORT_IF_NODYNAMORIO; @@ -14,40 +14,60 @@ pyda_thread* pyda_mk_process() { #else #include "dr_api.h" -pyda_thread* pyda_mk_process() { +pyda_process* pyda_mk_process() { ABORT_IF_NODYNAMORIO; - pyda_thread *process = dr_global_alloc(sizeof(pyda_thread)); - pthread_cond_init(&process->resume_cond, 0); - pthread_cond_init(&process->break_cond, 0); + pyda_process *proc = dr_global_alloc(sizeof(pyda_process)); + proc->refcount = 2; + proc->dirty_hooks = 0; + proc->main_thread = pyda_mk_thread(proc); + proc->callbacks = NULL; + proc->thread_init_hook = NULL; + return proc; +} + +pyda_thread* pyda_mk_thread(pyda_process *proc) { + ABORT_IF_NODYNAMORIO; + + pyda_thread *thread = dr_global_alloc(sizeof(pyda_thread)); + pthread_cond_init(&thread->resume_cond, 0); + pthread_cond_init(&thread->break_cond, 0); pthread_mutexattr_t attr; pthread_mutexattr_init(&attr); pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); - pthread_mutex_init(&process->mutex, &attr); + pthread_mutex_init(&thread->mutex, &attr); // Start with it locked... 
/*
 * Unlink the first address hook (callback_type == 0) registered for `addr`
 * from the process-wide hook list, then mark the hook set dirty.
 *
 * If no hook matches, the list is left untouched but dirty_hooks is still set.
 *
 * NOTE(review): the unlinked node is neither returned to dr_global_free nor is
 * its py_func released here, so each removal leaks one pyda_hook and one
 * callback reference. Presumably this is because already-instrumented code may
 * still hold the node pointer -- confirm before adding a free.
 * NOTE(review): pyda_flush_hooks only walks hooks still on the list, so the
 * removed address does not appear to be flushed -- confirm the stale clean call
 * cannot fire after removal.
 */
void pyda_remove_hook(pyda_process *p, uint64_t addr) {
    /* Pointer-to-pointer walk so the head and interior nodes unlink the same way. */
    pyda_hook **cb = &p->callbacks;
    while (*cb) {
        if ((*cb)->callback_type == 0 && (*cb)->addr == (void*)addr) {
            *cb = (*cb)->next;
            break;
        }
        cb = &(*cb)->next;
    }

    p->dirty_hooks = 1;
}
= callback; + Py_INCREF(callback); +} + int pyda_flush_hooks() { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); - if (!t->dirty_hooks) return 0; + pyda_process *p = t->proc; + if (!p->dirty_hooks) return 0; - pyda_hook *cb = t->callbacks; + pyda_hook *cb = p->callbacks; while (cb) { if (cb->callback_type == 0) { DEBUG_PRINTF("dr_flush_region: %llx\n", (void*)cb->addr); @@ -131,11 +176,11 @@ int pyda_flush_hooks() { } cb = cb->next; } - t->dirty_hooks = 0; + p->dirty_hooks = 0; return 1; } -pyda_hook* pyda_get_callback(pyda_thread *t, void* addr) { - pyda_hook *cb = t->callbacks; +pyda_hook* pyda_get_callback(pyda_process *p, void* addr) { + pyda_hook *cb = p->callbacks; while (cb) { if (cb->callback_type == 0 && cb->addr == addr) return cb; @@ -145,16 +190,21 @@ pyda_hook* pyda_get_callback(pyda_thread *t, void* addr) { } void pyda_hook_cleancall(pyda_hook *cb) { + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + if (t->skip_next_hook) { + t->skip_next_hook = 0; + return; + } + PyGILState_STATE gstate; gstate = PyGILState_Ensure(); - pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); - void *drcontext = dr_get_current_drcontext(); t->cur_context.size = sizeof(dr_mcontext_t); - t->cur_context.flags = DR_MC_INTEGER | DR_MC_CONTROL; // assuming SIMD doesnt exist + t->cur_context.flags = DR_MC_ALL; // dr_redirect_execution requires it dr_get_mcontext(drcontext, &t->cur_context); t->cur_context.pc = (app_pc)cb->addr; + t->rip_updated_in_cleancall = 0; PyObject *result = PyObject_CallFunctionObjArgs(cb->py_func, t->py_obj, NULL); if (result == NULL) { @@ -164,8 +214,24 @@ void pyda_hook_cleancall(pyda_hook *cb) { } Py_DECREF(result); - dr_set_mcontext(drcontext, &t->cur_context); - PyGILState_Release(gstate); + if (t->cur_context.pc == (app_pc)cb->addr && t->rip_updated_in_cleancall) { + if (t->rip_updated_in_cleancall) { + fprintf(stderr, "Hook updated RIP to the same address. This is UB. 
Aborting.\n"); + dr_abort(); + } + } + + if (pyda_flush_hooks() || t->rip_updated_in_cleancall) { + if (t->cur_context.pc == cb->addr) { + t->skip_next_hook = 1; + } + // we need to call dr_redirect_execution + PyGILState_Release(gstate); + dr_redirect_execution(&t->cur_context); + } else { + dr_set_mcontext(drcontext, &t->cur_context); + PyGILState_Release(gstate); + } } #endif \ No newline at end of file diff --git a/pyda_core/pyda_core.h b/pyda_core/pyda_core.h index c398333..737c80e 100644 --- a/pyda_core/pyda_core.h +++ b/pyda_core/pyda_core.h @@ -16,6 +16,7 @@ extern int is_dynamorio_running; typedef struct pyda_hook_s pyda_hook; typedef struct pyda_thread_s pyda_thread; +typedef struct pyda_process_s pyda_process; // Since we have multiple threads running, we need to keep track of // which one is holding the GIL. @@ -28,11 +29,17 @@ struct pyda_hook_s { pyda_hook *next; }; -struct pyda_thread_s { - unsigned long pid; - uint64_t register_state[32]; - +struct pyda_process_s { pyda_hook *callbacks; + int dirty_hooks; + int refcount; + + pyda_thread *main_thread; + PyObject *thread_init_hook; +}; + +struct pyda_thread_s { + unsigned long tid; pthread_cond_t resume_cond; pthread_cond_t break_cond; @@ -40,18 +47,24 @@ struct pyda_thread_s { int python_yielded, app_yielded; void* start_pc; - int dirty_hooks; - - int refcount; + pyda_process *proc; PyObject *py_obj; + + int rip_updated_in_cleancall; + int skip_next_hook; + #ifdef PYDA_DYNAMORIO_CLIENT dr_mcontext_t cur_context; #endif }; -pyda_thread* pyda_mk_process(); -void pyda_process_destroy(pyda_thread *t); +pyda_process* pyda_mk_process(); +pyda_thread* pyda_mk_thread(pyda_process*); + +void pyda_process_destroy(pyda_process *p); +void pyda_thread_destroy(pyda_thread *t); + PyObject *pyda_run_until(pyda_thread *, uint64_t addr); // yield from python to the executable @@ -61,8 +74,10 @@ void pyda_yield(pyda_thread *t); void pyda_break(pyda_thread *t); void pyda_initial_break(pyda_thread *t); -void 
pyda_add_hook(pyda_thread *t, uint64_t addr, PyObject *callback); -pyda_hook* pyda_get_callback(pyda_thread *t, void* addr); +void pyda_add_hook(pyda_process *p, uint64_t addr, PyObject *callback); +void pyda_remove_hook(pyda_process *p, uint64_t addr); +void pyda_set_thread_init_hook(pyda_process *p, PyObject *callback); +pyda_hook* pyda_get_callback(pyda_process *p, void* addr); // These can only be called from application threads int pyda_flush_hooks(); diff --git a/pyda_core/pyda_core_py.c b/pyda_core/pyda_core_py.c index 1af892d..c23b6e7 100644 --- a/pyda_core/pyda_core_py.c +++ b/pyda_core/pyda_core_py.c @@ -12,16 +12,22 @@ int is_dynamorio_running = 0; typedef struct { PyObject_HEAD - pyda_thread *t; + pyda_thread *main_thread; // main thread } PydaProcess; static PyObject* pyda_core_process(PyObject *self, PyObject *args, PyObject *kwargs); +static PyObject *pyda_list_modules(PyObject *self, PyObject *noarg); +static PyObject *pyda_get_base(PyObject *self, PyObject *args); +static PyObject *pyda_get_module_for_addr(PyObject *self, PyObject *args); +static PyObject *pyda_get_current_thread_id(PyObject *self, PyObject *noarg); + static void PydaProcess_dealloc(PydaProcess *self); -static PyObject *pyda_process_run(PyObject *self, PyObject *noarg); +static PyObject *PydaProcess_run(PyObject *self, PyObject *noarg); static PyObject *PydaProcess_register_hook(PyObject *self, PyObject *args); +static PyObject *PydaProcess_unregister_hook(PyObject *self, PyObject *args); +static PyObject *PydaProcess_set_thread_init_hook(PyObject *self, PyObject *args); static PyObject *PydaProcess_get_register(PyObject *self, PyObject *args); static PyObject *PydaProcess_set_register(PyObject *self, PyObject *args); -static PyObject *PydaProcess_get_base(PyObject *self, PyObject *args); static PyObject *PydaProcess_read(PyObject *self, PyObject *args); static PyObject *PydaProcess_write(PyObject *self, PyObject *args); static PyObject *PydaProcess_get_main_module(PyObject *self, 
PyObject *args); @@ -29,6 +35,14 @@ static PyObject *PydaProcess_get_main_module(PyObject *self, PyObject *args); static PyMethodDef PydaGlobalMethods[] = { {"process", (PyCFunction)pyda_core_process, METH_KEYWORDS | METH_VARARGS, "Start a process."}, + {"list_modules", (PyCFunction)pyda_list_modules, METH_NOARGS, + "List all the modules."}, + {"get_base", (PyCFunction)pyda_get_base, METH_VARARGS, + "Get base address for module"}, + {"get_module_for_addr", (PyCFunction)pyda_get_module_for_addr, METH_VARARGS, + "Get module info for addr"}, + {"get_current_thread_id", (PyCFunction)pyda_get_current_thread_id, METH_NOARGS, + "Get current thread id, numbered from 1"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -41,17 +55,30 @@ static struct PyModuleDef pyda_module = { PydaGlobalMethods }; +static PyObject *MemoryError; + PyMODINIT_FUNC PyInit_pyda_core(void) { - return PyModule_Create(&pyda_module); + PyObject *m = PyModule_Create(&pyda_module); + MemoryError = PyErr_NewException("pyda.MemoryError", NULL, NULL); + Py_XINCREF(MemoryError); + if (PyModule_AddObject(m, "MemoryError", MemoryError) < 0) { + Py_XDECREF(MemoryError); + Py_CLEAR(MemoryError); + Py_DECREF(m); + return NULL; + } + + return m; } /* Process class */ static PyMethodDef PydaProcessMethods[] = { - {"run", pyda_process_run, METH_NOARGS, "Run"}, - {"get_base", PydaProcess_get_base, METH_VARARGS, "Get base addr for image"}, + {"run", PydaProcess_run, METH_NOARGS, "Run"}, {"register_hook", PydaProcess_register_hook, METH_VARARGS, "Register a hook"}, + {"unregister_hook", PydaProcess_unregister_hook, METH_VARARGS, "Un-register a hook"}, + {"set_thread_init_hook", PydaProcess_set_thread_init_hook, METH_VARARGS, "Register thread init hook"}, {"get_register", PydaProcess_get_register, METH_VARARGS, "Get a specific register"}, {"set_register", PydaProcess_set_register, METH_VARARGS, "Set a specific register"}, {"get_main_module", PydaProcess_get_main_module, METH_VARARGS, "Get name of main module"}, @@ -88,22 
/*
 * pyda_core.list_modules() -> list[str]
 *
 * Return the full path of every module DynamoRIO currently knows about.
 * Outside a DynamoRIO client build there is nothing to enumerate and None is
 * returned.
 *
 * Returns a new reference, or NULL with a Python exception set on failure.
 */
static PyObject *
pyda_list_modules(PyObject* self, PyObject *noarg) {
#ifdef PYDA_DYNAMORIO_CLIENT
    PyObject *list = PyList_New(0);
    if (list == NULL)
        return NULL;

    dr_module_iterator_t *iter = dr_module_iterator_start();
    while (dr_module_iterator_hasnext(iter)) {
        module_data_t *mod = dr_module_iterator_next(iter);

        /* Copy the path out, then release the module record: every
         * module_data_t handed out by the iterator must be freed by the caller. */
        PyObject *path = PyUnicode_FromString(mod->full_path);
        dr_free_module_data(mod);

        /* PyList_Append takes its own reference, so ours is dropped either way. */
        int rc = (path != NULL) ? PyList_Append(list, path) : -1;
        Py_XDECREF(path);

        if (rc < 0) {
            dr_module_iterator_stop(iter);
            Py_DECREF(list);
            return NULL;
        }
    }
    dr_module_iterator_stop(iter);
    return list;
#else
    Py_INCREF(Py_None);
    return Py_None;
#endif // PYDA_DYNAMORIO_CLIENT
}
*self) static PyObject * PydaProcess_get_register(PyObject *self, PyObject *args) { PydaProcess *p = (PydaProcess*)self; + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); const char *regname; @@ -130,7 +192,7 @@ PydaProcess_get_register(PyObject *self, PyObject *args) { #ifdef PYDA_DYNAMORIO_CLIENT // DEBUG_PRINTF("get_register: %s\n", regname); - dr_mcontext_t *mc = &p->t->cur_context; + dr_mcontext_t *mc = &t->cur_context; // TODO: Fix... copilot wrote this. Surely we can write // a macro... @@ -166,6 +228,8 @@ PydaProcess_get_register(PyObject *self, PyObject *args) { return PyLong_FromUnsignedLong((unsigned long)mc->r15); } else if (strcmp(regname, "rdx") == 0) { return PyLong_FromUnsignedLong((unsigned long)mc->rdx); + } else if (strcmp(regname, "fsbase") == 0) { + return PyLong_FromUnsignedLong((unsigned long)dr_get_tls_field(dr_get_current_drcontext())); } else if (strcmp(regname, "rip") == 0 || strcmp(regname, "pc") == 0) { return PyLong_FromUnsignedLong((unsigned long)mc->pc); } @@ -178,6 +242,7 @@ PydaProcess_get_register(PyObject *self, PyObject *args) { static PyObject * PydaProcess_set_register(PyObject *self, PyObject *args) { PydaProcess *p = (PydaProcess*)self; + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); const char *regname; unsigned long long val; @@ -189,7 +254,7 @@ PydaProcess_set_register(PyObject *self, PyObject *args) { #ifdef PYDA_DYNAMORIO_CLIENT DEBUG_PRINTF("set_register: %s %llx\n", regname, val); // DEBUG_PRINTF("get_register: %s\n", regname); - dr_mcontext_t *mc = &p->t->cur_context; + dr_mcontext_t *mc = &t->cur_context; // TODO: Fix... copilot wrote this. Surely we can write // a macro... 
@@ -226,9 +291,8 @@ PydaProcess_set_register(PyObject *self, PyObject *args) { } else if (strcmp(regname, "rdx") == 0) { mc->rdx = val; } else if (strcmp(regname, "rip") == 0 || strcmp(regname, "pc") == 0) { - // mc->pc = val; - PyErr_SetString(PyExc_RuntimeError, "Setting rip is currently not supported"); - return NULL; + mc->pc = (void*)val; + t->rip_updated_in_cleancall = 1; } #endif // PYDA_DYNAMORIO_CLIENT @@ -240,7 +304,6 @@ static PyObject * PydaProcess_register_hook(PyObject *self, PyObject *args) { PydaProcess *p = (PydaProcess*)self; - const char *name; unsigned long long addr; PyObject *callback; @@ -257,7 +320,51 @@ PydaProcess_register_hook(PyObject *self, PyObject *args) { DEBUG_PRINTF("register_hook: %llx\n", addr); #endif // PYDA_DYNAMORIO_CLIENT Py_INCREF(callback); - pyda_add_hook(p->t, addr, callback); + pyda_add_hook(p->main_thread->proc, addr, callback); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +PydaProcess_set_thread_init_hook(PyObject *self, PyObject *args) { + PydaProcess *p = (PydaProcess*)self; + + PyObject *callback; + + if (!PyArg_ParseTuple(args, "O!", &PyFunction_Type, &callback)) + return NULL; + + PyCodeObject *code = (PyCodeObject*)PyFunction_GetCode(callback); + if (!code || code->co_argcount != 1) { + PyErr_SetString(PyExc_RuntimeError, "Callback must take one argument"); + return NULL; + } + +#ifdef PYDA_DYNAMORIO_CLIENT + DEBUG_PRINTF("set_thread_init_hook\n"); +#endif // PYDA_DYNAMORIO_CLIENT + Py_INCREF(callback); + pyda_set_thread_init_hook(p->main_thread->proc, callback); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +PydaProcess_unregister_hook(PyObject *self, PyObject *args) { + PydaProcess *p = (PydaProcess*)self; + + unsigned long long addr; + + if (!PyArg_ParseTuple(args, "K", &addr)) { + return NULL; + } + +#ifdef PYDA_DYNAMORIO_CLIENT + DEBUG_PRINTF("unregister_hook: %llx\n", addr); +#endif // PYDA_DYNAMORIO_CLIENT + pyda_remove_hook(p->main_thread->proc, addr); 
Py_INCREF(Py_None); return Py_None; @@ -292,7 +399,7 @@ PydaProcess_get_main_module(PyObject *self, PyObject *args) { static PyObject * -PydaProcess_get_base(PyObject *self, PyObject *args) { +pyda_get_base(PyObject *self, PyObject *args) { const char *name; Py_buffer bin_path; @@ -328,6 +435,59 @@ PydaProcess_get_base(PyObject *self, PyObject *args) { return (PyObject*)result; } +static PyObject * +pyda_get_module_for_addr(PyObject *self, PyObject *args) { + unsigned long addr; + if (!PyArg_ParseTuple(args, "K", &addr)) + return NULL; + + PyObject *result = NULL; + +#ifdef PYDA_DYNAMORIO_CLIENT + unsigned char *base; + size_t size; + unsigned int prot; + unsigned long perms = 0; + if (dr_query_memory((void*)addr, &base, &size, &prot)) { + if (prot & DR_MEMPROT_READ) { + perms |= 4; + } + if (prot & DR_MEMPROT_WRITE) { + perms |= 2; + } + if (prot & DR_MEMPROT_EXEC) { + perms |= 1; + } + } + + result = PyList_New(0); + module_data_t *mod = dr_lookup_module((void*)addr); + if (mod) { + PyList_Append(result, PyUnicode_FromString(mod->full_path)); + PyList_Append(result, PyLong_FromUnsignedLong((unsigned long)mod->start)); + PyList_Append(result, PyLong_FromUnsignedLong((unsigned long)mod->end)); + PyList_Append(result, PyLong_FromUnsignedLong(perms)); + + dr_free_module_data(mod); + return result; + } else { + PyList_Append(result, PyUnicode_FromString("unknown")); + PyList_Append(result, PyLong_FromUnsignedLong((unsigned long)base)); + PyList_Append(result, PyLong_FromUnsignedLong((unsigned long)base + size)); + PyList_Append(result, PyLong_FromUnsignedLong(perms)); + return result; + } + +#else + PyErr_SetString(PyExc_RuntimeError, "Not implemented outside of dynamorio"); + return NULL; +#endif + + Py_INCREF(Py_None); + return Py_None; +} + + static PyObject * PydaProcess_read(PyObject *self, PyObject *args) { PydaProcess *p = (PydaProcess*)self; @@ -348,7 +508,7 @@ PydaProcess_read(PyObject *self, PyObject *args) { void *buf = malloc(count); int success = 
dr_safe_read((void*)addr, count, buf, NULL); if (!success) { - PyErr_SetString(PyExc_RuntimeError, "Failed to read memory"); + PyErr_SetString(MemoryError, "Failed to read memory"); free(buf); return NULL; } diff --git a/pyda_core/tool.c b/pyda_core/tool.c index 60bc378..89bccf5 100644 --- a/pyda_core/tool.c +++ b/pyda_core/tool.c @@ -12,8 +12,9 @@ #include "pyda_core.h" #include "pyda_threads.h" -void python_thread(); void python_init(); +void python_main_thread(void*); +void python_aux_thread(void*); void module_load_event(void *drcontext, const module_data_t *mod, bool loaded); void thread_init_event(void *drcontext); void thread_exit_event(void *drcontext); @@ -80,24 +81,37 @@ void thread_init_event(void *drcontext) { DEBUG_PRINTF("thread_init_event\n"); // Make a thread structure - pyda_thread *t = pyda_mk_process(); + static pyda_process *global_proc = NULL; + pyda_thread *t; + if (!global_proc) { + global_proc = pyda_mk_process(); + t = global_proc->main_thread; + } else { + t = pyda_mk_thread(global_proc); + } + drmgr_set_tls_field(drcontext, g_pyda_tls_idx, (void*)t); // Every thread has its own corresponding python thread - python_init(); - dr_create_client_thread(python_thread, t); + if (t == global_proc->main_thread) { + python_init(); + dr_create_client_thread(python_main_thread, t); + } else { + dr_create_client_thread(python_aux_thread, t); + } // Store the first pc, we will intrument it to call break - static bool is_main_thread = true; - if (is_main_thread) { + if (t == global_proc->main_thread) { module_data_t *main_mod = dr_get_main_module(); t->start_pc = (void*)main_mod->entry_point; } else { dr_mcontext_t mc; mc.size = sizeof(mc); - mc.flags = DR_MC_INTEGER; + mc.flags = DR_MC_ALL; dr_get_mcontext(drcontext, &mc); t->start_pc = (void*)mc.rip; + DEBUG_PRINTF("start_pc: %p\n", t->start_pc); + dr_flush_region(t->start_pc, 1); } DEBUG_PRINTF("thread_init_event: %p\n", t->start_pc); } @@ -111,7 +125,7 @@ void thread_exit_event(void *drcontext) { 
pyda_break(t); DEBUG_PRINTF("broke end\n", t); // TODO: exit event - pyda_process_destroy(t); + pyda_thread_destroy(t); } void python_init() { @@ -175,11 +189,14 @@ event_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr, { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); pyda_hook *callback; + + // XXX: I don't think this is safe, since the thread that updates + // the code cache may not be the executing thread. if (instr_get_app_pc(instr) == t->start_pc) { DEBUG_PRINTF("** Found PC\n"); dr_insert_clean_call(drcontext, bb, instrlist_first_app(bb), (void *)thread_entrypoint_break, false /* save fpstate */, 0); - } else if ((callback = pyda_get_callback(t, instr_get_app_pc(instr)))) { + } else if ((callback = pyda_get_callback(t->proc, instr_get_app_pc(instr)))) { DEBUG_PRINTF("installing hook at %p\n", instr_get_app_pc(instr)); dr_insert_clean_call(drcontext, bb, instr, (void *)pyda_hook_cleancall, false /* save fpstate */, 1, OPND_CREATE_INTPTR(callback)); @@ -191,6 +208,8 @@ static void thread_entrypoint_break() { DEBUG_PRINTF("entrypoint (break)\n"); pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + fprintf(stderr, "[PYDA] New thread %ld\n", t->tid); + pyda_initial_break(t); if (pyda_flush_hooks()) { DEBUG_PRINTF("dr_flush_hooks\n"); @@ -212,9 +231,8 @@ static void thread_entrypoint_break() { void __ctype_init(); void drmgr_thread_init_event(void*); -void adjust_wait_at_safe_spot(void *dcontext, int amt); -void python_thread(pyda_thread *t) { +static void* python_thread_init(pyda_thread *t) { __ctype_init(); void *drcontext = dr_get_current_drcontext(); @@ -224,6 +242,13 @@ void python_thread(pyda_thread *t) { dr_client_thread_set_suspendable(false); pyda_thread_setspecific(g_pyda_tls_idx, (void*)t); + return tls; +} + +void python_main_thread(void *arg) { + pyda_thread *t = arg; + void *drcontext = dr_get_current_drcontext(); + void *tls = python_thread_init(t); pthread_mutex_lock(&python_thread_init1_mutex); PyGILState_STATE 
gstate; @@ -261,6 +286,27 @@ void python_thread(pyda_thread *t) { dr_thread_free(drcontext, tls, sizeof(void*) * 130); DEBUG_PRINTF("Calling dr_exit\n"); - pyda_process_destroy(t); + // pyda_thread_destroy(t); drmgr_exit(); +} + +void python_aux_thread(void *arg) { + pyda_thread *t = arg; + void *drcontext = dr_get_current_drcontext(); + void *tls = python_thread_init(t); + + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + // We just call the thread init hook, if one exists + if (t->proc->thread_init_hook) { + PyObject *result = PyObject_CallFunctionObjArgs(t->proc->thread_init_hook, t->proc->main_thread->py_obj, NULL); + } + + PyGILState_Release(gstate); + + dr_client_thread_set_suspendable(true); + pyda_yield(t); // unblock + + dr_thread_free(drcontext, tls, sizeof(void*) * 130); } \ No newline at end of file