diff --git a/poetry.lock b/poetry.lock index 73637c0dd6..2c0319dea8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1986,4 +1986,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "e507ec0a98eba805833df08713159004b89bfbe686bba645a082b848e4c3d1c4" +content-hash = "d195fd28a300eef5ffff741baa6351c0754c77aeba0a6129c53d98a39916f7a7" diff --git a/pyproject.toml b/pyproject.toml index e0338a36dd..f73bfd4df3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ lz4 = "^4.3.2" lief = "^0.15.1" cryptography = ">=41.0,<44.0" treelib = "^1.7.0" -unblob-native = "^0.1.1" +unblob-native = "^0.1.2" jefferson = "^0.4.5" rich = "^13.3.5" pyfatfs = "^1.0.5" diff --git a/tests/test_cli.py b/tests/test_cli.py index 3a00145889..5c2fc565fc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -425,3 +425,26 @@ def test_clear_skip_magics( assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted( skip_magic ), fail_message + + +def test_sandbox_escape(tmp_path: Path): + runner = CliRunner() + + in_path = tmp_path / "input" + in_path.touch() + extract_dir = tmp_path / "extract-dir" + params = ["--extract-dir", str(extract_dir), str(in_path)] + + unrelated_file = tmp_path / "unrelated" + + process_file_mock = mock.MagicMock( + side_effect=lambda *_args, **_kwargs: unrelated_file.write_text( + "sandbox escape" + ) + ) + with mock.patch.object(unblob.cli, "process_file", process_file_mock): + result = runner.invoke(unblob.cli.cli, params) + + assert result.exit_code != 0 + assert isinstance(result.exception, PermissionError) + process_file_mock.assert_called_once() diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py new file mode 100644 index 0000000000..8c4b380c41 --- /dev/null +++ b/tests/test_sandbox.py @@ -0,0 +1,52 @@ +from pathlib import Path + +import pytest + +from unblob.processing import ExtractionConfig +from unblob.sandbox import Sandbox + + +@pytest.fixture +def log_path(tmp_path): + return tmp_path / "unblob.log" + + +@pytest.fixture +def extraction_config(extraction_config, tmp_path): + extraction_config.extract_root = tmp_path / "extract" / "root" + # parent has to exist + extraction_config.extract_root.parent.mkdir() + return extraction_config + + +@pytest.fixture +def sandbox(extraction_config: ExtractionConfig, log_path: Path): + return Sandbox(extraction_config, log_path, None) + + +def test_necessary_resources_can_be_created_in_sandbox( + sandbox: Sandbox, extraction_config: ExtractionConfig, log_path: Path +): + directory_in_extract_root = extraction_config.extract_root / "path" / "to" / "dir" + file_in_extract_root = directory_in_extract_root / "file" + + sandbox.run(extraction_config.extract_root.mkdir, parents=True) + sandbox.run(directory_in_extract_root.mkdir, parents=True) + + sandbox.run(file_in_extract_root.touch) + sandbox.run(file_in_extract_root.write_text, "file content") + + # log-file is already opened + log_path.touch() + sandbox.run(log_path.write_text, "log line") + + +def test_access_outside_sandbox_is_not_possible(sandbox: Sandbox, tmp_path: Path): + unrelated_dir = tmp_path / "unrelated" / "path" + unrelated_file = tmp_path / "unrelated-file" + + with pytest.raises(PermissionError): + sandbox.run(unrelated_dir.mkdir, parents=True) + + with pytest.raises(PermissionError): + sandbox.run(unrelated_file.touch) diff --git a/unblob/cli.py b/unblob/cli.py index cb275e809c..498644e078 100755 --- a/unblob/cli.py +++ b/unblob/cli.py @@ -33,6 +33,7 @@ ExtractionConfig, process_file, ) +from .sandbox import Sandbox from .ui import NullProgressReporter, RichConsoleProgressReporter logger = get_logger() @@ -301,7 +302,8 @@ def cli( ) logger.info("Start processing file", file=file) - process_results = process_file(config, file, report_file) + sandbox = Sandbox(config, log_path, report_file) + process_results = sandbox.run(process_file, config, file, report_file) if verbose == 0: if skip_extraction: print_scan_report(process_results) diff --git a/unblob/processing.py b/unblob/processing.py index 1f9c74432b..ac93c20a1c 100644 --- a/unblob/processing.py +++ b/unblob/processing.py @@ -110,7 +110,6 @@ def get_extract_dir_for(self, path: Path) -> Path: return extract_dir.expanduser().resolve() -@terminate_gracefully def process_file( config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None ) -> ProcessResult: diff --git a/unblob/sandbox.py b/unblob/sandbox.py new file mode 100644 index 0000000000..3608b5bf7f --- /dev/null +++ b/unblob/sandbox.py @@ -0,0 +1,118 @@ +import ctypes +import sys +import threading +from pathlib import Path +from typing import Callable, Iterable, Optional, Type, TypeVar + +from structlog import get_logger +from unblob_native.sandbox import ( + AccessFS, + SandboxError, + restrict_access, +) + +if sys.version_info >= (3, 10): + from typing import ParamSpec +else: + from typing_extensions import ParamSpec + +from unblob.processing import ExtractionConfig + +logger = get_logger() + +P = ParamSpec("P") +R = TypeVar("R") + + +class Sandbox: + """Configures restricted file-systems to run functions in. + + When calling ``run()``, a separate thread will be configured with + minimum required file-system permissions. All subprocesses spawned + from that thread will honor the restrictions. + """ + + def __init__( + self, + config: ExtractionConfig, + log_path: Path, + report_file: Optional[Path], + extra_restrictions: Iterable[AccessFS] = (), + ): + self.restrictions = [ + # Python, shared libraries, extractor binaries and so on + AccessFS.read("/"), + # Multiprocessing + AccessFS.read_write("/dev/shm"), # noqa: S108 + # Extracted contents + AccessFS.read_write(config.extract_root), + AccessFS.make_dir(config.extract_root.parent), + AccessFS.read_write(log_path), + *extra_restrictions, + ] + + if report_file: + self.restrictions += [ + AccessFS.read_write(report_file), + AccessFS.make_reg(report_file.parent), + ] + + def run(self, callback: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R: + """Run callback with restricted filesystem access.""" + exception = None + result = None + + def _run_in_thread(callback, *args, **kwargs): + nonlocal exception, result + + self._try_enter_sandbox() + try: + result = callback(*args, **kwargs) + except BaseException as e: + exception = e + + thread = threading.Thread( + target=_run_in_thread, args=(callback, *args), kwargs=kwargs + ) + thread.start() + + try: + thread.join() + except KeyboardInterrupt: + raise_in_thread(thread, KeyboardInterrupt) + thread.join() + + if exception: + raise exception # pyright: ignore[reportGeneralTypeIssues] + return result # pyright: ignore[reportReturnType] + + def _try_enter_sandbox(self): + try: + restrict_access(*self.restrictions) + except SandboxError: + logger.warning( + "Sandboxing FS access is unavailable on this system, skipping." + ) + + +def raise_in_thread(thread: threading.Thread, exctype: Type) -> None: + if thread.ident is None: + raise RuntimeError("Thread is not started") + + res = ctypes.pythonapi.PyThreadState_SetAsyncExc( + ctypes.c_ulong(thread.ident), ctypes.py_object(exctype) + ) + + # success + if res == 1: + return + + # Need to revert the call to restore interpreter state + ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_ulong(thread.ident), None) + + # Thread could have exited since + if res == 0: + return + + # Something bad have happened + raise RuntimeError("Could not raise exception in thread", thread.ident)