Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental Landlock based sandboxing #597

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
DEFAULT_SKIP_MAGIC,
ExtractionConfig,
)
from unblob.testing import is_sandbox_available
from unblob.ui import (
NullProgressReporter,
ProgressReporter,
Expand Down Expand Up @@ -425,3 +426,31 @@ def test_clear_skip_magics(
assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted(
skip_magic
), fail_message


@pytest.mark.skipif(
not is_sandbox_available(), reason="Sandboxing is only available on Linux"
)
def test_sandbox_escape(tmp_path: Path):
runner = CliRunner()

in_path = tmp_path / "input"
in_path.touch()
extract_dir = tmp_path / "extract-dir"
params = ["--extract-dir", str(extract_dir), str(in_path)]

unrelated_file = tmp_path / "unrelated"
assert not unrelated_file.exists()

process_file_mock = mock.MagicMock(
side_effect=lambda *_args, **_kwargs: unrelated_file.write_text(
"sandbox escape"
)
)
with mock.patch.object(unblob.cli, "process_file", process_file_mock):
result = runner.invoke(unblob.cli.cli, params)

assert result.exit_code != 0
assert isinstance(result.exception, PermissionError)
assert not unrelated_file.exists()
process_file_mock.assert_called_once()
e3krisztian marked this conversation as resolved.
Show resolved Hide resolved
85 changes: 85 additions & 0 deletions tests/test_sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from pathlib import Path

import pytest

from unblob.processing import ExtractionConfig
from unblob.sandbox import Sandbox
from unblob.testing import is_sandbox_available

pytestmark = pytest.mark.skipif(
not is_sandbox_available(), reason="Sandboxing only works on Linux"
)


@pytest.fixture
def log_path(tmp_path):
return tmp_path / "unblob.log"


@pytest.fixture
def extraction_config(extraction_config, tmp_path):
extraction_config.extract_root = tmp_path / "extract" / "root"
# parent has to exist
extraction_config.extract_root.parent.mkdir()
return extraction_config


@pytest.fixture
def sandbox(extraction_config: ExtractionConfig, log_path: Path):
return Sandbox(extraction_config, log_path, None)


def test_necessary_resources_can_be_created_in_sandbox(
sandbox: Sandbox, extraction_config: ExtractionConfig, log_path: Path
):
directory_in_extract_root = extraction_config.extract_root / "path" / "to" / "dir"
file_in_extract_root = directory_in_extract_root / "file"

assert not extraction_config.extract_root.exists()
sandbox.run(extraction_config.extract_root.mkdir, parents=True)
assert extraction_config.extract_root.exists()

assert not directory_in_extract_root.exists()
sandbox.run(directory_in_extract_root.mkdir, parents=True)
assert directory_in_extract_root.exists()

assert not file_in_extract_root.exists()
sandbox.run(file_in_extract_root.touch)
assert file_in_extract_root.exists()

sandbox.run(file_in_extract_root.write_text, "file content")
assert file_in_extract_root.read_text() == "file content"

# log-file is already opened
log_path.touch()
sandbox.run(log_path.write_text, "log line")
e3krisztian marked this conversation as resolved.
Show resolved Hide resolved
assert log_path.read_text() == "log line"


def test_access_outside_sandbox_is_not_possible(sandbox: Sandbox, tmp_path: Path):
unrelated_dir = tmp_path / "unrelated" / "path"
unrelated_file = tmp_path / "unrelated-file"

assert not unrelated_dir.exists()
with pytest.raises(PermissionError):
sandbox.run(unrelated_dir.mkdir, parents=True)
assert not unrelated_dir.exists()

unrelated_dir.mkdir(parents=True)
with pytest.raises(PermissionError):
sandbox.run(unrelated_dir.rmdir)
assert unrelated_dir.exists()

assert not unrelated_file.exists()
with pytest.raises(PermissionError):
sandbox.run(unrelated_file.touch)
e3krisztian marked this conversation as resolved.
Show resolved Hide resolved
assert not unrelated_file.exists()

unrelated_file.write_text("file content")
with pytest.raises(PermissionError):
sandbox.run(unrelated_file.write_text, "overwrite attempt")
assert unrelated_file.read_text() == "file content"

with pytest.raises(PermissionError):
sandbox.run(unrelated_file.unlink)
assert unrelated_file.exists()
4 changes: 3 additions & 1 deletion unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ExtractionConfig,
process_file,
)
from .sandbox import Sandbox
from .ui import NullProgressReporter, RichConsoleProgressReporter

logger = get_logger()
Expand Down Expand Up @@ -301,7 +302,8 @@ def cli(
)

logger.info("Start processing file", file=file)
process_results = process_file(config, file, report_file)
sandbox = Sandbox(config, log_path, report_file)
process_results = sandbox.run(process_file, config, file, report_file)
if verbose == 0:
if skip_extraction:
print_scan_report(process_results)
Expand Down
7 changes: 3 additions & 4 deletions unblob/pool.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This went into the wrong commit...

Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,9 @@ def make_pool(process_num, handler, result_callback) -> Union[SinglePool, MultiP


def _on_terminate(signum, frame):
with contextlib.suppress(StopIteration):
while True:
pool = next(iter(pools))
pool.close(immediate=True)
pools_snapshot = list(pools)
for pool in pools_snapshot:
pool.close(immediate=True)

if callable(orig_signal_handlers[signum]):
Fixed Show fixed Hide fixed
orig_signal_handlers[signum](signum, frame)
Expand Down
118 changes: 118 additions & 0 deletions unblob/sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import ctypes
import sys
import threading
from pathlib import Path
from typing import Callable, Iterable, Optional, Type, TypeVar

from structlog import get_logger
from unblob_native.sandbox import (
AccessFS,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this comes from Landlock terminology, but this AccessFS name looks so alien to me (is it a file-system?).
I would use FSAccess, if possible.

SandboxError,
restrict_access,
)

if sys.version_info >= (3, 10):
from typing import ParamSpec
else:
from typing_extensions import ParamSpec

from unblob.processing import ExtractionConfig

logger = get_logger()

P = ParamSpec("P")
R = TypeVar("R")


class Sandbox:
"""Configures restricted file-systems to run functions in.

When calling ``run()``, a separate thread will be configured with
minimum required file-system permissions. All subprocesses spawned
from that thread will honor the restrictions.
"""

def __init__(
self,
config: ExtractionConfig,
log_path: Path,
report_file: Optional[Path],
extra_restrictions: Iterable[AccessFS] = (),
):
self.restrictions = [
# Python, shared libraries, extractor binaries and so on
AccessFS.read("/"),
e3krisztian marked this conversation as resolved.
Show resolved Hide resolved
# Multiprocessing
AccessFS.read_write("/dev/shm"), # noqa: S108
# Extracted contents
AccessFS.read_write(config.extract_root),
AccessFS.make_dir(config.extract_root.parent),
AccessFS.read_write(log_path),
*extra_restrictions,
]

if report_file:
self.restrictions += [
AccessFS.read_write(report_file),
AccessFS.make_reg(report_file.parent),
]
Comment on lines +40 to +58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have a deny-all, then allow exceptions mechanism, maybe a more specific name could be used.

Suggested change
extra_restrictions: Iterable[AccessFS] = (),
):
self.restrictions = [
# Python, shared libraries, extractor binaries and so on
AccessFS.read("/"),
# Multiprocessing
AccessFS.read_write("/dev/shm"), # noqa: S108
# Extracted contents
AccessFS.read_write(config.extract_root),
AccessFS.make_dir(config.extract_root.parent),
AccessFS.read_write(log_path),
*extra_restrictions,
]
if report_file:
self.restrictions += [
AccessFS.read_write(report_file),
AccessFS.make_reg(report_file.parent),
]
extra_passthrough: Iterable[AccessFS] = (),
):
self.passthrough = [
# Python, shared libraries, extractor binaries and so on
AccessFS.read("/"),
# Multiprocessing
AccessFS.read_write("/dev/shm"), # noqa: S108
# Extracted contents
AccessFS.read_write(config.extract_root),
AccessFS.make_dir(config.extract_root.parent),
AccessFS.read_write(log_path),
*extra_passthrough,
]
if report_file:
self.passthrough += [
AccessFS.read_write(report_file),
AccessFS.make_reg(report_file.parent),
]


def run(self, callback: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
"""Run callback with restricted filesystem access."""
exception = None
result = None

def _run_in_thread(callback, *args, **kwargs):
nonlocal exception, result

self._try_enter_sandbox()
try:
result = callback(*args, **kwargs)
except BaseException as e:

Check notice

Code scanning / CodeQL

Except block handles 'BaseException' Note

Except block directly handles BaseException.
qkaiser marked this conversation as resolved.
Show resolved Hide resolved
exception = e

thread = threading.Thread(
target=_run_in_thread, args=(callback, *args), kwargs=kwargs
)
thread.start()

try:
thread.join()
except KeyboardInterrupt:
raise_in_thread(thread, KeyboardInterrupt)
thread.join()

if exception:
raise exception # pyright: ignore[reportGeneralTypeIssues]
return result # pyright: ignore[reportReturnType]

def _try_enter_sandbox(self):
try:
restrict_access(*self.restrictions)
except SandboxError:
logger.warning(
"Sandboxing FS access is unavailable on this system, skipping."
)


def raise_in_thread(thread: threading.Thread, exctype: Type) -> None:
if thread.ident is None:
raise RuntimeError("Thread is not started")

res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
ctypes.c_ulong(thread.ident), ctypes.py_object(exctype)
)

# success
if res == 1:
return

# Need to revert the call to restore interpreter state
ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_ulong(thread.ident), None)

# Thread could have exited since
if res == 0:
return

# Something bad have happened
raise RuntimeError("Could not raise exception in thread", thread.ident)
16 changes: 16 additions & 0 deletions unblob/testing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import binascii
import glob
import io
import platform
import shlex
import subprocess
from pathlib import Path
Expand All @@ -10,6 +11,7 @@
from lark.lark import Lark
from lark.visitors import Discard, Transformer
from pytest_cov.embed import cleanup_on_sigterm
from unblob_native.sandbox import AccessFS, SandboxError, restrict_access

from unblob.finder import build_hyperscan_database
from unblob.logging import configure_logger
Expand Down Expand Up @@ -217,3 +219,17 @@ def start(self, s):
rv.write(line.data)

return rv.getvalue()


def is_sandbox_available():
is_sandbox_available = True

try:
restrict_access(AccessFS.read_write("/"))
except SandboxError:
is_sandbox_available = False

if platform.architecture == "x86_64" and platform.system == "linux":
assert is_sandbox_available, "Sandboxing should work at least on Linux-x86_64"

return is_sandbox_available