Skip to content

Commit

Permalink
Merge pull request #16729 from mvdbeek/fix_allowlist_deserialization
Browse files Browse the repository at this point in the history
[23.1] Fix allowlist deserialization in file sources
  • Loading branch information
mvdbeek authored Sep 24, 2023
2 parents 54f3769 + 6341c7c commit 73ad4f4
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 28 deletions.
10 changes: 2 additions & 8 deletions lib/galaxy/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# absolute_import needed for tool_shed package.

import configparser
import ipaddress
import json
import locale
import logging
Expand Down Expand Up @@ -42,6 +41,7 @@
string_as_bool,
unicodify,
)
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.custom_logging import LOGLV_TRACE
from galaxy.util.dynamic import HasDynamicProperties
from galaxy.util.facts import get_facts
Expand Down Expand Up @@ -906,13 +906,7 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None:
self.tool_secret = kwargs.get("tool_secret", "")
self.metadata_strategy = kwargs.get("metadata_strategy", "directory")
self.use_remote_user = self.use_remote_user or self.single_user
self.fetch_url_allowlist_ips = [
ipaddress.ip_network(unicodify(ip.strip())) # If it has a slash, assume 127.0.0.1/24 notation
if "/" in ip
else ipaddress.ip_address(unicodify(ip.strip())) # Otherwise interpret it as an ip address.
for ip in kwargs.get("fetch_url_allowlist", "").split(",")
if len(ip.strip()) > 0
]
self.fetch_url_allowlist_ips = parse_allowlist_ips(listify(kwargs.get("fetch_url_allowlist")))
self.job_queue_cleanup_interval = int(kwargs.get("job_queue_cleanup_interval", "5"))

# Fall back to legacy job_working_directory config variable if set.
Expand Down
15 changes: 8 additions & 7 deletions lib/galaxy/files/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from galaxy import exceptions
from galaxy.util import plugin_config
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.dictifiable import Dictifiable

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -235,12 +236,12 @@ def from_app_config(config):
# Formalize what we read in from config to create a more clear interface
# for this component.
kwds = {}
kwds["symlink_allowlist"] = getattr(config, "user_library_import_symlink_allowlist", [])
kwds["fetch_url_allowlist"] = getattr(config, "fetch_url_allowlist", [])
kwds["library_import_dir"] = getattr(config, "library_import_dir", None)
kwds["user_library_import_dir"] = getattr(config, "user_library_import_dir", None)
kwds["ftp_upload_dir"] = getattr(config, "ftp_upload_dir", None)
kwds["ftp_upload_purge"] = getattr(config, "ftp_upload_purge", True)
kwds["symlink_allowlist"] = config.user_library_import_symlink_allowlist
kwds["fetch_url_allowlist"] = [str(ip) for ip in config.fetch_url_allowlist_ips]
kwds["library_import_dir"] = config.library_import_dir
kwds["user_library_import_dir"] = config.user_library_import_dir
kwds["ftp_upload_dir"] = config.ftp_upload_dir
kwds["ftp_upload_purge"] = config.ftp_upload_purge
return ConfiguredFileSourcesConfig(**kwds)

def to_dict(self):
Expand All @@ -257,7 +258,7 @@ def to_dict(self):
def from_dict(as_dict):
return ConfiguredFileSourcesConfig(
symlink_allowlist=as_dict["symlink_allowlist"],
fetch_url_allowlist=as_dict["fetch_url_allowlist"],
fetch_url_allowlist=parse_allowlist_ips(as_dict["fetch_url_allowlist"]),
library_import_dir=as_dict["library_import_dir"],
user_library_import_dir=as_dict["user_library_import_dir"],
ftp_upload_dir=as_dict["ftp_upload_dir"],
Expand Down
13 changes: 12 additions & 1 deletion lib/galaxy/files/sources/drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,21 @@ def __init__(self, **kwd: Unpack[FilesSourceProperties]):
self._force_http = props.pop("force_http", False)
self._props = props

@property
def _allowlist(self):
return self._file_sources_config.fetch_url_allowlist

def _realize_to(self, source_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
props = self._serialization_props(user_context)
headers = props.pop("http_headers", {}) or {}
fetch_drs_to_file(source_path, native_path, user_context, headers=headers, force_http=self._force_http)
fetch_drs_to_file(
source_path,
native_path,
user_context,
fetch_url_allowlist=self._allowlist,
headers=headers,
force_http=self._force_http,
)

def _write_from(self, target_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
raise NotImplementedError()
Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/files/sources/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import (
cast,
Dict,
List,
Optional,
)

Expand All @@ -15,6 +16,7 @@
get_charset_from_http_headers,
stream_to_open_named_file,
)
from galaxy.util.config_parsers import IpAllowedListEntryT
from . import (
BaseFilesSource,
FilesSourceOptions,
Expand All @@ -27,6 +29,7 @@
class HTTPFilesSourceProperties(FilesSourceProperties, total=False):
url_regex: str
http_headers: Dict[str, str]
fetch_url_allowlist: List[IpAllowedListEntryT]


class HTTPFilesSource(BaseFilesSource):
Expand Down Expand Up @@ -61,7 +64,7 @@ def _realize_to(

with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page:
# Verify url post-redirects is still allowlisted
validate_non_local(page.geturl(), self._allowlist)
validate_non_local(page.geturl(), self._allowlist or extra_props.get("fetch_url_allowlist") or [])
f = open(native_path, "wb") # fd will be .close()ed in stream_to_open_named_file
return stream_to_open_named_file(
page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
Expand Down
9 changes: 2 additions & 7 deletions lib/galaxy/files/uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import (
List,
Optional,
Union,
)
from urllib.parse import urlparse

Expand All @@ -23,6 +22,7 @@
stream_to_open_named_file,
unicodify,
)
from galaxy.util.config_parsers import IpAllowedListEntryT

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,11 +66,6 @@ def stream_to_file(stream, suffix="", prefix="", dir=None, text=False, **kwd):
return stream_to_open_named_file(stream, fd, temp_name, **kwd)


IpAddressT = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]
IpNetworkT = Union[ipaddress.IPv4Network, ipaddress.IPv6Network]
IpAllowedListEntryT = Union[IpAddressT, IpNetworkT]


def validate_uri_access(uri: str, is_admin: bool, ip_allowlist: List[IpAllowedListEntryT]) -> None:
"""Perform uniform checks on supplied URIs.
Expand Down Expand Up @@ -128,7 +123,7 @@ def validate_non_local(uri: str, ip_allowlist: List[IpAllowedListEntryT]) -> str
parsed_url = parsed_url[:idx]

# safe to log out, no credentials/request path, just an IP + port
log.debug("parsed url, port: %s : %s", parsed_url, port)
log.debug("parsed url %s, port: %s", parsed_url, port)
# Call getaddrinfo to resolve hostname into tuples containing IPs.
addrinfo = socket.getaddrinfo(parsed_url, port)
# Get the IP addresses that this entry resolves to (uniquely)
Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/model/unittest_utils/data_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ def __init__(self, root=None, **kwd):
self.file_path = os.path.join(self.data_dir, "files")
self.server_name = "main"
self.enable_quotas = False
self.user_library_import_symlink_allowlist = []
self.fetch_url_allowlist_ips = []
self.library_import_dir = None
self.user_library_import_dir = None
self.ftp_upload_dir = None
self.ftp_upload_purge = False

def __del__(self):
if self._remove_root:
Expand Down
21 changes: 21 additions & 0 deletions lib/galaxy/util/config_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import ipaddress
from typing import (
List,
Union,
)

from galaxy.util import unicodify

IpAddressT = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]
IpNetworkT = Union[ipaddress.IPv4Network, ipaddress.IPv6Network]
IpAllowedListEntryT = Union[IpAddressT, IpNetworkT]


def parse_allowlist_ips(fetch_url_allowlist: List[str]) -> List[IpAllowedListEntryT]:
return [
ipaddress.ip_network(unicodify(ip.strip())) # If it has a slash, assume 127.0.0.1/24 notation
if "/" in ip
else ipaddress.ip_address(unicodify(ip.strip())) # Otherwise interpret it as an ip address.
for ip in fetch_url_allowlist
if len(ip.strip()) > 0
]
8 changes: 7 additions & 1 deletion lib/galaxy/util/drs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import time
from os import PathLike
from typing import (
List,
Optional,
Tuple,
Union,
Expand All @@ -17,6 +18,7 @@
from galaxy.files.sources.http import HTTPFilesSourceProperties
from galaxy.files.uris import stream_url_to_file
from galaxy.util import DEFAULT_SOCKET_TIMEOUT
from galaxy.util.config_parsers import IpAllowedListEntryT

TargetPathT = Union[str, PathLike]

Expand Down Expand Up @@ -81,6 +83,7 @@ def fetch_drs_to_file(
force_http=False,
retry_options: Optional[RetryOptions] = None,
headers: Optional[dict] = None,
fetch_url_allowlist: Optional[List[IpAllowedListEntryT]] = None,
):
"""Fetch contents of drs:// URI to a target path."""
if not drs_uri.startswith("drs://"):
Expand All @@ -107,7 +110,10 @@ def fetch_drs_to_file(
access_url, access_headers = _get_access_info(get_url, access_method, headers=headers)
opts = FilesSourceOptions()
if access_method["type"] == "https":
extra_props: HTTPFilesSourceProperties = {"http_headers": access_headers or {}}
extra_props: HTTPFilesSourceProperties = {
"http_headers": access_headers or {},
"fetch_url_allowlist": fetch_url_allowlist or [],
}
opts.extra_props = extra_props
else:
opts.extra_props = {}
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy_test/api/test_drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ConfiguredFileSourcesConfig,
DictFileSourcesUserContext,
)
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.drs import (
fetch_drs_to_file,
RetryOptions,
Expand All @@ -32,7 +33,7 @@


def user_context_fixture():
file_sources_config = ConfiguredFileSourcesConfig()
file_sources_config = ConfiguredFileSourcesConfig(fetch_url_allowlist=parse_allowlist_ips(["127.0.0.0/24"]))
file_sources = ConfiguredFileSources(file_sources_config, load_stock_plugins=True)
user_context = DictFileSourcesUserContext(
preferences={
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy_test/driver/driver_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def setup_galaxy_config(
logging=LOGGING_CONFIG_DEFAULT,
monitor_thread_join_timeout=5,
object_store_store_by="uuid",
fetch_url_allowlist="127.0.0.1",
fetch_url_allowlist=["127.0.0.0/24"],
job_handler_monitor_sleep=0.2,
job_runner_monitor_sleep=0.2,
workflow_monitor_sleep=0.2,
Expand Down
2 changes: 1 addition & 1 deletion packages/files/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ python_requires = >=3.7

[options.packages.find]
exclude =
tests*
tests*

0 comments on commit 73ad4f4

Please sign in to comment.