Skip to content

Commit

Permalink
security fix: use requests for downloading data packages (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
jakub-lat authored Dec 11, 2023
1 parent c8226e3 commit 8d2ac7c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 12 deletions.
20 changes: 9 additions & 11 deletions src/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from celery import Celery, current_task

- import subprocess
import traceback

import pandas as pd
Expand All @@ -30,6 +29,7 @@

import os
from collections import defaultdict
+ import requests

from db import update_progress, update_step, fetch_package_status, SavedPackageData, Session, PackageProcessStatus
from util import (
Expand Down Expand Up @@ -74,19 +74,17 @@ def download_file(package_status_id, package_id, link, session):

if not check_whitelisted_link(link):
print('checking content type')
- command = f"curl -L -I {link}"
- process = subprocess.run(command, shell=True, capture_output=True, text=True)
-
- if "application/octet-stream" not in process.stdout or "HTTP/2 400" in process.stdout:
- print('The link does not point to a zip file.')
- raise Exception('EXPIRED_LINK')
+ r = requests.head(link, allow_redirects=True)
+ if r.status_code != 200 or 'content-type' not in r.headers or 'application/octet-stream' not in r.headers['content-type']:
+ print('The link does not point to a valid file.')
+ raise Exception('INVALID_LINK')

print('downloading')
update_step(package_status_id, package_id, 'DOWNLOADING', session)
- command = f"curl -L -o {path} {link}"
-
- process = subprocess.Popen(command, shell=True)
- process.wait()
+ r = requests.get(link, allow_redirects=True, stream=True)
+ with open(path, 'wb') as f:
+ for chunk in r.iter_content(chunk_size=10*1024):
+ f.write(chunk)

return path

Expand Down
2 changes: 1 addition & 1 deletion src/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import jwt
import requests

- discord_link_regex = r'https:\/\/click\.discord\.com\/ls\/click\?upn=([A-Za-z0-9-_]{500,})'
+ discord_link_regex = r'^https:\/\/click\.discord\.com\/ls\/click\?upn=([A-Za-z0-9-_]{500,})$'
dl_whitelisted_domains_raw = os.getenv('DL_ZIP_WHITELISTED_DOMAINS')
dl_whitelisted_domains = dl_whitelisted_domains_raw and dl_whitelisted_domains_raw.split(',') or []

Expand Down

0 comments on commit 8d2ac7c

Please sign in to comment.