Skip to content

Commit

Permalink
Merge pull request #4 from CIRALabs/change-output
Browse files Browse the repository at this point in the history
Allow for CSV output to file
  • Loading branch information
billbelanger authored Jul 9, 2024
2 parents ef60142 + 89a7638 commit 84f5c81
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,5 @@ cython_debug/
#.idea/

input.csv
output.csv
output.json
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ Specifically the global variables at the top of the file `RATELIMIT_REQUESTS` an
These values define how many requests may be made within a specific time range (for example, 5 requests every 20 seconds).
As of right now, there's no way to adjust these dynamically per invocation of the program based on command line parameters - it must be done with hard-coded values.

### Output
The program can write its results in either of two formats: JSON or CSV.
Right now, the format is selected by the hard-coded `OUTPUT_FORMAT` value at the top of `main.py`.
The location of the output is likewise set by the hard-coded `OUTPUT_FILE` value at the top of `main.py`.

## Localhost
Before you can run the program, you should create a virtual environment for the python executable
```bash
Expand Down
51 changes: 47 additions & 4 deletions db.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
'''Data storage and retrieval interface'''
import csv
from enum import Enum
from io import TextIOWrapper
import json
import sys

SUCCESS_KEY = "succeed_domains"
PRIVACY_KEY = "private_domains"
FAILED_KEY = "failed_domains"
FAILED_KEY = "failed_domains"


class Db:
'''Provides an interface to store/retrieve results'''

class Format(Enum):
'''Output formats accepted by Db.output_results via its fmt parameter'''
# Results are serialised with json.dumps (indent=4)
JSON = 1
# Results are written as CSV rows with "domain" and "country" columns
CSV = 2

DB = {}

def record_country(self, domain, country):
Expand Down Expand Up @@ -46,7 +56,40 @@ def get_failed_domain_count(self):
def __str__(self):
return str(self.DB)

# Feature Request: Multiple output locations
def output_results(self):
def output_results(self, output_loc: TextIOWrapper = None, fmt: Format = Format.JSON):
'''Outputs the results stored in the DB'''
print(json.dumps(self.DB, indent=4))
if fmt == Db.Format.JSON:
self._output_results_json(output_loc)
elif fmt == Db.Format.CSV:
self._output_results_csv(output_loc)

def _output_results_json(self, output_loc: TextIOWrapper = None):
'''Outputs the results stored in the DB to a JSON file'''
results = json.dumps(self.DB, indent=4)
if output_loc is None:
print(results)
else:
output_loc.write(results)

def _output_results_csv(self, output_loc: TextIOWrapper = None):
    '''Outputs the results stored in the DB as CSV

    Writes a header row followed by one row per domain, using the
    columns "domain" and "country":

    * domains under SUCCESS_KEY get their country ("N/A" when the
      country key is None)
    * domains under PRIVACY_KEY get "Privacy Protected (<term>)"
    * domains under FAILED_KEY get "Failed"

    Sections that are absent from the DB are skipped.

    output_loc is the writable text stream to receive the CSV; when it
    is None the rows go to sys.stdout.
    NOTE(review): the csv module recommends that file objects be opened
    with newline='' -- confirm the caller's open() call does so.
    '''
    # Resolve the destination BEFORE building the writer: DictWriter
    # binds to the object it is constructed with, so defaulting
    # afterwards would leave the writer wrapping None.
    if output_loc is None:
        output_loc = sys.stdout

    fieldnames = ["domain", "country"]
    data = []
    if SUCCESS_KEY in self.DB:
        for country in self.DB[SUCCESS_KEY]:
            label = "N/A" if country is None else country
            for domain in self.DB[SUCCESS_KEY][country]:
                data.append({"country": label, "domain": domain})
    if PRIVACY_KEY in self.DB:
        for term in self.DB[PRIVACY_KEY]:
            for domain in self.DB[PRIVACY_KEY][term]:
                data.append(
                    {"country": f"Privacy Protected ({term})", "domain": domain})
    if FAILED_KEY in self.DB:
        for domain in self.DB[FAILED_KEY]:
            data.append({"country": "Failed", "domain": domain})

    writer = csv.DictWriter(output_loc, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)
9 changes: 6 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
from db import Db

# Basic rate limiting: 5 requests per 20 seconds
RATELIMIT_REQUESTS = 5 # Number of requests to rate limit
RATELIMIT_TIMERANGE = 20 # Amount of time to rate limit
RATELIMIT_REQUESTS = 50 # Number of requests to rate limit
RATELIMIT_TIMERANGE = 60 # Amount of time to rate limit
SCHEMA_FILE = "rules.schema.json"
RULES_FILE = "rules.json"
DOMAINS_FILE = "input.csv"
OUTPUT_FORMAT = Db.Format.CSV
OUTPUT_FILE = "output.csv"
ENCODING = "UTF-8"
DB = Db()

Expand Down Expand Up @@ -173,7 +175,8 @@ def main(pagenum: int, pagesize: int) -> int:
log.exception(ex)
return -100 # stop processing immediately

DB.output_results()
with open(OUTPUT_FILE, "w", encoding=ENCODING) as output:
DB.output_results(output, OUTPUT_FORMAT)
return DB.get_failed_domain_count()


Expand Down

0 comments on commit 84f5c81

Please sign in to comment.