Update doc for css validation (#123)
* update doc for css validation

* pep8 style coding

* minor change

* pep8 coding update #2

* pep8 coding update #3

* pep8 coding update #4

* pep8 coding update #5
viskey98 authored and afeena committed May 3, 2018
1 parent 031a7a9 commit b029407
Showing 4 changed files with 33 additions and 26 deletions.
20 changes: 11 additions & 9 deletions clone.py
@@ -34,15 +34,15 @@
class Cloner(object):
def __init__(self, root, max_depth, css_validate):
self.visited_urls = []
-self.root, self.error_page = self.add_scheme(root)
+self.root, self.error_page = self.add_scheme(root)
self.max_depth = max_depth
self.moved_root = None
if len(self.root.host) < 4:
sys.exit('invalid target {}'.format(self.root.host))
self.target_path = '/opt/snare/pages/{}'.format(self.root.host)

if not os.path.exists(self.target_path):
-os.mkdir(self.target_path)
+os.mkdir(self.target_path)
self.css_validate = css_validate
self.new_urls = Queue()
self.meta = {}
@@ -152,7 +152,7 @@ async def get_body(self, session):
response = await session.get(current_url, headers={'Accept': 'text/html'})
content_type = response.content_type
data = await response.read()

except (aiohttp.ClientError, asyncio.TimeoutError) as client_error:
self.logger.error(client_error)
else:
@@ -165,8 +165,8 @@ async def get_body(self, session):
data = str(soup).encode()
with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh:
index_fh.write(data)
-if content_type == 'text/css':
-css = cssutils.parseString(data, validate=self.css_validate)
+if content_type == 'text/css':
+css = cssutils.parseString(data, validate=self.css_validate)
for carved_url in cssutils.getUrls(css):
if carved_url.startswith('data'):
continue
@@ -191,15 +191,16 @@ async def run(self):
session = aiohttp.ClientSession()
try:
await self.new_urls.put((self.root, 0))
-await self.new_urls.put((self.error_page,0))
+await self.new_urls.put((self.error_page, 0))
await self.get_body(session)
except KeyboardInterrupt:
raise
finally:
with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj:
json.dump(self.meta, mj)
await session.close()



def str_to_bool(v):
if v.lower() == 'true':
return True
@@ -208,6 +209,7 @@ def str_to_bool(v):
else:
raise argparse.ArgumentTypeError('Boolean value expected')


def main():
if os.getuid() != 0:
print('Clone has to be run as root!')
@@ -226,7 +228,7 @@ def main():
if args.log_path:
log_err = args.log_path + "clone.err"
else:
log_err = "/opt/snare/clone.err"
log_err = "/opt/snare/clone.err"
logger.Logger.create_clone_logger(log_err, __package__)
print("Error logs will be stored in {}\n".format(log_err))
try:
@@ -244,6 +246,6 @@ def main():
/ / / / / / / // |/ / __/ / /_/ /
/ /___ / /____ / /_/ // /| / /___/ _, _/
/_____//______//_____//_/ |_/_____/_/ |_|
""")
main()
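
For context, here is a minimal standalone sketch of the CSS-carving step this commit makes configurable. The sample stylesheet and flag value are made up for illustration; only cssutils.parseString(..., validate=...) and cssutils.getUrls() come from the diff above, and cssutils must be installed.

import cssutils

sample_css = "body { background: url('/img/bg.png'); } .x { color: red }"
css_validate = False  # the new flag: skip cssutils' validation pass

sheet = cssutils.parseString(sample_css, validate=css_validate)
for carved_url in cssutils.getUrls(sheet):
    if carved_url.startswith('data'):  # clone.py skips inline data: URIs
        continue
    print(carved_url)  # clone.py would queue this URL for download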
1 change: 1 addition & 0 deletions docs/source/cloner.rst
@@ -12,3 +12,4 @@ Description

* **target** -- url of website to be cloned
* **max--depth** -- maximum depth of the web-pages desired to be cloned (optional), default: full depth of the site
+* **css--validate** -- set whether css validation is required (optional), default: None
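
The new css--validate option takes a string that is converted by str_to_bool from the diff above. Here is a hedged sketch of how such a type converter plugs into argparse; the exact flag spelling in clone.py is assumed, not confirmed by this page:

import argparse

def str_to_bool(v):
    if v.lower() == 'true':
        return True
    elif v.lower() == 'false':
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected')

parser = argparse.ArgumentParser()
parser.add_argument('--css-validate', type=str_to_bool, default=None)
args = parser.parse_args(['--css-validate', 'true'])
print(args.css_validate)  # True; anything but 'true'/'false' raises the error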
20 changes: 11 additions & 9 deletions logger.py
@@ -1,49 +1,51 @@
import logging
import logging.handlers


class LevelFilter(logging.Filter):
"""Filters (lets through) all messages with level < LEVEL"""

def __init__(self, level):
self.level = level

def filter(self, record):
-return record.levelno < self.level # "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive
+return record.levelno < self.level
+# "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive


class Logger:

@staticmethod
def create_logger(debug_filename, err_filename, logger_name):
logger = logging.getLogger(logger_name)
logger.setLevel(logging.DEBUG)
logger.propagate = False
formatter = logging.Formatter(
-fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

# ERROR log to 'snare.err'
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
error_log_handler.setLevel(logging.ERROR)
error_log_handler.setFormatter(formatter)
logger.addHandler(error_log_handler)

# DEBUG log to 'snare.log'
debug_log_handler = logging.handlers.RotatingFileHandler(debug_filename, encoding='utf-8')
debug_log_handler.setLevel(logging.DEBUG)
debug_log_handler.setFormatter(formatter)
max_level_filter = LevelFilter(logging.ERROR)
debug_log_handler.addFilter(max_level_filter)
logger.addHandler(debug_log_handler)

return logger

@staticmethod
def create_clone_logger(err_filename, logger_name):
logger = logging.getLogger(logger_name)
formatter = logging.Formatter(
-fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
+fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# ERROR log to 'clone.err'
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
error_log_handler.setLevel(logging.ERROR)
error_log_handler.setFormatter(formatter)
logger.addHandler(error_log_handler)
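
To see why LevelFilter exists, here is a small self-contained demo of the two-handler split used in create_logger above, with stream handlers standing in for the rotating file handlers:

import logging
import sys

class LevelFilter(logging.Filter):
    """Lets through all records with level < self.level."""
    def __init__(self, level):
        self.level = level

    def filter(self, record):
        return record.levelno < self.level

logger = logging.getLogger('demo')
logger.setLevel(logging.DEBUG)

err = logging.StreamHandler(sys.stderr)   # ERROR and above only
err.setLevel(logging.ERROR)
logger.addHandler(err)

dbg = logging.StreamHandler(sys.stdout)   # everything below ERROR
dbg.setLevel(logging.DEBUG)
dbg.addFilter(LevelFilter(logging.ERROR))
logger.addHandler(dbg)

logger.debug('goes to stdout only')
logger.error('goes to stderr only')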

18 changes: 10 additions & 8 deletions snare.py
@@ -55,7 +55,7 @@ def __init__(self, meta, run_args, debug=False, keep_alive=75, **kwargs):
self.dir = '/opt/snare/pages/{}'.format(run_args.page_dir)

self.meta = meta

self.logger = logging.getLogger(__name__)

self.sroute = StaticRoute(
@@ -223,9 +223,11 @@ async def parse_tanner_response(self, requested_name, detection):
content = None
status_code = 200
headers = {}
-p = re.compile('/+') # Creating a regex object for the pattern of multiple contiguous forward slashes
-requested_name = p.sub('/', requested_name) # Substituting all occurrences of the pattern with single forward slash
+# Creating a regex object for the pattern of multiple contiguous forward slashes
+p = re.compile('/+')
+# Substituting all occurrences of the pattern with single forward slash
+requested_name = p.sub('/', requested_name)

if detection['type'] == 1:
query_start = requested_name.find('?')
if query_start != -1:
Expand All @@ -235,7 +237,7 @@ async def parse_tanner_response(self, requested_name, detection):
requested_name = self.run_args.index_page
try:
if requested_name[-1] == '/':
-requested_name = requested_name[:-1]
+requested_name = requested_name[:-1]
requested_name = unquote(requested_name)
file_name = self.meta[requested_name]['hash']
content_type = self.meta[requested_name]['content_type']
@@ -248,7 +250,7 @@ async def parse_tanner_response(self, requested_name, detection):
with open(path, 'rb') as fh:
content = fh.read()
content = await self.handle_html_content(content)

else:
path = os.path.join(self.dir, file_name)
if os.path.isfile(path):
@@ -464,9 +466,9 @@ async def check_tanner():
base_page_path = '/opt/snare/pages/'
config = configparser.ConfigParser()
config.read(os.path.join(base_path, args.config))

log_debug = args.log_dir + "snare.log"
-log_err = args.log_dir + "snare.err"
+log_err = args.log_dir + "snare.err"
logger.Logger.create_logger(log_debug, log_err, __package__)

if args.list_pages:
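The parse_tanner_response hunk above normalizes repeated slashes in the requested path. In isolation the step is just the following (the sample path is made up):

import re

p = re.compile('/+')                   # one or more consecutive slashes
print(p.sub('/', '//foo///bar//baz'))  # -> /foo/bar/baz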
