diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index 79876c4c..f1ed0767 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -105,18 +105,20 @@ def get_domain_tags(self, update=False): return self.tags def update_domain_tags(self, item): - if self.r_serv_metadata.exists('tag:{}'.format(item)): - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item)) - # update path here - else: - # need to remove it - if self.paste_directory in item: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item.replace(self.paste_directory+'/', ''))) - # need to remove it + if item: + + if self.r_serv_metadata.exists('tag:{}'.format(item)): + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item)) + # update path here else: - p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) - for tag in p_tags: - self.tags[tag] = self.tags.get(tag, 0) + 1 + # need to remove it + if self.paste_directory in item: + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(item.replace(self.paste_directory+'/', ''))) + # need to remove it + else: + p_tags = self.r_serv_metadata.smembers('tag:{}'.format(os.path.join(self.paste_directory, item))) + for tag in p_tags: + self.tags[tag] = self.tags.get(tag, 0) + 1 def get_first_crawled(self): res = self.r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(self.type, self.domain, self.port), 0, 0, withscores=True) @@ -150,7 +152,9 @@ def get_domain_crawled_core_item(self, epoch=None): #todo use the right paste def get_last_crawled_pastes(self, item_root=None): if item_root is None: - item_root = self.get_domain_crawled_core_item(self) + item_root = self.get_domain_crawled_core_item() + if item_root: + item_root = item_root['root_item'] return self.get_all_pastes_domain(item_root) def get_all_pastes_domain(self, root_item): diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index adbbe87c..fd68dc93 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -82,6 +82,13 @@ def is_valid_domain(domain): else: return False +def is_valid_service_type(service_type): + accepted_service = ['onion', 'regular'] + if service_type in accepted_service: + return True + else: + return False + def get_onion_status(domain, date): if r_serv_onion.sismember('onion_up:'+date , domain): return True @@ -114,13 +121,17 @@ def get_domain_from_url(url): def get_last_domains_crawled(type): return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1) +def get_nb_domains_inqueue(type): + nb = r_serv_onion.scard('{}_crawler_queue'.format(type)) + nb += r_serv_onion.scard('{}_crawler_priority_queue'.format(type)) + return nb + def get_stats_last_crawled_domains(type, date): statDomains = {} statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date)) statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date)) statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] - statDomains['domains_queue'] = r_serv_onion.scard('{}_crawler_queue'.format(type)) - statDomains['domains_queue'] += r_serv_onion.scard('{}_crawler_priority_queue'.format(type)) + statDomains['domains_queue'] = get_nb_domains_inqueue(type) return statDomains def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False): @@ -561,20 +572,28 @@ def last_crawled_domains_with_stats_json(): def get_onions_by_daterange(): date_from = request.form.get('date_from') date_to = request.form.get('date_to') + service_type = request.form.get('service_type') domains_up = request.form.get('domains_up') domains_down = request.form.get('domains_down') domains_tags = request.form.get('domains_tags') - return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags)) + return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, service_type=service_type, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags)) @hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET']) def show_domains_by_daterange(): date_from = request.args.get('date_from') date_to = request.args.get('date_to') + service_type = request.args.get('service_type') domains_up = request.args.get('domains_up') domains_down = request.args.get('domains_down') domains_tags = request.args.get('domains_tags') + # incorrect service type + if not is_valid_service_type(service_type): + service_type = 'onion' + + type_name = dic_type_name[service_type] + date_range = [] if date_from is not None and date_to is not None: #change format @@ -595,12 +614,22 @@ def show_domains_by_daterange(): date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8] date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8] + statDomains = {} + statDomains['domains_up'] = 0 + statDomains['domains_down'] = 0 + statDomains['total'] = 0 + statDomains['domains_queue'] = get_nb_domains_inqueue(service_type) + domains_by_day = {} domain_metadata = {} + stats_by_date = {} for date in date_range: + stats_by_date[date] = {} + stats_by_date[date]['domain_up'] = 0 + stats_by_date[date]['domain_down'] = 0 if domains_up: domains_up = True - domains_by_day[date] = list(r_serv_onion.smembers('onion_up:{}'.format(date))) + domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date))) for domain in domains_by_day[date]: h = HiddenServices(domain, 'onion') domain_metadata[domain] = {} @@ -608,19 +637,21 @@ def show_domains_by_daterange(): domains_tags = True domain_metadata[domain]['tags'] = h.get_domain_tags(update=True) - domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') + domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' - domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') + domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'UP' domain_metadata[domain]['status_color'] = 'Green' domain_metadata[domain]['status_icon'] = 'fa-check-circle' + statDomains['domains_up'] += 1 + stats_by_date[date]['domain_up'] += 1 if domains_down: domains_down = True - domains_by_day_down = list(r_serv_onion.smembers('onion_down:{}'.format(date))) + domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date))) if domains_up: domains_by_day[date].extend(domains_by_day_down) else: @@ -630,20 +661,27 @@ def show_domains_by_daterange(): domain_metadata[domain] = {} #domain_metadata[domain]['tags'] = h.get_domain_tags() - domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') + domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' - domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') + domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'DOWN' domain_metadata[domain]['status_color'] = 'Red' domain_metadata[domain]['status_icon'] = 'fa-times-circle' + statDomains['domains_down'] += 1 + stats_by_date[date]['domain_down'] += 1 + + statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] - return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, domain_metadata=domain_metadata, + return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, + statDomains=statDomains, type_name=type_name, + domain_metadata=domain_metadata, + stats_by_date=stats_by_date, date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, - domains_tags=domains_tags, bootstrap_label=bootstrap_label) + domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label) @hiddenServices.route("/crawlers/show_domain", methods=['GET']) def show_domain(): diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html index 6c3bab49..a42e3880 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html +++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html @@ -85,62 +85,7 @@
Some quick example text to build on the card title and make up the bulk of the card's content.
-Domain | -First Seen | -Last Check | -Status | -
---|---|---|---|
- {{ domain }}
-
- {% for tag in domain_metadata[domain]['tags'] %}
-
- {{ tag }} {{ domain_metadata[domain]['tags'][tag] }}
-
- {% endfor %}
-
- |
- {{'{}/{}/{}'.format(domain_metadata[domain]['first_seen'][0:4], domain_metadata[domain]['first_seen'][4:6], domain_metadata[domain]['first_seen'][6:8])}} | -{{'{}/{}/{}'.format(domain_metadata[domain]['last_check'][0:4], domain_metadata[domain]['last_check'][4:6], domain_metadata[domain]['last_check'][6:8])}} | -
-
- {{domain_metadata[domain]['status_text']}}
-
- |
-
Some quick example text to build on the card title and make up the bulk of the card's content.
+