Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Items page created and logs page customised #91

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SpiderKeeper
# SpiderKeeper fork

[![Latest Version](http://img.shields.io/pypi/v/SpiderKeeper.svg)](https://pypi.python.org/pypi/SpiderKeeper)
[![Python Versions](http://img.shields.io/pypi/pyversions/SpiderKeeper.svg)](https://pypi.python.org/pypi/SpiderKeeper)
Expand All @@ -12,6 +12,7 @@ A scalable admin ui for spider service
- With a single click deploy the scrapy project
- Show spider running stats
- Provide api
- Show scraped items and download them as JSON/CSV


Currently supported spider services
Expand Down
3 changes: 3 additions & 0 deletions SpiderKeeper/app/proxy/contrib/scrapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ def deploy(self, project_name, file_path):

def log_url(self, project_name, spider_name, job_id):
    """Return the scrapyd URL of the log file for one job run.

    The result is the base URL from ``self._scrapyd_url()`` followed by
    ``/logs/<project_name>/<spider_name>/<job_id>.log``.
    """
    base_url = self._scrapyd_url()
    log_path = '/logs/%s/%s/%s.log' % (project_name, spider_name, job_id)
    return base_url + log_path

def items_url(self, project_name, spider_name, job_id):
    """Return the scrapyd URL of the JSON-lines items feed for one job run.

    The result is the base URL from ``self._scrapyd_url()`` followed by
    ``/items/<project_name>/<spider_name>/<job_id>.jl``.
    """
    base_url = self._scrapyd_url()
    items_path = '/items/%s/%s/%s.jl' % (project_name, spider_name, job_id)
    return base_url + items_path
8 changes: 8 additions & 0 deletions SpiderKeeper/app/proxy/spiderctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ def log_url(self, job_execution):
return spider_service_instance.log_url(project.project_name, job_instance.spider_name,
job_execution.service_job_execution_id)

def items_url(self, job_execution):
    """Resolve the items-feed URL for a job execution.

    Looks up the job instance and the project the execution belongs to, then
    delegates to the spider service instance whose ``server`` equals
    ``job_execution.running_on``. Returns ``None`` when no service instance
    matches.
    """
    job_instance = JobInstance.find_job_instance_by_id(job_execution.job_instance_id)
    project = Project.find_project_by_id(job_instance.project_id)
    # Lazily scan the configured services and stop at the first match.
    candidates = (service for service in self.spider_service_instances
                  if service.server == job_execution.running_on)
    matched = next(candidates, None)
    if matched is None:
        return None
    return matched.items_url(project.project_name, job_instance.spider_name,
                             job_execution.service_job_execution_id)

@property
def servers(self):
return [self.spider_service_instance.server for self.spider_service_instance in
Expand Down
45 changes: 45 additions & 0 deletions SpiderKeeper/app/spider/controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime
import os
import tempfile
import json
import csv

import flask_restful
import requests
Expand All @@ -10,6 +12,7 @@
from flask import redirect
from flask import render_template
from flask import session
from flask import send_from_directory
from flask_restful_swagger import swagger
from werkzeug.utils import secure_filename

Expand Down Expand Up @@ -595,6 +598,48 @@ def job_log(project_id, job_exec_id):
return render_template("job_log.html", log_lines=raw.split('\n'))


def _parse_json_lines(text):
    """Decode a JSON-lines document into a list, skipping blank lines.

    Splits on ``'\\n'`` only (not ``splitlines()``), because a JSON string may
    legally contain other Unicode line separators; whitespace-only lines such
    as a stray ``'\\r'`` are ignored instead of being passed to ``json.loads``.
    """
    return [json.loads(line) for line in text.split('\n') if line.strip()]


def _fetch_job_items(project_id, job_exec_id):
    """Load the scraped items of one job execution from the spider service.

    Returns a ``(job_execution, items)`` tuple. Aborts with 404 when the
    execution does not exist, when no items URL can be resolved for it, or
    when the spider service does not answer with HTTP 200 (in which case the
    body is not a JSON-lines feed and must not be parsed).
    """
    job_execution = JobExecution.query.filter_by(project_id=project_id, id=job_exec_id).first()
    if job_execution is None:
        abort(404)

    url = agent.items_url(job_execution)
    if url is None:
        abort(404)

    res = requests.get(url)
    if res.status_code != 200:
        abort(404)
    res.encoding = 'utf8'
    return job_execution, _parse_json_lines(res.text)


def _items_to_csv(items):
    """Serialize a list of item dicts to CSV text.

    The header is the ordered union of the keys of all items, so items that
    lack a field get an empty cell instead of shifting the following columns.
    An empty item list yields an empty document.
    """
    import io

    fieldnames = []
    seen = set()
    for item in items:
        for key in item:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)

    # The csv module writes bytes on Python 2 and text on Python 3.
    buf = io.BytesIO() if str is bytes else io.StringIO()
    if fieldnames:
        writer = csv.DictWriter(buf, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(items)
    return buf.getvalue()


@app.route("/project/<project_id>/jobexecs/<job_exec_id>/items")
def job_items(project_id, job_exec_id):
    """Render the scraped items of a job execution as an HTML table."""
    _, items = _fetch_job_items(project_id, job_exec_id)
    return render_template("job_items.html", items=items)


@app.route("/project/<project_id>/jobexecs/<job_exec_id>/items/download")
def download_items(project_id, job_exec_id):
    """Send the scraped items of a job execution as a JSON or CSV attachment.

    The ``format`` query argument selects the export type; anything other than
    ``json`` or ``csv`` yields 404. The export is built in memory rather than
    written into the static folder, so nothing is left behind on disk and
    concurrent downloads cannot overwrite each other's files.
    """
    export_format = request.args.get('format')
    if export_format not in ('json', 'csv'):
        abort(404)

    job_execution, items = _fetch_job_items(project_id, job_exec_id)
    job_instance = JobInstance.find_job_instance_by_id(job_execution.job_instance_id)
    project = Project.find_project_by_id(job_instance.project_id)

    raw_name = '{}-{}.{}'.format(project.project_name, job_instance.spider_name, export_format)
    # Keep the suggested download name free of path separators and odd characters.
    filename = secure_filename(raw_name) or 'items.{}'.format(export_format)

    if export_format == 'json':
        payload, mimetype = json.dumps(items), 'application/json'
    else:
        payload, mimetype = _items_to_csv(items), 'text/csv'

    response = app.response_class(payload, mimetype=mimetype)
    response.headers.set('Content-Disposition', 'attachment', filename=filename)
    return response


@app.route("/project/<project_id>/job/<job_instance_id>/run")
def job_run(project_id, job_instance_id):
job_instance = JobInstance.query.filter_by(project_id=project_id, id=job_instance_id).first()
Expand Down
12 changes: 12 additions & 0 deletions SpiderKeeper/app/static/css/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,16 @@
overflow: hidden;
text-overflow: ellipsis;
width: 100px;
}

/* Row numbering for the table with id "table-count": reset the counter on the table. */
#table-count {
counter-reset: rowNumber;
}

/* Every element matched as #item inside the table advances the counter by one. */
#table-count #item {
counter-increment: rowNumber;
}

/* Print the current counter value at the start of the row's first cell. */
#table-count #item td:first-child::before {
content: counter(rowNumber);
}
14 changes: 11 additions & 3 deletions SpiderKeeper/app/templates/job_dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ <h3 class="box-title">Running Jobs</h3>
<th style="width: 40px">Runtime</th>
<th style="width: 120px">Started</th>
<th style="width: 40px">Log</th>
<th style="width: 10px">Items</th>
<th style="width: 40px">Running On</th>
<th style="width: 40px">Action</th>
</tr>
Expand Down Expand Up @@ -119,9 +120,12 @@ <h3 class="box-title">Running Jobs</h3>
{% endif %}
<td>{{ timedelta(now,job.start_time) }}</td>
<td>{{ job.start_time }}</td>
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/log" target="_blank"
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/log"
data-toggle="tooltip" data-placement="top" title="{{ job.service_job_execution_id }}">Log</a>
</td>
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/items"
data-toggle="tooltip" data-placement="top" title="{{ job.service_job_execution_id }}">Items</a>
</td>
<td style="font-size: 10px;">{{ job.running_on }}</td>
<td>
<a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/stop"><span
Expand Down Expand Up @@ -154,13 +158,14 @@ <h3 class="box-title">Completed Jobs</h3>
<th style="width: 40px">Runtime</th>
<th style="width: 120px">Started</th>
<th style="width: 10px">Log</th>
<th style="width: 10px">Items</th>
<th style="width: 10px">Status</th>
</tr>
{% for job in job_status.COMPLETED %}
{% if job.job_instance %}
<tr>
<td>{{ job.job_execution_id }}</td>
<td><a href="/project/1/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>{{ job.job_instance_id }}</td>
<td>{{ job.job_instance.spider_name }}</td>
<td class="txt-args" data-toggle="tooltip" data-placement="right"
title="{{ job.job_instance.spider_arguments }}">{{ job.job_instance.spider_arguments }}
Expand All @@ -184,9 +189,12 @@ <h3 class="box-title">Completed Jobs</h3>
{% endif %}
<td>{{ timedelta(job.end_time,job.start_time) }}</td>
<td>{{ job.start_time }}</td>
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/log" target="_blank"
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/log"
data-toggle="tooltip" data-placement="top" title="{{ job.service_job_execution_id }}">Log</a>
</td>
<td><a href="/project/{{ project.id }}/jobexecs/{{ job.job_execution_id }}/items"
data-toggle="tooltip" data-placement="top" title="{{ job.service_job_execution_id }}">Items</a>
</td>
{% if job.running_status == 2 %}
<td>
<span class="label label-success">FINISHED</span>
Expand Down
56 changes: 56 additions & 0 deletions SpiderKeeper/app/templates/job_items.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{% extends "base.html" %}
{# Lists the scraped items of one job execution; expects `items`, a list of dicts. #}
{% block content_header %}
<h1>Job Items</h1>
{# Toolbar: back to the dashboard plus the JSON and CSV downloads. #}
<ol style="float: right;
    margin-top: 0;
    margin-bottom: 0;
    font-size: 12px;
    padding: 7px 5px;
    position: absolute;
    top: 15px;
    right: 10px;">
    <button type="button" class="btn btn-success btn-flat" style="margin-top: -10px;" data-toggle="modal"
            onclick="location.href='/project/{{ project.id }}/job/dashboard'">&lt; Back
    </button>
    <button type="button" class="btn btn-success btn-flat" style="margin-top: -10px;" data-toggle="modal"
            onclick="location.href='./items/download?format=json'">JSON
    </button>
    <button type="button" class="btn btn-success btn-flat" style="margin-top: -10px;" data-toggle="modal"
            onclick="location.href='./items/download?format=csv'">CSV
    </button>
</ol>
{% endblock %}
{% block content_body %}
<!--======items======-->
<div class="box">
    <div class="box-header">
        <h3 class="box-title">Job Items</h3>
        <div class="box-tools pull-right">
            <button type="button" class="btn btn-box-tool" data-widget="collapse"><i class="fa fa-minus"></i>
            </button>
        </div>
    </div>
    <div class="box-body table-responsive">
        <table class="table table-striped" id="table-count">
            {# Items may omit fields, so the columns are the ordered union of all
               items' keys. Taking the header from the first item only would
               misalign rows whose keys differ. #}
            {% set columns = [] %}
            {% for item in items %}
                {% for key in item.keys() %}
                    {% if key not in columns %}
                        {# list.append returns None, so this "if" exists only to run the append. #}
                        {% if columns.append(key) %}{% endif %}
                    {% endif %}
                {% endfor %}
            {% endfor %}

            {% if items|length > 0 %}
            <tr>
                <th style="width: 10px">#</th>
                {% for key in columns %}
                <th style="width: 10px">{{ key }}</th>
                {% endfor %}
            </tr>
            {% endif %}

            {# NOTE: the id "item" is repeated on every row; the row counter in
               app.css selects "#table-count #item", so it is left unchanged here. #}
            {% for item in items %}
            <tr id="item">
                <td></td>
                {% for key in columns %}
                <td>{{ item.get(key, '') }}</td>
                {% endfor %}
            </tr>
            {% endfor %}
        </table>
    </div>
</div>

{% endblock %}
54 changes: 38 additions & 16 deletions SpiderKeeper/app/templates/job_log.html
Original file line number Diff line number Diff line change
@@ -1,16 +1,38 @@
<html>
<meta charset="utf-8">
<style>
.p-log {
font-size: 12px;
line-height: 1.5em;
color: #1f0909;
text-align: left
}
</style>
<body style="background-color:#F3F2EE;">
{% for line in log_lines %}
<p class="p-log">{{ line }}</p>
{% endfor %}
</body>
</html>
{% extends "base.html" %}
{# Shows the log of one job execution; expects `log_lines`, an iterable of text lines. #}
{% block content_header %}
<h1>Job Logs</h1>
{# Toolbar: a single button leading back to the project's job dashboard. #}
<ol style="float: right;
    margin-top: 0;
    margin-bottom: 0;
    font-size: 12px;
    padding: 7px 5px;
    position: absolute;
    top: 15px;
    right: 10px;">
    <button type="button" class="btn btn-success btn-flat" style="margin-top: -10px;" data-toggle="modal"
            onclick="location.href='/project/{{ project.id }}/job/dashboard'">&lt; Back
    </button>
</ol>
{% endblock %}
{% block content_body %}
<!--======logs======-->
<div class="box">
    <div class="box-header">
        <h3 class="box-title">Job Logs</h3>
        <div class="box-tools pull-right">
            <button type="button" class="btn btn-box-tool" data-widget="collapse"><i class="fa fa-minus"></i>
            </button>
        </div>
    </div>
    <div class="box-body table-responsive">
        <table class="table table-striped">
            {# One table row per log line. #}
            {% for line in log_lines %}
            <tr>
                <td><p class="p-log">{{ line }}</p></td>
            </tr>
            {% endfor %}
        </table>
    </div>
</div>

{% endblock %}
Empty file modified screenshot/screenshot_3.png
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.