-
Notifications
You must be signed in to change notification settings - Fork 0
/
collector
executable file
·59 lines (45 loc) · 2.09 KB
/
collector
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env node
'use strict'
var config = require('./config');
var request = require('./request');
var parse = require('./parse');
console.log("\nGETTING STARTED\n");

/*
 * Main crawl: for every configured city and every configured search key,
 * request the first listing page, hand the raw response to a dedicated
 * parser instance, and keep following the parser's "next page" URI until the
 * parser reports that the current range has reached the total count.
 *
 * Each city/key pair gets its own parser and its own pagination chain; the
 * delay between requests applies within a chain only.
 */
config.cities.forEach(function(city) {
  config.craigslist.keys.forEach(function(key) {
    var parser = new parse();
    var host = city.subdomain + '.' + config.craigslist.domain;

    /**
     * Request a single page from this city's host and feed the raw response
     * into this chain's parser.
     *
     * @param {string} path - Path (or next-page URI) to request on `host`.
     */
    function fetchPage(path) {
      new request({
        host: host,
        path: path
      }).make(function parseResponse(rawResponse) {
        parser.parse(rawResponse);
      });
    }

    // Listeners are attached BEFORE the first request is issued so that no
    // event can be missed, even if a response is ever delivered synchronously.

    // Posts successfully parsed
    parser.events.on('posts-parsed', function() {
      // TODO: decide whether to persist the posts here and clear
      // parser.posts, or to wait until every request has finished.
      console.log(parser.posts.length + ' posts parsed');
    });

    // Pagination successfully parsed... move on to the next page
    parser.events.on('pagination-parsed', function() {
      // Whole seconds to wait so this chain stays within requestsPerMinute.
      var secondsBetweenRequests = Math.ceil(60 / config.throttling.requestsPerMinute);

      // NOTE(review): the parser's field types are not visible from this file.
      // Coercing to numbers keeps the comparison numeric even if the values
      // are scraped strings (as strings, "200" < "1000" is false) -- confirm.
      var rangeEnd = Number(parser.currentRangeEnd);
      var totalCount = Number(parser.totalCount);

      console.log('Range Start: ' + parser.currentRangeStart);
      console.log('Range End: ' + parser.currentRangeEnd);
      console.log('Total: ' + parser.totalCount);
      console.log('Previous URI: ' + parser.previousPageUri);
      console.log('Next URI: ' + parser.nextPageUri);

      if (rangeEnd < totalCount) {
        console.log("\nWaiting " + secondsBetweenRequests + " seconds...\n");
        setTimeout(function() {
          fetchPage(parser.nextPageUri);
        }, secondsBetweenRequests * 1000);
      } else {
        console.log("\nDONE\n");
      }
    });

    // Make initial request to get first set of posts and pagination details
    fetchPage(config.craigslist.path + key);
  });
});