forked from curious-attempt-bunny/node-proxies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
107 lines (102 loc) · 2.7 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
var jsdom = require('jsdom');
var async = require('async');
var http = require('http');
var url = require('url');
/**
 * Scrapes the proxy-list.org index page and every pagination page linked
 * from it, appending each "ip:port" found in a table cell to the
 * module-level `proxies` array as an "http://ip:port" URL.
 *
 * Pages that fail to load are skipped instead of crashing on a missing
 * window; a proxy URL is stored only once even if it is listed repeatedly.
 *
 * @param {function(*=)} next - Called when scraping has finished. It receives
 *   the load failure when the index page could not be loaded, and no
 *   arguments otherwise.
 */
var scrapeProxies = function (next) {
    var INDEX_PAGE = 'http://www.proxy-list.org/en/index.php';
    var SCRIPTS = ["http://code.jquery.com/jquery.js"];

    // A page can only be scraped when jsdom handed back a window that has
    // jQuery injected; on a failed load `window` may be missing entirely.
    var isUsable = function(window) {
        return !!window && typeof window.$ === 'function';
    };

    // Collects every "ip:port" from the page's table cells into `proxies`.
    var addProxies = function(window, done) {
        var text = window.$('td:contains(":")').text();
        var reg = /([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+)/g;
        var match;
        var candidate;
        while ((match = reg.exec(text)) !== null) {
            candidate = 'http://' + match[0];
            // The same proxy may be listed on several pages; keep one copy
            // so it is not probed (and counted) more than once later.
            if (proxies.indexOf(candidate) === -1) {
                proxies.push(candidate);
            }
        }
        done();
    };

    jsdom.env(INDEX_PAGE, SCRIPTS, function (errors, window) {
        if (!isUsable(window)) {
            // Without the index page there is nothing to scrape; report the
            // failure instead of throwing on `window.$`.
            next(errors || new Error('Unable to load ' + INDEX_PAGE));
            return;
        }
        addProxies(window, function() {
            var pages = window.$('a[href*="index.php?sp"]').toArray()
                .map(function(a) { return a.href; })
                // Drop repeated pagination links so each page is fetched once.
                .filter(function(href, index, all) { return all.indexOf(href) === index; });

            async.each(pages, function(page, done) {
                jsdom.env(page, SCRIPTS, function (pageErrors, pageWindow) {
                    if (!isUsable(pageWindow)) {
                        // Best effort: skip this page. Handing the error to
                        // async.each would end the whole scrape early.
                        done();
                        return;
                    }
                    addProxies(pageWindow, done);
                });
            }, function() {
                next();
            });
        });
    });
};
/**
 * Probes every URL in the module-level `proxies` array by requesting
 * http://www.google.com/ through it, and appends each proxy that returns
 * response data to the module-level `validatedProxies` array.
 *
 * A probe is settled by whichever happens first: response data arrives
 * (proxy accepted), the request times out after 8 seconds, or the request
 * emits an error (proxy rejected in both cases).
 *
 * @param {function()} next - Called at most once, as soon as 30 proxies have
 *   been validated or every probe has settled, whichever comes first. It is
 *   called immediately when there are no proxies to probe.
 */
var filterProxies = function(next) {
    var TARGET_COUNT = 30;
    var TIMEOUT_MS = 8000;
    var remaining = proxies.length;
    var notified = false;

    // Invokes the completion callback the first time it is requested only.
    var notifyOnce = function() {
        if (!notified) {
            notified = true;
            if (next) {
                next();
            }
        }
    };

    // With nothing to probe no request event would ever fire, so report
    // completion right away instead of leaving the caller waiting.
    if (remaining === 0) {
        notifyOnce();
        return;
    }

    proxies.forEach(function(proxy) {
        var parsedUrl = url.parse(proxy);
        var settled = false;
        var request;

        // Records the outcome of this probe once, no matter how many of
        // data/timeout/error fire, then tears the request down.
        var settle = function(isWorking) {
            if (!settled) {
                settled = true;
                remaining -= 1;
                if (isWorking) {
                    validatedProxies.push(proxy);
                }
                // Completion is checked on every outcome so that a final
                // successful probe also ends the run.
                if (validatedProxies.length >= TARGET_COUNT || remaining === 0) {
                    notifyOnce();
                }
            }
            request.abort();
        };

        request = http.request({
            port: parseInt(parsedUrl.port, 10),
            host: parsedUrl.hostname,
            // Absolute URL in the request line: the request is addressed to
            // the proxy, which is asked to fetch this page.
            path: 'http://www.google.com/',
            headers: {
                Host: "www.google.com"
            }
        }, function(res) {
            res.on('data', function() {
                settle(true);
            });
        });
        request.setTimeout(TIMEOUT_MS, function() {
            settle(false);
        });
        request.on('error', function() {
            settle(false);
        });
        request.end();
    });
};
var proxies = null;
var validatedProxies = null;
var waitingForProxies = false;
module.exports = function(next) {
if (waitingForProxies || validatedProxies == null || validatedProxies.length == 0) {
if (waitingForProxies || (validatedProxies != null && validatedProxies.length == 0)) {
setTimeout(function() {
module.exports(next);
}, 10);
} else {
waitingForProxies = true;
proxies = [];
validatedProxies = [];
scrapeProxies(function() {
filterProxies(function() {
waitingForProxies = false;
module.exports(next);
});
});
}
} else {
next(validatedProxies);
}
};