Commit e1563765 authored by Tobias Steiner
Browse files

lower pressure on the remote server and add a server timeout

parent 44eb6a26
......@@ -32,9 +32,9 @@ const writer = {
const spider = new Spider({
// How many requests can be run in parallel
concurrent: 20,
concurrent: 10,
// How long to wait after each request
delay: 50,
delay: 100,
// A stream to where internal logs are sent, optional
logs: logger,
// Re-visit visited URLs, false by default
......@@ -68,6 +68,8 @@ class Crawler extends Transform {
* @type {number}
*/
private timeout = 20;
private serverTimeout = 15 * 1000;
/**
* Logger
*/
......@@ -89,11 +91,20 @@ class Crawler extends Transform {
console.log('loaded');
if(!spider.full()) {
const data = chunk.parsed;
let foundServer = false;
spider.queue(data.url, (doc) => {
console.log("Successfully got data " + data.url);
data.doc = doc;
foundServer = true;
callback(null, data);
}, data.headers);
// If the server has not responded within serverTimeout, invoke the callback without data
setTimeout(() => {
if(!foundServer) {
callback(null);
}
}, this.serverTimeout)
} else {
setTimeout(() => {
this._transform(chunk, encoding, callback);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment