diff --git a/docs/apis/self.crawl.md b/docs/apis/self.crawl.md index d8da32f01..9891e2ef6 100644 --- a/docs/apis/self.crawl.md +++ b/docs/apis/self.crawl.md @@ -113,7 +113,7 @@ class Handler(BaseHandler): > `Handler.crawl_config` can be used with `proxy` to set a proxy for whole project. * `etag` - use HTTP Etag mechanism to pass the process if the content of the page is not changed. _default: True_ -* `last_modifed` - use HTTP Last-Modified header mechanism to pass the process if the content of the page is not changed. _default: True_ +* `last_modified` - use HTTP Last-Modified header mechanism to pass the process if the content of the page is not changed. _default: True_ * `fetch_type` - set to `js` to enable JavaScript fetcher. _default: None_ * `js_script` - JavaScript run before or after page loaded, should been wrapped by a function like `function() { document.write("binux"); }`. diff --git a/pyspider/fetcher/tornado_fetcher.py b/pyspider/fetcher/tornado_fetcher.py index 4b60cceae..203cb9654 100644 --- a/pyspider/fetcher/tornado_fetcher.py +++ b/pyspider/fetcher/tornado_fetcher.py @@ -253,10 +253,11 @@ def pack_tornado_request_parameters(self, url, task): if _t and 'If-None-Match' not in fetch['headers']: fetch['headers']['If-None-Match'] = _t # last modifed - if task_fetch.get('last_modified', True): + if task_fetch.get('last_modified', task_fetch.get('last_modifed', True)): + last_modified = task_fetch.get('last_modified', task_fetch.get('last_modifed', True)) _t = None - if isinstance(task_fetch.get('last_modifed'), six.string_types): - _t = task_fetch.get('last_modifed') + if isinstance(last_modified, six.string_types): + _t = last_modified elif track_ok: _t = track_headers.get('last-modified') if _t and 'If-Modified-Since' not in fetch['headers']: diff --git a/pyspider/libs/base_handler.py b/pyspider/libs/base_handler.py index ab39049ce..ae33ea2b0 100644 --- a/pyspider/libs/base_handler.py +++ b/pyspider/libs/base_handler.py @@ -273,6 +273,7 @@ def _crawl(self, url, **kwargs): 'proxy', 'etag', 'last_modifed', + 'last_modified', 'save', 'js_run_at', 'js_script', @@ -332,7 +333,7 @@ def crawl(self, url, **kwargs): cookies proxy etag - last_modifed + last_modified auto_recrawl fetch_type diff --git a/tests/test_fetcher_processor.py b/tests/test_fetcher_processor.py index ca73096fa..8b9cab612 100644 --- a/tests/test_fetcher_processor.py +++ b/tests/test_fetcher_processor.py @@ -276,8 +276,8 @@ def test_a160_etag(self): self.assertFalse(newtasks) self.assertFalse(result) - def test_a170_last_modifed(self): - status, newtasks, result = self.crawl(self.httpbin+'/cache', last_modifed='0', callback=self.json) + def test_a170_last_modified(self): + status, newtasks, result = self.crawl(self.httpbin+'/cache', last_modified='0', callback=self.json) self.assertStatusOk(status) self.assertFalse(newtasks)