Новое в Scrapy Framework. У меня есть ошибка ниже, когда я запускаю свой код Scrapy. Ниже пример моего паука. Отлично, если кто-нибудь может указать, что я делаю что-то не так.
Пример кода паука
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from mycrawler.items import MycrawlerItem
class PageavailabilitySpider(CrawlSpider):
handle_httpstatus_list = [400, 403, 404, 500, 502, 503, 504]
name = 'pageavailability'
# Replace the value with the real domain.
allowed_domains = ['example.com']
# Replace the value with the website URL to crawl from.
start_urls = ['http://www.example.com/']
custom_settings = {
'LOG_FILE': 'logs/pageavailability.log',
'LOG_LEVEL': 'INFO'
}
rules = (
Rule(
LinkExtractor(
allow=('/index/'),
tags='a',
attrs='href',
unique=True
),
callback='parse_item',
follow=True
),
)
def parse_item(self, response):
item = MycrawlerItem()
item['title'] = response.css('title::text').extract_first()
item['url'] = response.url
item['status'] = response.status
return item
Ошибки
2020-04-13 13:52:35 [scrapy.utils.log] INFO: Scrapy 2.0.1 started (bot: scrapybot)
2020-04-13 13:52:35 [scrapy.utils.log] INFO: Versions: lxml 4.5.0.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 20.3.0, Python 3.8.2 (default, Mar 31 2020, 15:23:55) - [GCC 8.3.0], pyOpenSSL 19.1.0 (OpenSSL 1.1.1f 31 Mar 2020), cryptography 2.9, Platform Linux-4.19.76-linuxkit-x86_64-with-glibc2.2.5
2020-04-13 13:52:35 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.epollreactor.EPollReactor
2020-04-13 13:52:35 [scrapy.crawler] INFO: Overridden settings:
{'LOG_FILE': 'logs/pageavailability.log', 'LOG_LEVEL': 'INFO'}
Traceback (most recent call last):
File "./flobot/gospider.py", line 8, in <module>
process.crawl(PageavailabilitySpider)
File "/usr/local/lib/python3.8/site-packages/scrapy/crawler.py", line 176, in crawl
crawler = self.create_crawler(crawler_or_spidercls)
File "/usr/local/lib/python3.8/site-packages/scrapy/crawler.py", line 209, in create_crawler
return self._create_crawler(crawler_or_spidercls)
File "/usr/local/lib/python3.8/site-packages/scrapy/crawler.py", line 214, in _create_crawler
return Crawler(spidercls, self.settings)
File "/usr/local/lib/python3.8/site-packages/scrapy/crawler.py", line 56, in __init__
install_scrapy_root_handler(self.settings)
File "/usr/local/lib/python3.8/site-packages/scrapy/utils/log.py", line 111, in install_scrapy_root_handler
_scrapy_root_handler = _get_handler(settings)
File "/usr/local/lib/python3.8/site-packages/scrapy/utils/log.py", line 127, in _get_handler
handler = logging.FileHandler(filename, encoding=encoding)
File "/usr/local/lib/python3.8/logging/__init__.py", line 1143, in __init__
StreamHandler.__init__(self, self._open())
File "/usr/local/lib/python3.8/logging/__init__.py", line 1172, in _open
return open(self.baseFilename, self.mode, encoding=self.encoding)
FileNotFoundError: [Errno 2] No such file or directory: '/usr/src/app/logs/pageavailability.log'