Я написал скрап-бота для очистки данных, и теперь я хотел сохранить его в MySQL. Для этого я написал сценарий конвейера. Но он не работает, не знаю, с какой проблемой он сталкивается. Может ли кто-нибудь подсказать, в чем именно проблема. Код работает отлично, я думаю, он не может подключиться только к mySQL.
Код бота Scrapy:
from..items import NdtvItem
class NdtvReviewSpider(scrapy.Spider):
name = 'ndtv_review'
page_number = 2
start_urls = ['https://www.ndtv.com/movie-reviews/page-1'
]
def parse(self, response):
items = {}
i = 1
count = response.xpath('//*[@id="ins_storylist"]/ul/li[{}]/div[2]/div[1]/a/i/text()'.format("*")).getall()
count = len(count)
while i<=count:
outputs = NdtvItem()
outputs['page_title'] = response.xpath('//*[@id="ins_storylist"]/ul/li[{}]/div[2]/div[1]/a/i/text()'.format(i)).get()
outputs['review_content'] = response.xpath("//*[@id='ins_storylist']/ul/li[{}]/div[2]/div[3]/text()".format(i)).get()
outputs['review_link'] = response.xpath("//*[@id='ins_storylist']/ul/li[{}]/div[2]/div[1]/a/@href".format(i)).get()
i+=1
fl = 0
if outputs['page_title'] == []:
outputs['page_title'] = ''
fl+=1
if outputs['review_content'] == []:
outputs['review_content'] = ''
fl+=1
if outputs['review_link'] == []:
outputs['review_link'] = ''
fl += 1
else:
yield outputs
#if outputs['review_content'] != []:
# if 'Review:' in outputs['review_content'].split(" ") or 'review:' in outputs['review_content'].split(" ") :
# yield outputs
next_page = 'https://www.ndtv.com/movie-reviews/page-' + str(NdtvReviewSpider.page_number) +''
if NdtvReviewSpider.page_number <= 15:
NdtvReviewSpider.page_number += 1
yield response.follow(next_page, callback = self.parse)
pass
Скрипт конвейера
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import mysql.connector
class ReviewBotPipeline(object):
def __init__(self):
self.create_connection()
self.create_table()
def create_connection(self):
self.conn = mysql.connector.connect(
host = 'localhost',
user = 'root',
passwd = 'xxxxxx',
database = 'review'
)
self.curr = self.conn.cursor()
def create_table(self):
self.curr.execute(""" DROP TABLE IF EXISTS review_tb """)
self.curr.execute(""" create table review_tb(
page_title text,
review_content text,
review_link text
) """)
def process_item(self, outputs, spider):
self.store_db(outputs)
return outputs
def store_db(self, outputs):
self.curr.execute(""" insert into review_tb values(%s,%s,%s)""",(
outputs['page_title'][0],
outputs['review_content'][0],
outputs['review_link'][0]
))
self.conn.commit()
И это ошибка, которую я получаю.
File "/Users/divyanshu/env/lib/python3.7/site-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/Users/divyanshu/env/lib/python3.7/site-packages/scrapy/utils/defer.py", line 154, in f
return deferred_from_coro(coro_f(*coro_args, **coro_kwargs))
File "/Users/divyanshu/review_bot/review_bot/pipelines.py", line 33, in process_item
self.store_db(outputs)
File "/Users/divyanshu/review_bot/review_bot/pipelines.py", line 40, in store_db
outputs['review_link']
File "/Users/divyanshu/env/lib/python3.7/site-packages/mysql/connector/cursor_cext.py", line 248, in execute
prepared = self._cnx.prepare_for_mysql(params)
File "/Users/divyanshu/env/lib/python3.7/site-packages/mysql/connector/connection_cext.py", line 626, in prepare_for_mysql
result = self._cmysql.convert_to_mysql(*params)
_mysql_connector.MySQLInterfaceError: Python type list cannot be converted