Я не изучал Scrapy, но я бы использовал другую библиотеку. Как насчёт того, чтобы попробовать следующее решение? Сначала вам нужно установить эту библиотеку: pip install -U simplified_scrapy
from simplified_scrapy import Spider, SimplifiedDoc, SimplifiedMain
class MindopSpider(Spider):
    """Spider that extracts the tables from one zoznamspravcov.sk administrator page.

    Each matched block on the page is turned into a dict holding the block's
    heading and the text of every table cell, row by row.
    """

    name = 'MD'
    allowed_domains = ['zoznamspravcov.sk/']
    start_urls = ['https://www.zoznamspravcov.sk/cake_administrator/publishedAdministrators/view/1']
    # refresh_urls = True  # For debugging: if refresh_urls = True, start_urls will be crawled again.

    def extract(self, url, html, models, modelNames):
        """Parse one downloaded page into a list of heading/rows records.

        Args:
            url: Passed in by the framework; not used here.
            html: Page markup to parse.
            models: Passed in by the framework; not used here.
            modelNames: Passed in by the framework; not used here.

        Returns:
            A dict with ``"Urls"`` set to ``None`` and ``"Data"`` set to a list
            of ``{'rows': [[cell_text, ...], ...], 'head': heading_text}``
            dicts, one per matched block. Per the original author's note, the
            framework saves the returned data automatically.
        """
        doc = SimplifiedDoc(html)
        # Rewrite every "</th>" that is followed by "<td" into "</td><td" before
        # selecting (the original author describes this as correcting the HTML
        # tags). The pattern is a raw string so "\s" reaches the regex engine
        # without triggering Python's invalid-escape-sequence warning.
        doc['html'] = doc.replaceReg(doc.html, r'</th>\s*<td', '</td><td')
        blocks = doc.selects('div.administrators view>div|table')

        datas = []
        for block in blocks:
            obj = {'rows': []}
            obj['head'] = block.h2.text
            # One inner list of cell texts per table row.
            obj['rows'].extend([c.text for c in row.tds] for row in block.tbody.trs)
            datas.append(obj)

        print(datas)
        # No follow-up URLs are returned; only the extracted data.
        return {"Urls": None, "Data": datas}
# Create the spider instance and hand it to SimplifiedMain.startThread, which
# (per the original author's note) starts the crawl. This runs at module load.
SimplifiedMain.startThread(MindopSpider()) # Start