Чтобы получить корректные данные, при запросе второй страницы нужно передать cookie, возвращённые первой страницей. Вот пример.
from simplified_scrapy.spider import Spider, SimplifiedDoc
from simplified_html.request_render import RequestRender
class ToscrapeSpider(Spider):
    """Spider for the paginated inventory listing on agmgranite.com.

    ``login`` collects cookies from a regular inventory page through the
    ``req`` renderer and stores them on the spider via ``setCookie``.
    NOTE(review): presumably the framework calls ``login`` before it
    crawls ``start_urls`` so the paginated request carries those
    cookies -- confirm against the simplified_scrapy documentation.
    """

    name = 'agmgranite.com'
    allowed_domains = ['www.agmgranite.com']
    start_urls = [
        'https://www.agmgranite.com/paginate.php?page=1&lid=3&f=reset&invp=',
    ]
    # For debug. If refresh_urls = True, start_urls will be crawled again.
    refresh_urls = True

    # Renderer used by login(); it is pointed at a local Chrome executable
    # (the path below is the macOS install location).
    req = RequestRender({
        'executablePath': '/Applications/chrome.app/Contents/MacOS/Google Chrome',
    })

    def extract(self, url, html, models, modelNames):
        """Parse the downloaded page, print its HTML and return True."""
        page = SimplifiedDoc(html)
        print(page.html)
        return True

    # If you need to log in to get the data you want, please rewrite this
    # method.
    def login(self):
        """Request cookies from an inventory page and return True.

        The cookies are delivered to ``_callback``. The XPath passed as
        ``selectorOrFunctionOrTimeout`` is presumably what the renderer
        waits for before reading cookies -- TODO confirm.
        """
        # To get csrf_token
        self.req.getCookies(
            'https://www.agmgranite.com/inventory/hill-country-spicewood/?f=reset',
            self._callback,
            selectorOrFunctionOrTimeout='//div[@class="inv_of"]',
        )
        return True

    def _callback(self, cookie, url, data):
        """Store the received cookie, or flag the login as failed."""
        if not cookie:
            # If the login is not successful, try again next time.
            self.logged_in = False
            return
        self.setCookie(url, cookie)
from simplified_scrapy.simplified_main import SimplifiedMain
# Script entry point: create the spider and hand it to the framework runner.
# NOTE(review): this runs at import time as well as when executed directly.
SimplifiedMain.startThread(ToscrapeSpider())
Другие примеры использования simplified_scrapy можно найти здесь.