Я пытаюсь выполнить базовую очистку с использованием LXML и XPATH, но по некоторым причинам ответ меняется при последующих запусках.Почему это происходит?Я провел дни, модифицируя XPATH без решения.Я бы решил проблему Google, но я даже не знаю, как это будет называться.Цените любые указания.
PY SCRIPT
from lxml import html
import requests
from time import sleep
def parser(asin):
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'}
url = "http://www.amazon.com/gp/offer-listing/"+asin+"/ref=olp_f_used?ie=UTF8&f_collectible=true&f_usedAcceptable=true&f_usedGood=true&f_used=true&f_usedLikeNew=true&f_usedVeryGood=true"
page = requests.get(url,headers=headers)
while True:
sleep(1)
try:
doc = html.fromstring(page.content)
RAW_OFFER_USED_CONDITION = doc.xpath('//span[@class="a-size-medium olpCondition a-text-bold"]//text()')
OFFER_USED_CONDITION = RAW_OFFER_USED_CONDITION if RAW_OFFER_USED_CONDITION else None
if page.status_code!=200:
raise ValueError('captha')
data = {
'OFFER_USED_CONDITION':OFFER_USED_CONDITION
}
return data
except Exception as e:
print(e)
def readcsv():
AsinList = ['B00001SHNG']
extracted_data = []
for i in AsinList:
asin = i
print("Processing: "+asin)
extracted_data.append(parser(asin))
sleep(1)
print(extracted_data)
if __name__ == "__main__":
readcsv()
ОТВЕТ 1
[{'OFFER_USED_CONDITION': ['\n\n Used\n - Acceptable\n ', '\n\n Used\n - Good\n ', '\n\n Used\n - Good\n ', '\n\n Used\n - Good\n ', '\n\n Collectible\n - Acceptable\n ', '\n\n Used\n - Very Good\n ', '\n\n Used\n - Good\n ', '\n\n Used\n - Very Good\n ', '\n\n Used\n - Like New\n ', '\n\n Used\n - Very Good\n ']}]
ОТВЕТ 2
[{'OFFER_USED_CONDITION': ['\n Used\n \n\n\n\n-\n', '\n Acceptable\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Good\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Good\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Good\n', '\n', '\n Collectible\n \n\n\n\n-\n', '\n Acceptable\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Very Good\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Good\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Very Good\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Like New\n', '\n', '\n Used\n \n\n\n\n-\n', '\n Very Good\n', '\n']}]