Question

Так что я работал над этим некоторое время, и я не могу найти ответ на него или выяснить это.Поэтому я извлекаю данные из Steam, и мне нужно выяснить, как получить платформы, например, Mac, и превратить их в число (номер строки).Например, если игра поддерживает Mac, она будет отображаться в моем списке как «1», но если это не так, она будет отображаться как «0».У меня проблема с кодом, работающим только один раз и делающим все в «1».

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from datetime import timedelta
import datetime
import time
import csv
my_url = 'https://store.steampowered.com/search/?specials=1&page=1'

#opening up connectin, grabbing the page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()

#html parsing
page_soup = soup(page_html, "html.parser")

#grab products
containers = page_soup.findAll("div", {"class":"responsive_search_name_combined"})

filename = "products.csv"
f = open(filename, "w", encoding='UTF-8')
headers = "Titles, Release_date, Discount, Price before, Price after, Positive review, Reviewers, Win, Lin, Osx, Time \n"
f.write(headers)
#f.write(headers)
#len(containers)
#containers[1]
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
print(st)

for container in containers:
    titles_container = container.findAll("span",{"class":"title"})
    titl = titles_container[0].text
    print(titl)

    product_container = container.findAll("div",{"class":"search_released"})
    product_date = product_container[0].text
    print(product_date)

    product_discount_container = container.findAll("div",{"class":"search_discount"})
    product_discount = product_discount_container[0].text
    print(product_discount)

    product_price_container_before = container.findAll("div",{"class":"search_price"})
    product_price_before = product_price_container_before[0].text
    test = re.findall('(\d+\W)',product_price_before)
    testing = test[0] + test[1]
    print(testing)

    product_price_container_after = container.findAll("div",{"class":"discounted"})
    for product_price_after in product_price_container_after:
        product_price_after.find("span").extract()
        print(product_price_after.text)

    product_review_container = container.findAll("span",{"class":"search_review_summary"})
    for product_review in product_review_container:
        prr = product_review.get('data-tooltip-html')
        a = re.findall('(\d+%)|(\d+\d+)',prr)
        c = a[1][1]
        print(c)


    product_platform_container = container.findAll("span",{"class":"platform_img"})
    for product_platform in product_platform_container:
        platform = product_platform.get('class')[1]
        platt = re.findall('(\Aw)',platform)
        plattt = re.findall('(\Am)',platform)
        platttt = re.findall('(\Al)',platform)
        print(platt)
        print(plattt)
        print(platttt)

        for p in plattt:
            if "m" in p:
                macken = "1"    
            elif "m" not in p:
                macken = "0"
            print(macken)


    f.write(titl + "," + product_date.replace(",","") + "," + product_discount.replace("\n", "") + "," + testing.replace(",", ".") + "," + product_price_after.text.replace("\n","").replace(" ", "").replace(",",".").replace("\t\t\t\t\t\t\t","") + "," + a[0][0] + "," + c.replace(",","") + "," + y + "," + macken + "," + "blah" + "," + st + "\n")

f.close()
pd.read_csv("products.csv", error_bad_lines=False)

Я также записываю его в файл CSV.Поэтому, когда я записываю его в файл csv, он просто говорит 1, 1, 1, 1, 1 ...

Я получаю данные с этой страницы: 'https://store.steampowered.com/search/?specials=1&page=1'

Iзнаю, что этот вопрос немного сбивает с толку, так что, надеюсь, вы сможете помочь, если вам понадобится еще какой-нибудь код, дайте мне знать.

Sohan Das · Answer 1 · 19 марта 2019

ваше утверждение было неверным, поэтому вы получаете 1, см. Код ниже!

import requests,csv
from bs4 import BeautifulSoup


req = requests.get('https://store.steampowered.com/search/?specials=1&page=1')
soup = BeautifulSoup(req.content,'html.parser')
data = []
for platform in soup.find_all('div', attrs={'class':'col search_name ellipsis'}):
    title = platform.find('span',attrs={'class':'title'}).text
    if platform.find('span',attrs={'class':'win'}):
        win = '1'
    else:
        win = '0'

    if platform.find('span',attrs={'class':'mac'}):
        mac = '1'
    else:
        mac = '0'

    if platform.find('span',attrs={'class':'linux'}):
        linux = '1'
    else:
        linux = '0'

    data.append({
        'title':title.encode('utf-8'),
        'win':win,
        'mac':mac,
        'linux':linux})

with open('data.csv', 'w', newline='') as f:
    fields = ['title','win','mac','linux']
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()
    writer.writerows(data)

xbound · Answer 2 · 19 марта 2019

Вот как я бы это сделал:

import csv

# ...

rows = []
product_platform_container = container.findAll("span",{"class":"platform_img"})
for product_platform in product_platform_container:
    platform = product_platform.get('class')[1]
    win_p = re.findall('(\Aw)',platform)
    mac_p = re.findall('(\Am)',platform)
    linux_p = re.findall('(\Al)',platform)
    print(win_p)
    print(mac_p)
    print(linux_p)
    row = {
       "linux": 1 if linux_p else 0,
       "win": 1 if win_p else 0,
       "mac": 1 if mac_p else 0
    }
    rows.append(row)

# After you parsed all entries...
fieldnames = ['mac', 'win', 'linux']
writer = csv.DictWriter(f, fieldnames=fieldnames)

writer.writeheader()
for row in rows:
    writer.writerow(row)

Объяснение : После того, как мы определили платформу с re, мы создаем строку CSV, где mac, win иlinux будет иметь 1, только если их соответствующие совпадения (mac_p, win_p и linux_p) не пусты.Здесь f - ваш открытый файловый объект.Оформить эту статью, в которой показано, как работать с CSV-файлами в Python.

Python, Красивый суп, как извлечь данные и распечатать в CSV-файл

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Python, Красивый суп, как извлечь данные и распечатать в CSV-файл

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы