Сохранить в файл Excel, используя openpyxl вместо CSV - PullRequest
0 голосов
/ 28 июня 2018

Приведённый ниже код работает и в настоящее время сохраняет данные в файл CSV, однако я хочу сохранять их в файл Excel с помощью openpyxl вместо CSV. Я попытался сделать это ниже, но безуспешно. В конечном итоге я хотел бы сохранять данные на существующий лист с возможностью перезаписи уже имеющихся данных. Кто-нибудь может помочь? Спасибо

Рабочий код:

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
import csv
import urllib    

def get_elements_by_xpath(driver, xpath):
    """Return the visible text of every element matching *xpath*."""
    texts = []
    for element in driver.find_elements_by_xpath(xpath):
        texts.append(element.text)
    return texts


url = 'http://www.tradingview.com/screener'
driver = webdriver.Firefox()
driver.get(url)

try:
    # The screener shows a "N matches" counter; wait up to 10 s for it to
    # become visible, then parse the leading integer out of its text.
    selector = '.js-field-total.tv-screener-table__field-value--total'
    condition = EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
    matches = WebDriverWait(driver, 10).until(condition)
    matches = int(matches.text.split()[0])
except Exception:
    # Broad on purpose: any lookup/parse failure falls back to a default.
    # (The original `except (TimeoutException, Exception)` was redundant --
    # Exception already subsumes TimeoutException.)
    print ('Problem finding matches, setting default...')
    matches = 4895 # Set default

# The page loads 150 rows at a time; divide matches by
# 150 to determine the number of times we need to scroll;
# add 5 extra scrolls just to be sure
num_loops = int(matches / 150 + 5)

for _ in range(num_loops):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(1) # Pause briefly to allow loading time

# (column header, XPath selecting that column's cells)
search_entries = [
    ("tickers",       "//tbody/tr/td[1]/div/a"),
    ("rev annual",      "//tbody/tr/td[10]"),
    ("income",          "//tbody/tr/td[11]")]

# 'w' is sufficient (nothing reads the file back); newline='' is required
# by the csv module so it controls line endings itself.
with open('textfile.csv', 'w', newline='') as f_output:
    csv_output = csv.writer(f_output)

    # Write header
    csv_output.writerow([name for name, xpath in search_entries])

    # One scraped column list per search entry; zip(*entries) transposes
    # the columns into rows for writerows.
    entries = []
    for name, xpath in search_entries:
        entries.append(get_elements_by_xpath(driver, xpath))

    csv_output.writerows(zip(*entries))

Попробовал это:

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from openpyxl import Workbook
import urllib   

# Write-only workbook: rows can only be appended with ws.append(); cells
# cannot be edited in place, and saving always creates a brand-new file.
wb = Workbook(write_only=True)
ws = wb.create_sheet()

def get_elements_by_xpath(driver, xpath):
    """Collect the text content of all elements located by *xpath*."""
    found = driver.find_elements_by_xpath(xpath)
    return [node.text for node in found]


url = 'http://www.tradingview.com/screener'
driver = webdriver.Firefox()
driver.get(url)

try:
    # The screener shows a "N matches" counter; wait up to 10 s for it to
    # become visible, then parse the leading integer out of its text.
    selector = '.js-field-total.tv-screener-table__field-value--total'
    condition = EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
    matches = WebDriverWait(driver, 10).until(condition)
    matches = int(matches.text.split()[0])
except Exception:
    # Broad on purpose: any lookup/parse failure falls back to a default.
    print ('Problem finding matches, setting default...')
    matches = 4895 # Set default

# The page loads 150 rows at a time; divide matches by
# 150 to determine the number of times we need to scroll;
# add 5 extra scrolls just to be sure
num_loops = int(matches / 150 + 5)

for _ in range(num_loops):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(1) # Pause briefly to allow loading time

# (column header, XPath selecting that column's cells)
search_entries = [
    ("tickers",       "//tbody/tr/td[1]/div/a"),
    ("rev annual",      "//tbody/tr/td[10]"),
    ("income",          "//tbody/tr/td[11]")]

# Header row first, mirroring the CSV version's writerow of column names.
ws.append([name for name, xpath in search_entries])

entries = []
for name, xpath in search_entries:
    entries.append(get_elements_by_xpath(driver, xpath))

# BUG FIX: the scraped columns were collected but never written to the
# worksheet, so the saved workbook came out empty. zip(*entries) transposes
# the column lists into rows; append each row to the sheet.
for row in zip(*entries):
    ws.append(row)

# NOTE: a Workbook(write_only=True) always produces a new file. To write
# into an existing sheet and overwrite its data, open the file with
# openpyxl.load_workbook() instead of creating a write-only workbook.
wb.save('new_big_file.xlsx')
...