I'm trying to do some formatting and analysis with a macro I use in Excel. Specifically, I'd like the macro to run immediately after the Excel file is created by my Python script. Can anyone give me some general guidance on how to do this? I know there's xlwings, but I'm struggling to find a definitive answer to my question. The reason I want to run the macros from Python is that my script creates 100-120 separate Excel files, each consisting of 3 sheets. Opening each one individually and running the macro would be horribly inefficient. I realize the answer to my question is probably fairly simple, so I apologize if it seems obvious. I've posted my script below just for reference:
from datetime import datetime
from lxml import html
import requests
import numpy as np
import pandas as pd
from urllib.request import urlopen as ur
from bs4 import BeautifulSoup as soup
# shared request headers, defined once instead of once per statement page
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Pragma': 'no-cache',
    'Referer': 'https://google.com',  # the HTTP header is spelled 'Referer'
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}

# finviz screener pagination suffixes
collection = ['&ar=180', '&r=21&ar=180', '&r=41&ar=180', '&r=61&ar=180', '&r=81&ar=180', '&r=101&ar=180']
for url in collection:  # scrape each screener results page
    my_url = 'https://finviz.com/screener.ashx?v=111&f=cap_smallover,fa_eps5years_o5,fa_grossmargin_o10,fa_roe_o10,fa_sales5years_o5,fa_salesqoq_o5,geo_usa,sh_avgvol_o750,ta_beta_u2' + url
    uClient = ur(my_url)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    stock_tickers = page_soup.find_all("a", {"class": "screener-link-primary"})  # all tickers on the page
    for tickers in stock_tickers:
        ticker = tickers.text  # scrape the three statements for this ticker
        # balance sheet data
        my_url1 = 'https://finance.yahoo.com/quote/' + ticker + '/balance-sheet?p=' + ticker
        page = requests.get(my_url1, headers=headers)  # headers must be passed as a keyword argument
        tree = html.fromstring(page.content)
        table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
        #assert len(table_rows) > 0
        parsed_rows = []
        for table_row in table_rows:
            parsed_row = []
            el = table_row.xpath("./div")
            none_count = 0
            for rs in el:
                try:
                    (text,) = rs.xpath('.//span/text()[1]')
                    parsed_row.append(text)
                except ValueError:
                    parsed_row.append(np.nan)
                    none_count += 1
            if none_count < 4:  # keep only rows that aren't mostly empty
                parsed_rows.append(parsed_row)
        df_balancesheet = pd.DataFrame(parsed_rows)
        df_balancesheet = df_balancesheet.set_index(0)
        df_balancesheet = df_balancesheet.transpose()
        cols = list(df_balancesheet.columns)
        cols[0] = 'Date'
        df_balancesheet = df_balancesheet.set_axis(cols, axis='columns', inplace=False)
        numeric_columns = list(df_balancesheet.columns)[1:]
        # income statement data
        my_url2 = 'https://finance.yahoo.com/quote/' + ticker + '/financials?p=' + ticker
        page = requests.get(my_url2, headers=headers)
        tree = html.fromstring(page.content)
        table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
        #assert len(table_rows) > 0
        parsed_rows = []
        for table_row in table_rows:
            parsed_row = []
            el = table_row.xpath("./div")
            none_count = 0
            for rs in el:
                try:
                    (text,) = rs.xpath('.//span/text()[1]')
                    parsed_row.append(text)
                except ValueError:
                    parsed_row.append(np.nan)
                    none_count += 1
            if none_count < 4:
                parsed_rows.append(parsed_row)
        df_incomestatement = pd.DataFrame(parsed_rows)
        df_incomestatement = df_incomestatement.set_index(0)
        df_incomestatement = df_incomestatement.transpose()
        cols = list(df_incomestatement.columns)
        cols[0] = 'Date'
        df_incomestatement = df_incomestatement.set_axis(cols, axis='columns', inplace=False)
        numeric_columns = list(df_incomestatement.columns)[1:]
        # cash flow data
        my_url3 = 'https://finance.yahoo.com/quote/' + ticker + '/cash-flow?p=' + ticker
        page = requests.get(my_url3, headers=headers)
        tree = html.fromstring(page.content)
        table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
        #assert len(table_rows) > 0
        parsed_rows = []
        for table_row in table_rows:
            parsed_row = []
            el = table_row.xpath("./div")
            none_count = 0
            for rs in el:
                try:
                    (text,) = rs.xpath('.//span/text()[1]')
                    parsed_row.append(text)
                except ValueError:
                    parsed_row.append(np.nan)
                    none_count += 1
            if none_count < 4:
                parsed_rows.append(parsed_row)
        df_cashflow = pd.DataFrame(parsed_rows)
        df_cashflow = df_cashflow.set_index(0)
        df_cashflow = df_cashflow.transpose()
        cols = list(df_cashflow.columns)
        cols[0] = 'Date'
        df_cashflow = df_cashflow.set_axis(cols, axis='columns', inplace=False)
        numeric_columns = list(df_cashflow.columns)[1:]
        # write the three statements to one workbook per ticker
        date = datetime.today().strftime('%Y-%m-%d')
        writer = pd.ExcelWriter(ticker + '-' + date + '-scraped.xlsx')
        df_incomestatement.to_excel(writer, sheet_name='Income Statement')
        df_balancesheet.to_excel(writer, sheet_name='Balance Sheet')
        df_cashflow.to_excel(writer, sheet_name='Statement of Cash Flows')
        writer.save()
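
For what it's worth, here is the kind of batch step I'm imagining with xlwings once all the files exist (a minimal, untested sketch). Since the .xlsx files written by pandas can't hold VBA themselves, it assumes the macro lives in a separate macro-enabled workbook, which I've called Macros.xlsm with a Sub named FormatReport; both names are placeholders:

import glob
import xlwings as xw

app = xw.App(visible=False)  # one hidden Excel instance for the whole batch
try:
    macro_wb = app.books.open('Macros.xlsm')    # placeholder workbook holding the VBA
    run_macro = macro_wb.macro('FormatReport')  # placeholder macro name
    for path in glob.glob('*-scraped.xlsx'):
        wb = app.books.open(path)  # the newly opened book becomes the active workbook
        run_macro()                # the Sub would need to operate on ActiveWorkbook
        wb.save()
        wb.close()
    macro_wb.close()
finally:
    app.quit()  # make sure the hidden Excel process is shut down

The same call could presumably also go right after writer.save() inside the ticker loop, so each file gets formatted the moment it's written. Is that roughly the right pattern, or is there a cleaner way?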