Мне нужно получить данные о запасах S & P 500 за указанное время и сохранить их в формате CSV - PullRequest
0 голосов
/ 09 ноября 2019

Я хочу получить все исторические данные для S & P 500 за определенный период времени, используя yahoo finance в csv, и смог получить только 64 акции.

Я пытался изменить код, но KeyError: 'Date'продолжает появляться.

import bs4 as bs
import datetime as dt
import os
import pandas_datareader.data as web
import pickle
import requests


def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        ticker = ticker[:-1]
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers


def get_data_from_yahoo(reload_sp500=False):

    if reload_sp500:
       tickers = save_sp500_tickers()
    else:
       with open("sp500tickers.pickle", "rb") as f:
          tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
       os.makedirs('stock_dfs')
    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2016,12,31)
    for ticker in tickers:
       #just in case your connection breaks, we'd like to save our progress!
       if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
          df = web.DataReader(ticker, 'yahoo', start, end)
          df.to_csv('stock_dfs/{}.csv'.format(ticker))
       else:
          print('Already have {}'.format(ticker))

get_data_from_yahoo()

Результаты выполнения и отображаемая ошибка:

Already have MMM
Already have ABT
Already have ABBV
Already have ABMD
Already have ACN
Already have ATVI
Already have ADBE
Already have AMD
Already have AAP
Already have AES
Already have AMG
Already have AFL
Already have A
Already have APD
Traceback (most recent call last):
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Date'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "finance6.py", line 43, in <module>
    get_data_from_yahoo()
  File "finance6.py", line 37, in get_data_from_yahoo
    df = web.DataReader(ticker, 'yahoo', start, end)
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 208, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
    session=session,
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\base.py", line 251, in read
    df = self._read_one_data(self.url, params=self._get_params(self.symbols))
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 165, in _read_one_data
    prices["Date"] = to_datetime(to_datetime(prices["Date"], unit="s").dt.date)
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\frame.py", line 2995, in __getitem__
    indexer = self.columns.get_loc(key)
  File "C:\Users\Debodeep Kar\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Date'
...