Я пытаюсь сохранить фрейм данных в xlsx в Colab. Я получаю данные с praw:
sm = reddit.submission(url="https://www.reddit.com/r/AskReddit/comments/1irtkq/taxi_drivers_whats_the_deepest_secret_youve/")
sm.comments.replace_more(limit=0)
data = []
for top_level_comment in sm.comments.list():
data.append([top_level_comment.body,
top_level_comment.author,
top_level_comment.score,
top_level_comment.created_utc,
top_level_comment.depth,
top_level_comment.id,
top_level_comment.parent_id])
df = pd.DataFrame(data, columns=['body', 'author', 'score', 'created_utc', 'depth', 'id', 'parent_id'])
df
Все выглядит хорошо, я получил все данные. Но когда я сохраняю его, я получаю сообщение об ошибке в библиотеке praw:
directory = '/content/downloads'
file_path = posixpath.join(directory, 'reddit.xlsx')
if not os.path.exists(directory):
os.makedirs(directory)
with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
df.to_excel(writer, sheet_name='Sheet1', index=False)
worksheet = writer.sheets['Sheet1']
writer.save()
---------------------------------------------------------------------------
NotFound Traceback (most recent call last)
<ipython-input-9-b8157734da77> in <module>()
5
6 with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
----> 7 df.to_excel(writer, sheet_name='Sheet1', index=False)
8 worksheet = writer.sheets['Sheet1']
10 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in to_excel(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes)
2254 startcol=startcol,
2255 freeze_panes=freeze_panes,
-> 2256 engine=engine,
2257 )
2258
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/excel.py in write(self, writer, sheet_name, startrow, startcol, freeze_panes, engine)
737 startrow=startrow,
738 startcol=startcol,
--> 739 freeze_panes=freeze_panes,
740 )
741 if need_save:
/usr/local/lib/python3.6/dist-packages/pandas/io/excel/_xlsxwriter.py in write_cells(self, cells, sheet_name, startrow, startcol, freeze_panes)
212 wks.freeze_panes(*(freeze_panes))
213
--> 214 for cell in cells:
215 val, fmt = self._value_with_fmt(cell.val)
216
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/excel.py in get_formatted_cells(self)
685 def get_formatted_cells(self):
686 for cell in itertools.chain(self._format_header(), self._format_body()):
--> 687 cell.val = self._format_value(cell.val)
688 yield cell
689
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/excel.py in _format_value(self, val)
433 elif self.float_format is not None:
434 val = float(self.float_format % val)
--> 435 if getattr(val, "tzinfo", None) is not None:
436 raise ValueError(
437 "Excel does not support datetimes with "
/usr/local/lib/python3.6/dist-packages/praw/models/reddit/base.py in __getattr__(self, attribute)
31 """Return the value of `attribute`."""
32 if not attribute.startswith("_") and not self._fetched:
---> 33 self._fetch()
34 return getattr(self, attribute)
35 raise AttributeError(
/usr/local/lib/python3.6/dist-packages/praw/models/reddit/redditor.py in _fetch(self)
173
174 def _fetch(self):
--> 175 data = self._fetch_data()
176 data = data["data"]
177 other = type(self)(self._reddit, _data=data)
/usr/local/lib/python3.6/dist-packages/praw/models/reddit/redditor.py in _fetch_data(self)
170 name, fields, params = self._fetch_info()
171 path = API_PATH[name].format(**fields)
--> 172 return self._reddit.request("GET", path, params)
173
174 def _fetch(self):
/usr/local/lib/python3.6/dist-packages/praw/reddit.py in request(self, method, path, params, data, files)
630 """
631 return self._core.request(
--> 632 method, path, data=data, files=files, params=params
633 )
634
/usr/local/lib/python3.6/dist-packages/prawcore/sessions.py in request(self, method, path, data, files, json, params)
183 return self._request_with_retries(
184 data=data, files=files, json=json, method=method,
--> 185 params=params, url=url)
186
187
/usr/local/lib/python3.6/dist-packages/prawcore/sessions.py in _request_with_retries(self, data, files, json, method, params, url, retries)
128 retries, saved_exception, url)
129 elif response.status_code in self.STATUS_EXCEPTIONS:
--> 130 raise self.STATUS_EXCEPTIONS[response.status_code](response)
131 elif response.status_code == codes['no_content']:
132 return
NotFound: received 404 HTTP response
Я в замешательстве: у меня уже есть данные. Мне больше не нужен http requests
.
Я обнаружил, что последняя ошибка pandas составляет около timezones
. Что происходит?
raise ValueError(
"Excel does not support datetimes with "
"timezones. Please ensure that datetimes "
"are timezone unaware before writing to Excel."
)