У меня есть JSON-файл размером примерно 1,5 ГБ, который мне нужно загрузить как фрейм данных (DataFrame). Я потратил около 10 часов, пытаясь это сделать, и просмотрел все подходящие ответы на StackOverflow, но безуспешно. В качестве второго варианта я попытался преобразовать файл в CSV, а затем загрузить его как фрейм данных, но и это не удалось: в ответах на похожие вопросы люди лишь объясняли причину ошибки, не приводя работающего кода. Вот как выглядит JSON:
{'work': '2505753', 'flags': [], 'unixtime': 1260403200, 'stars': 1.0, 'nhelpful': 0, 'time': 'Dec 10, 2009', 'comment': "I really thought that I would like this book. I'm fascinated by this time period, and the plots to assassinate Hitler have always intrigued me. However, this book was so boring that I had to force myself to read it. The author no doubt has a commanding vocabulary, but his writing style and word choices made the book a chore to read. I've read dry textbooks that had more life to them than this novel. ", 'user': 'schatzi'}
{'work': '12458291', 'flags': [], 'unixtime': 1361664000, 'stars': 4.0, 'nhelpful': 0, 'time': 'Feb 24, 2013', 'comment': "After her father's death, Lena discovers that her father had been keeping many secrets from her. Lena is a member of the. Silenti, telepaths who came to our world through a portal. She must learn to navigate through the social, religious, and political pitfalls of her new life. Who can she trust? What will her role be? I enjoyed this story and the world the author created very much. ", 'user': 'aztwinmom'}
Ниже код, который я пробовал как второй вариант (преобразование в CSV). При отладке я выяснил, что ошибка вызвана одинарными кавычками, но замена "\'" на "\"" в таком огромном объёме данных займёт очень много времени.
Попытка с json
import json
import csv
import os
# Attempt to convert the review dump to CSV: parse the input file, then write
# the first 10 records through a csv writer.
# NOTE(review): the loop and `if` bodies below carry no indentation here; it
# was presumably lost when the snippet was pasted — confirm against the
# original script.
f = open('test.json')
# Fails with JSONDecodeError ("Expecting property name enclosed in double
# quotes"): the sample records use single-quoted keys and strings, i.e. they
# are Python dict literals rather than JSON. The samples also look like one
# record per line, which json.load would not accept as a single document
# either — TODO confirm against the real file.
data = json.load(f)
f.close()
# NOTE(review): open() defaults to read mode, so this handle is not writable;
# presumably a CSV output path opened with mode 'w' (and newline='') was meant.
f = open('data.json')
csv_file = csv.writer(f)
count=0
for item in data:
# NOTE(review): writerow is a method of the csv writer (csv_file), not of the
# file object f, so this call would raise AttributeError. If item is a dict,
# csv.writer.writerow would emit its keys rather than its values —
# csv.DictWriter may be what is wanted; verify.
f.writerow(item)
count+=1
# Stop after the first 10 rows.
if(count==10):
break
f.close()
Traceback
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-115-d75bae392cae> in <module>
1 f = open('test.json')
----> 2 data = json.load(f)
3 f.close()
e:\Anaconda3\lib\json\__init__.py in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
294 cls=cls, object_hook=object_hook,
295 parse_float=parse_float, parse_int=parse_int,
--> 296 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
297
298
e:\Anaconda3\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
346 parse_int is None and parse_float is None and
347 parse_constant is None and object_pairs_hook is None and not kw):
--> 348 return _default_decoder.decode(s)
349 if cls is None:
350 cls = JSONDecoder
e:\Anaconda3\lib\json\decoder.py in decode(self, s, _w)
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
339 if end != len(s):
e:\Anaconda3\lib\json\decoder.py in raw_decode(self, s, idx)
351 """
352 try:
--> 353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
355 raise JSONDecodeError("Expecting value", s, err.value) from None
JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
pd.read_json('test.json')
приводит к:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-118-771e17311e28> in <module>
----> 1 pd.read_json('test.json')
e:\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
212 else:
213 kwargs[new_arg_name] = new_arg_value
--> 214 return func(*args, **kwargs)
215
216 return cast(F, wrapper)
e:\Anaconda3\lib\site-packages\pandas\io\json\_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
606 return json_reader
607
--> 608 result = json_reader.read()
609 if should_close:
610 filepath_or_buffer.close()
e:\Anaconda3\lib\site-packages\pandas\io\json\_json.py in read(self)
729 obj = self._get_object_parser(self._combine_lines(data.split("\n")))
730 else:
--> 731 obj = self._get_object_parser(self.data)
732 self.close()
733 return obj
e:\Anaconda3\lib\site-packages\pandas\io\json\_json.py in _get_object_parser(self, json)
751 obj = None
752 if typ == "frame":
--> 753 obj = FrameParser(json, **kwargs).parse()
754
755 if typ == "series" or obj is None:
e:\Anaconda3\lib\site-packages\pandas\io\json\_json.py in parse(self)
855
856 else:
--> 857 self._parse_no_numpy()
858
859 if self.obj is None:
e:\Anaconda3\lib\site-packages\pandas\io\json\_json.py in _parse_no_numpy(self)
1087 if orient == "columns":
1088 self.obj = DataFrame(
-> 1089 loads(json, precise_float=self.precise_float), dtype=None
1090 )
1091 elif orient == "split":
ValueError: Expected object or value