У меня есть следующее, что я хотел бы проанализировать в JSON. У класса есть список объектов item также
class Item(JSONEncoder):
def __init__(self):
self.Type = ''
self.Content = ''
self.N = None
self.Parent = None
self.Items = []
def reprJSON(self):
d = dict()
for a, v in self.__dict__.items():
if (hasattr(v, "reprJSON")):
d[a] = v.reprJSON()
else:
d[a] = v
return d
Итак, когда я пытаюсь проанализировать экземпляр класса Item, root.reprJSON()
я получаю следующий результат.
{'Type': 'root',
'Content': '',
'N': 'root',
'Parent': None,
'Items': [<Item.Item at 0x10575fb3c88>,
<Item.Item at 0x10575fb3e10>,
<Item.Item at 0x10575fb3eb8>,
<Item.Item at 0x10575fbc080>,
<Item.Item at 0x10575fbc2b0>,
<Item.Item at 0x10575fc6a20>,
<Item.Item at 0x10575fc6a58>,
<Item.Item at 0x10575fc6b70>,
<Item.Item at 0x10575fc6be0>,
<Item.Item at 0x10575fc6c50>,
<Item.Item at 0x10575fc6da0>,
<Item.Item at 0x10575fc6fd0>,
<Item.Item at 0x10575fcb128>,
<Item.Item at 0x10575fcb358>,
<Item.Item at 0x10575fcba90>,
<Item.Item at 0x10575fcbb00>,
<Item.Item at 0x10575fcbb70>,
<Item.Item at 0x10575fcbc18>,
<Item.Item at 0x10575fcbda0>,
<Item.Item at 0x10575fcbfd0>,
<Item.Item at 0x10575fd3208>,
<Item.Item at 0x10575fd34a8>,
<Item.Item at 0x10575fd3550>,
<Item.Item at 0x10575fd35c0>,
<Item.Item at 0x10575fd36d8>,
<Item.Item at 0x10575fd37f0>,
<Item.Item at 0x10575fd3898>,
<Item.Item at 0x10575fd3940>,
<Item.Item at 0x10575fd39b0>,
<Item.Item at 0x10575fd3a20>,
<Item.Item at 0x10575fd3ac8>,
<Item.Item at 0x10575fd3b70>,
<Item.Item at 0x10575fd3c88>,
<Item.Item at 0x10575fd3d68>,
<Item.Item at 0x10575fd3dd8>,
<Item.Item at 0x10575fd3e10>,
<Item.Item at 0x10575fd3ef0>,
<Item.Item at 0x10575fdc080>,
<Item.Item at 0x10575fdc0b8>,
<Item.Item at 0x10575fdc128>,
<Item.Item at 0x10575fdc1d0>,
<Item.Item at 0x10575fdc240>,
<Item.Item at 0x10575fdc390>,
<Item.Item at 0x10575fdc438>,
<Item.Item at 0x10575fdc550>,
<Item.Item at 0x10575fdc5c0>,
<Item.Item at 0x10575fdc630>,
<Item.Item at 0x10575fdc6a0>,
<Item.Item at 0x10575fdc6d8>,
<Item.Item at 0x10575fdc780>,
<Item.Item at 0x10575fdc908>,
<Item.Item at 0x10575fdc9e8>,
<Item.Item at 0x10575fdca58>,
<Item.Item at 0x10575fdcac8>,
<Item.Item at 0x10575fdcb00>,
<Item.Item at 0x10575fdcba8>,
<Item.Item at 0x10575fdccc0>,
<Item.Item at 0x10575fdcd30>,
<Item.Item at 0x10575fdcda0>,
<Item.Item at 0x10575fdce48>,
<Item.Item at 0x10575fdceb8>,
<Item.Item at 0x10575fdcf28>,
<Item.Item at 0x10575fe22e8>,
<Item.Item at 0x10575fe2828>,
<Item.Item at 0x10575fe2940>,
<Item.Item at 0x10575fe2b70>,
<Item.Item at 0x10575fe2be0>,
<Item.Item at 0x10575fe2c88>,
<Item.Item at 0x10575fe2cc0>,
<Item.Item at 0x10575fe2cf8>]}
НоЯ хотел бы получить значения этого элемента также в один объект JSON. Я не знаю, как это сделать, буду признателен за любую помощь. Спасибо
Редактировать
Следующий код создает экземпляр класса элемента и заполняет его данными.
def Crawl(parsedPDF):
soup = BeautifulSoup(parsedPDF, "html.parser")
root = Item()
root.Type = "root"
root.N = "root"
parent = root
head = root
body = RemoveEmptyTags(soup.body)
for tag in body:
elements = RemoveEmptyChild(tag.contents)
for element in elements:
if element.name == "head":
head = CreateHeading(root, parent, element)
parent = head.Parent
elif element.name == "p":
AddParagraph(head, element)
elif element.name == "figure":
pass
elif element.name == "figdesc":
pass
elif element.name == "table":
#elem = AddElement(head, element)
pass
else:
#elem = AddElement(head, element)
pass
pass
return root
def AddParagraph(head, element):
# split the paragraph into multiple lines based on alphabetize bullet points
lines = split_with_AplhabetizeBullets(element.text, '\.\s(\(.*?\)\s)')
for line in lines:
item = Item()
item.Content = line
item.Type = element.name
item.Parent = head
head.Items.append(item)
def CreateHeading(root, parent, element):
item = Item()
item.Content = element.text
item.Type = element.name
item.Parent = parent
try:
item.N = element["n"]
except:
pass
if item.N is None:
bracketTextLength = 0
try:
result = re.search(r'\(.*?\)',item.Content)
bracketTextLength = len(result.group)
except:
pass
item.N = item.Content
# to check if the heading without 'N' is a heading or its a subheading
if len(item.Content) > 3 and bracketTextLength == 0:
root.Items.append(item)
item.Parent = item
pass
else:
parent.Items.append(item)
pass
else: # item.N is not None
if parent.N is None:
item.Parent = item
parent = item.Parent
pass
#else: # if the new heading sharing the same reference as of its parent then
if parent.N in item.N[:len(parent.N)]:
parent.Items.append(item)
pass
else: # if the new heading has no parent then add it into root
root.Items.append(item)
item.Parent = item
pass
return item