Как преобразовать вложенный JSON в таблицу CSV, используя python - PullRequest
0 голосов
/ 09 мая 2019

Я очень плохо знаком с python и хотел бы преобразовать вложенный файл JSON в таблицу CSV с помощью python.Но я столкнулся с проблемой, в то время как после экспорта моих данных в файл CSV все мои значения находятся в одной строке со многими столбцами, но я хочу, чтобы в ней было несколько строк.

Ниже приведены примеры данных:

[
    {
        "by_app": [
            {
                "app": 5,
                "cat": 3,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 372,
                        "rx_packets": 3,
                        "tx_bytes": 1361,
                        "tx_packets": 4
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 56896,
                        "rx_packets": 191,
                        "tx_bytes": 210622,
                        "tx_packets": 460
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 60837,
                "rx_packets": 203,
                "tx_bytes": 213435,
                "tx_packets": 475
            },
            {
                "app": 94,
                "cat": 19,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 64654,
                        "rx_packets": 147,
                        "tx_bytes": 19533,
                        "tx_packets": 138
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 42416,
                        "rx_packets": 68,
                        "tx_bytes": 12419,
                        "tx_packets": 74
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 5421117,
                "rx_packets": 4779,
                "tx_bytes": 243979,
                "tx_packets": 2377
            },
            {
                "app": 162,
                "cat": 20,
                "rx_bytes": 3295298,
                "rx_packets": 2935,
                "tx_bytes": 171266,
                "tx_packets": 2032
            },
            {
                "app": 209,
                "cat": 13,
                "rx_bytes": 21763,
                "rx_packets": 38,
                "tx_bytes": 4433,
                "tx_packets": 30
            },
            {
                "app": 222,
                "cat": 13,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 300,
                        "rx_packets": 3,
                        "tx_bytes": 503,
                        "tx_packets": 4
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 3452,
                        "rx_packets": 24,
                        "tx_bytes": 4176,
                        "tx_packets": 26
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 0,
                        "rx_packets": 0,
                        "tx_bytes": 396,
                        "tx_packets": 6
                    }
                ],
                "known_clients": 3,
                "rx_bytes": 4742,
                "rx_packets": 32,
                "tx_bytes": 5787,
                "tx_packets": 42
            },
            {
                "app": 167,
                "cat": 20,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 5761,
                        "rx_packets": 14,
                        "tx_bytes": 1574,
                        "tx_packets": 13
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 138686,
                "rx_packets": 237,
                "tx_bytes": 31821,
                "tx_packets": 155
            },
            {
                "app": 112,
                "cat": 4,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 135381,
                        "rx_packets": 161,
                        "tx_bytes": 42596,
                        "tx_packets": 140
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 135381,
                "rx_packets": 161,
                "tx_bytes": 42596,
                "tx_packets": 140
            },
            {
                "app": 62,
                "cat": 8,
                "rx_bytes": 7219,
                "rx_packets": 10,
                "tx_bytes": 1153,
                "tx_packets": 9
            },
            {
                "app": 185,
                "cat": 20,
                "rx_bytes": 4733026,
                "rx_packets": 4666,
                "tx_bytes": 728026,
                "tx_packets": 2688
            },
            {
                "app": 130,
                "cat": 4,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 113871,
                        "rx_packets": 121,
                        "tx_bytes": 16442,
                        "tx_packets": 116
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 113871,
                "rx_packets": 121,
                "tx_bytes": 16442,
                "tx_packets": 116
            },
            {
                "app": 65535,
                "cat": 255,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 8195,
                        "rx_packets": 27,
                        "tx_bytes": 3834,
                        "tx_packets": 25
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 27338,
                        "rx_packets": 93,
                        "tx_bytes": 11330,
                        "tx_packets": 86
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 19974,
                        "rx_packets": 181,
                        "tx_bytes": 2447,
                        "tx_packets": 34
                    },
                    {
                        "mac": "f0:9f:c2:c6:63:5a",
                        "rx_bytes": 90,
                        "rx_packets": 1,
                        "tx_bytes": 0,
                        "tx_packets": 0
                    }
                ],
                "known_clients": 4,
                "rx_bytes": 6417768,
                "rx_packets": 10254,
                "tx_bytes": 1193280,
                "tx_packets": 7326
            },
            {
                "app": 190,
                "cat": 13,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 25041,
                        "rx_packets": 34,
                        "tx_bytes": 32420,
                        "tx_packets": 49
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 25041,
                "rx_packets": 34,
                "tx_bytes": 32420,
                "tx_packets": 49
            },
            {
                "app": 106,
                "cat": 18,
                "clients": [
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 360,
                        "rx_packets": 4,
                        "tx_bytes": 360,
                        "tx_packets": 4
                    },
                    {
                        "mac": "f0:9f:c2:c6:63:5a",
                        "rx_bytes": 90,
                        "rx_packets": 1,
                        "tx_bytes": 180,
                        "tx_packets": 2
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 450,
                "rx_packets": 5,
                "tx_bytes": 540,
                "tx_packets": 6
            },
            {
                "app": 1,
                "cat": 6,
                "rx_bytes": 23370,
                "rx_packets": 35,
                "tx_bytes": 4388,
                "tx_packets": 26
            },
            {
                "app": 61,
                "cat": 9,
                "rx_bytes": 4825,
                "rx_packets": 24,
                "tx_bytes": 2040,
                "tx_packets": 24
            },
            {
                "app": 32,
                "cat": 17,
                "rx_bytes": 27068,
                "rx_packets": 42,
                "tx_bytes": 6002,
                "tx_packets": 27
            },
            {
                "app": 3,
                "cat": 24,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 3791,
                        "rx_packets": 8,
                        "tx_bytes": 1258,
                        "tx_packets": 9
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 25745,
                        "rx_packets": 109,
                        "tx_bytes": 21603,
                        "tx_packets": 104
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 29536,
                "rx_packets": 117,
                "tx_bytes": 22861,
                "tx_packets": 113
            },
            {
                "app": 63,
                "cat": 18,
                "rx_bytes": 0,
                "rx_packets": 0,
                "tx_bytes": 114,
                "tx_packets": 2
            },
            {
                "app": 21,
                "cat": 3,
                "rx_bytes": 41992,
                "rx_packets": 53,
                "tx_bytes": 9788,
                "tx_packets": 34
            },
            {
                "app": 21,
                "cat": 14,
                "rx_bytes": 31920,
                "rx_packets": 114,
                "tx_bytes": 19203,
                "tx_packets": 82
            }
        ],
        "by_cat": [
            {
                "apps": [
                    5,
                    21
                ],
                "cat": 3,
                "rx_bytes": 102829,
                "rx_packets": 256,
                "tx_bytes": 223223,
                "tx_packets": 509
            },
            {
                "apps": [
                    112,
                    130
                ],
                "cat": 4,
                "rx_bytes": 249252,
                "rx_packets": 282,
                "tx_bytes": 59038,
                "tx_packets": 256
            },
            {
                "apps": [
                    1
                ],
                "cat": 6,
                "rx_bytes": 23370,
                "rx_packets": 35,
                "tx_bytes": 4388,
                "tx_packets": 26
            },
            {
                "apps": [
                    62
                ],
                "cat": 8,
                "rx_bytes": 7219,
                "rx_packets": 10,
                "tx_bytes": 1153,
                "tx_packets": 9
            },
            {
                "apps": [
                    61
                ],
                "cat": 9,
                "rx_bytes": 4825,
                "rx_packets": 24,
                "tx_bytes": 2040,
                "tx_packets": 24
            },
            {
                "apps": [
                    209,
                    222,
                    190
                ],
                "cat": 13,
                "rx_bytes": 51546,
                "rx_packets": 104,
                "tx_bytes": 42640,
                "tx_packets": 121
            },
            {
                "apps": [
                    21
                ],
                "cat": 14,
                "rx_bytes": 31920,
                "rx_packets": 114,
                "tx_bytes": 19203,
                "tx_packets": 82
            },
            {
                "apps": [
                    32
                ],
                "cat": 17,
                "rx_bytes": 27068,
                "rx_packets": 42,
                "tx_bytes": 6002,
                "tx_packets": 27
            },
            {
                "apps": [
                    106,
                    63
                ],
                "cat": 18,
                "rx_bytes": 450,
                "rx_packets": 5,
                "tx_bytes": 654,
                "tx_packets": 8
            },
            {
                "apps": [
                    94
                ],
                "cat": 19,
                "rx_bytes": 5421117,
                "rx_packets": 4779,
                "tx_bytes": 243979,
                "tx_packets": 2377
            },
            {
                "apps": [
                    162,
                    167,
                    185
                ],
                "cat": 20,
                "rx_bytes": 8167010,
                "rx_packets": 7838,
                "tx_bytes": 931113,
                "tx_packets": 4875
            },
            {
                "apps": [
                    3
                ],
                "cat": 24,
                "rx_bytes": 29536,
                "rx_packets": 117,
                "tx_bytes": 22861,
                "tx_packets": 113
            },
            {
                "apps": [
                    65535
                ],
                "cat": 255,
                "rx_bytes": 6417768,
                "rx_packets": 10254,
                "tx_bytes": 1193280,
                "tx_packets": 7326
            }
        ],
        "last_updated": 1557123138
    }
]

Ниже приведен мой код Python:


import json
import sys
import csv

def to_string(s):
    try:
        return str(s)
    except:
        return s.encode('utf-8')

def reduce_item(key, value):
    global reduced_item

    if type(value) is list:
        i=0
        for sub_item in value:
            reduce_item(key+'_'+to_string(i), sub_item)
            i=i+1

    elif type(value) is dict:
        sub_keys = value.keys()
        for sub_key in sub_keys:
            reduce_item(key+'_'+to_string(sub_key), value[sub_key])

    else:
        reduced_item[to_string(key)] = to_string(value)


node="by_app"
fp = open("sample.json", 'r')
json_value = fp.read()
raw_data = json.loads(json_value)
fp.close()
data_to_be_processed=[]

for i in node:
    try:
        data_to_be_processed.append(raw_data[i])
    except:
        data_to_be_processed.append(raw_data)

    processed_data = []
    header = []

for item in data_to_be_processed:
    reduced_item = {}
    reduce_item(i, item)

    header += reduced_item.keys()

    processed_data.append(reduced_item)

header = list(set(header))
header.sort()


with open("test.csv", 'w+', newline='') as f:
    f_csv = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
    f_csv.writeheader()
    for row in processed_data:
        f_csv.writerow(row)


Я ожидаю, что мой вывод в CSV-файле будет:! (https://json -csv.com / c / 2Who )

1 Ответ

0 голосов
/ 09 мая 2019

В вашем подходе есть небольшая ошибка; Попробуйте перейти по следующему алгоритму

Использовать следующие зависимости

import csv
import json
import logging

from itertools import chain
from six import string_types, PY2
logger = logging.getLogger("json2csv")
JSONError = ValueError if PY2 else json.decoder.JSONDecodeError

Эта функция поможет вам преобразовать json в объект словаря путем внутреннего вызова json_to_dicts:

def json_to_csv(input_file_path, output_file_path):
    with open(input_file_path) as input_file:
        json = input_file.read()
    dicts = json_to_dicts(json)
    with open(output_file_path, "w") as output_file:
        dicts_to_csv(dicts, output_file)

Это актуальная логика для json-to-dict

def json_to_dicts(json_str):
    try:
        objects = json.loads(json_str)
    except JSONError:
        objects = [json.loads(l) for l in json_str.split('\n') if l.strip()]

    return [dict(to_keyvalue_pairs(obj)) for obj in objects]

Форматирует пару ключ-значение из словаря

def to_keyvalue_pairs(source, ancestors=[], key_delimeter='_'):
    def is_sequence(arg):
        return (not isinstance(arg, string_types)) and (hasattr(arg, "__getitem__") or hasattr(arg, "__iter__"))

    def is_dict(arg):
        return isinstance(arg, dict)

    if is_dict(source):
        result = [to_keyvalue_pairs(source[key], ancestors + [key]) for key in source.keys()]
        return list(chain.from_iterable(result))
    elif is_sequence(source):
        result = [to_keyvalue_pairs(item, ancestors + [str(index)]) for (index, item) in enumerate(source)]
        return list(chain.from_iterable(result))
    else:
        return [(key_delimeter.join(ancestors), source)]

Это запишет словарь в CSV

def dicts_to_csv(source, output_file):
    def build_row(dict_obj, keys):
        return [dict_obj.get(k, "") for k in keys]

    keys = sorted(set(chain.from_iterable([o.keys() for o in source])))
    rows = [build_row(d, keys) for d in source]

    cw = csv.writer(output_file)
    cw.writerow(keys)
    if PY2:
        for row in rows:
            cw.writerow([c.encode('utf-8') if isinstance(c, str) or isinstance(c, unicode) else c for c in row])
    else:
        for row in rows:
            cw.writerow([c for c in row])


def write_csv(headers, rows, file):
    cw = csv.writer(file)
    cw.writerow(headers)
    if PY2:
        for row in rows:
            cw.writerow([c.encode('utf-8') if isinstance(c, str) or isinstance(c, unicode) else c for c in row])
    else:
        for row in rows:
            cw.writerow([c for c in row])
...