Разбор данных из POST-запроса в Python - PullRequest
0 голосов
/ 22 апреля 2020

Я получаю эти данные в ответ на POST-запрос к URL-адресу. Как лучше всего их разобрать? Мне нужно только то, что находится внутри переменной «data»:

NOUPDATE<script>$('#tButtons').html('');$('#tpHead').html('');$('#tpTitle').html('Casos diarios por Estado + Nacional (Defunciones)');$("#tpHead").html('<div class="table-responsive"><table class="table table-bordered text-xs text-nowrap display nowrap" id="dataTable" width="100%" cellspacing="0" id="tpHead"><tbody><thead id="tpHead"><tr><th>cve_ent</th><th>poblacion</th><th>nombre</th><th>11-03-2020</th><th>12-03-2020</th><th>13-03-2020</th><th>14-03-2020</th><th>15-03-2020</th><th>16-03-2020</th><th>17-03-2020</th><th>18-03-2020</th><th>19-03-2020</th><th>20-03-2020</th><th>21-03-2020</th><th>22-03-2020</th><th>23-03-2020</th><th>24-03-2020</th><th>25-03-2020</th><th>26-03-2020</th><th>27-03-2020</th><th>28-03-2020</th><th>29-03-2020</th><th>30-03-2020</th><th>31-03-2020</th><th>01-04-2020</th><th>02-04-2020</th><th>03-04-2020</th><th>04-04-2020</th><th>05-04-2020</th><th>06-04-2020</th><th>07-04-2020</th><th>08-04-2020</th><th>09-04-2020</th><th>10-04-2020</th><th>11-04-2020</th><th>12-04-2020</th><th>13-04-2020</th><th>14-04-2020</th><th>15-04-2020</th><th>16-04-2020</th><th>17-04-2020</th><th>18-04-2020</th></tr></thead></tbody></table></div>');    var mTable = $("#dataTable").DataTable({
                                        "data": [["01",1434635,"AGUASCALIENTES",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],["02",3634868,"BAJA CALIFORNIA",0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,4,2,6,4,2,9,3,3,6,3,10,2,3,5,1,1,3,1,2,0,0,0,0],["03",804708,"BAJA CALIFORNIA SUR",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0],["04",1000617,"CAMPECHE",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],["07",5730367,"CHIAPAS",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],["08",3801487,"CHIHUAHUA",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,4,2,3,2,3,2,4,1,1,1,1,0,0,2,0,2,0,1,0,0,0],["09",9018645,"DISTRITO FEDERAL",0,1,0,0,1,1,0,1,3,5,3,3,5,5,4,7,8,9,8,8,4,7,11,5,6,7,7,6,10,6,3,2,4,8,4,4,0,0,0],["05",3218720,"COAHUILA",0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,5,1,3,0,0,0,1,3,0,1,1,0,0,0,0,0,0,0,0,0],["06",785153,"COLIMA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0],["10",1868996,"DURANGO",0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["11",6228175,"GUANAJUATO",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0],["12",3657048,"GUERRERO",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,2,1,0,0,1,0,0],["13",3086414,"HIDALGO",0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,2,1,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,2],["14",8409693,"JALISCO",0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,0,1,2,1,1,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0],["15",17427790,"MEXICO",0,0,0,0,2,0,0,0,0,1,2,0,0,3,1,1,4,1,2,1,3,2,3,7,3,3,7,5,3,4,8,2,0,4,2,4,1,0,0],["16",4825401,"MICHOACAN",0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,2,0,3,0,0,1,1,3,0,0,0,0,1,1,0,1,0,0],["17",2044058,"MORELOS",0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,2,0,0,0,1,0,0,1,0,1,0,0],["18",1288571,"NAYARIT",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0],["19",5610153,"NUEVO 
LEON",0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0],["20",4143593,"OAXACA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,1,1,0,0,0,0,0],["21",6604451,"PUEBLA",0,0,0,0,0,1,0,0,0,0,0,0,0,2,1,3,2,2,1,1,3,2,4,1,2,2,2,1,1,4,2,3,2,2,1,1,1,0,0],["22",2279637,"QUERETARO",0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],["23",1723259,"QUINTANA ROO",0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,2,1,1,5,5,0,0,1,2,1,4,1,0,1,0,2,1,0,1,1,0,0],["24",2866142,"SAN LUIS POTOSI",0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["25",3156674,"SINALOA",0,0,0,0,0,1,0,0,0,0,0,0,0,2,2,3,2,2,1,1,3,2,4,2,2,2,5,3,3,1,2,1,1,0,0,0,0,0,0],["26",3074745,"SONORA",0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,1,2,0,1,0,2,0,0,1,1,0,0,0,0,1,0,0,0],["27",2572287,"TABASCO",0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,0,1,0,0,1,1,2,0,3,1,3,1,1,1,0,3,1,2,5,2,2,3,2,4],["28",3650602,"TAMAULIPAS",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],["29",1380011,"TLAXCALA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0],["30",8539862,"VERACRUZ",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,3,1,0,1,0,1,0,0,1,0,1,1],["31",2259098,"YUCATAN",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,4,0,0,0,0,0,0,0,0,2,1,0,0,0,1,0,0,0],["32",1666426,"ZACATECAS",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["000",127792286,"Nacional",0,1,0,0,4,5,2,4,3,7,8,7,15,21,15,23,29,25,27,30,29,47,35,34,28,30,46,34,33,25,28,16,23,26,15,18,9,3,7]],
                                        "lengthChange": false,
                                        "searching": true,
                                        "info": false,
                                        "paging": false,
                                        "order": [[ 0, "asc" ]],
                                        "oLanguage": {
                                                "sSearch": "Buscar:"
                                          }
                                        });

                                        var buttons = new $.fn.dataTable.Buttons(mTable, {
                                                buttons: [{extend: "csv",
                                                title: "Casos_Diarios_Estado_Nacional_Defunciones",
                                                text: '<i class="fas fa-download"></i> Descargar CSV</a>',
                                                className: "btn btn-xs btn-primary",
                                                action: function ( e, dt, node, config ){ $("body").loadingModal("show"); setTimeout(function(){$.fn.dataTable.ext.buttons.csvHtml5.action.call(dt.button(this), e, dt, node, config);}, 500);}
                                        }]
                                }).container().appendTo($("#tButtons"));
                        $('body').loadingModal('hide');</script>

Ответы [ 2 ]

0 голосов
/ 22 апреля 2020

Я бы здесь целиком положился на регулярные выражения:

import re

# Captures the contents of the "data" array (without the outer "[[" / "]]").
# - The lookbehind anchors the match right after the literal text `"data": `.
# - `.*?` is non-greedy so the capture stops at the first "]]" instead of running
#   on to a later one (e.g. the "]]" that closes the "order" option).
# - re.DOTALL lets `.` cross line breaks, which can occur inside the payload
#   (the sample response has one inside the "NUEVO LEON" name).
regex = re.compile(r'(?<="data": )\[\[(.*?)\]\]', re.DOTALL)


string = """NOUPDATE<script>$('#tButtons').html('');$('#tpHead').html('');$('#tpTitle').html('Casos diarios por Estado + Nacional (Defunciones)');$("#tpHead").html('<div class="table-responsive"><table class="table table-bordered text-xs text-nowrap display nowrap" id="dataTable" width="100%" cellspacing="0" id="tpHead"><tbody><thead id="tpHead"><tr><th>cve_ent</th><th>poblacion</th><th>nombre</th><th>11-03-2020</th><th>12-03-2020</th><th>13-03-2020</th><th>14-03-2020</th><th>15-03-2020</th><th>16-03-2020</th><th>17-03-2020</th><th>18-03-2020</th><th>19-03-2020</th><th>20-03-2020</th><th>21-03-2020</th><th>22-03-2020</th><th>23-03-2020</th><th>24-03-2020</th><th>25-03-2020</th><th>26-03-2020</th><th>27-03-2020</th><th>28-03-2020</th><th>29-03-2020</th><th>30-03-2020</th><th>31-03-2020</th><th>01-04-2020</th><th>02-04-2020</th><th>03-04-2020</th><th>04-04-2020</th><th>05-04-2020</th><th>06-04-2020</th><th>07-04-2020</th><th>08-04-2020</th><th>09-04-2020</th><th>10-04-2020</th><th>11-04-2020</th><th>12-04-2020</th><th>13-04-2020</th><th>14-04-2020</th><th>15-04-2020</th><th>16-04-2020</th><th>17-04-2020</th><th>18-04-2020</th></tr></thead></tbody></table></div>');    var mTable = $("#dataTable").DataTable({
                                        "data": [["01",1434635,"AGUASCALIENTES",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],["02",3634868,"BAJA CALIFORNIA",0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,4,2,6,4,2,9,3,3,6,3,10,2,3,5,1,1,3,1,2,0,0,0,0],["03",804708,"BAJA CALIFORNIA SUR",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0],["04",1000617,"CAMPECHE",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],["07",5730367,"CHIAPAS",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],["08",3801487,"CHIHUAHUA",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,4,2,3,2,3,2,4,1,1,1,1,0,0,2,0,2,0,1,0,0,0],["09",9018645,"DISTRITO FEDERAL",0,1,0,0,1,1,0,1,3,5,3,3,5,5,4,7,8,9,8,8,4,7,11,5,6,7,7,6,10,6,3,2,4,8,4,4,0,0,0],["05",3218720,"COAHUILA",0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,5,1,3,0,0,0,1,3,0,1,1,0,0,0,0,0,0,0,0,0],["06",785153,"COLIMA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0],["10",1868996,"DURANGO",0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["11",6228175,"GUANAJUATO",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,0],["12",3657048,"GUERRERO",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,1,1,1,1,0,2,1,0,0,1,0,0],["13",3086414,"HIDALGO",0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,2,1,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,2],["14",8409693,"JALISCO",0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,0,1,2,1,1,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0],["15",17427790,"MEXICO",0,0,0,0,2,0,0,0,0,1,2,0,0,3,1,1,4,1,2,1,3,2,3,7,3,3,7,5,3,4,8,2,0,4,2,4,1,0,0],["16",4825401,"MICHOACAN",0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,2,0,3,0,0,1,1,3,0,0,0,0,1,1,0,1,0,0],["17",2044058,"MORELOS",0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,2,0,0,0,1,0,0,1,0,1,0,0],["18",1288571,"NAYARIT",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0],["19",5610153,"NUEVO 
LEON",0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0],["20",4143593,"OAXACA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,1,1,0,0,0,0,0],["21",6604451,"PUEBLA",0,0,0,0,0,1,0,0,0,0,0,0,0,2,1,3,2,2,1,1,3,2,4,1,2,2,2,1,1,4,2,3,2,2,1,1,1,0,0],["22",2279637,"QUERETARO",0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],["23",1723259,"QUINTANA ROO",0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,2,1,1,5,5,0,0,1,2,1,4,1,0,1,0,2,1,0,1,1,0,0],["24",2866142,"SAN LUIS POTOSI",0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["25",3156674,"SINALOA",0,0,0,0,0,1,0,0,0,0,0,0,0,2,2,3,2,2,1,1,3,2,4,2,2,2,5,3,3,1,2,1,1,0,0,0,0,0,0],["26",3074745,"SONORA",0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,1,2,0,1,0,2,0,0,1,1,0,0,0,0,1,0,0,0],["27",2572287,"TABASCO",0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,0,1,0,0,1,1,2,0,3,1,3,1,1,1,0,3,1,2,5,2,2,3,2,4],["28",3650602,"TAMAULIPAS",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],["29",1380011,"TLAXCALA",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0],["30",8539862,"VERACRUZ",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,3,1,0,1,0,1,0,0,1,0,1,1],["31",2259098,"YUCATAN",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,4,0,0,0,0,0,0,0,0,2,1,0,0,0,1,0,0,0],["32",1666426,"ZACATECAS",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],["000",127792286,"Nacional",0,1,0,0,4,5,2,4,3,7,8,7,15,21,15,23,29,25,27,30,29,47,35,34,28,30,46,34,33,25,28,16,23,26,15,18,9,3,7]],
                                        "lengthChange": false,
                                        "searching": true,
                                        "info": false,
                                        "paging": false,
                                        "order": [[ 0, "asc" ]],
                                        "oLanguage": {
                                                "sSearch": "Buscar:"
                                          }
                                        });

                                        var buttons = new $.fn.dataTable.Buttons(mTable, {
                                                buttons: [{extend: "csv",
                                                title: "Casos_Diarios_Estado_Nacional_Defunciones",
                                                text: '<i class="fas fa-download"></i> Descargar CSV</a>',
                                                className: "btn btn-xs btn-primary",
                                                action: function ( e, dt, node, config ){ $("body").loadingModal("show"); setTimeout(function(){$.fn.dataTable.ext.buttons.csvHtml5.action.call(dt.button(this), e, dt, node, config);}, 500);}
                                        }]
                                }).container().appendTo($("#tButtons"));
                        $('body').loadingModal('hide');</script>"""

import json

# The value that follows "data": is a valid JSON array of rows, so let the JSON
# decoder parse it. Splitting the text on "],[" and "," would leave every value as
# a string, keep the literal quote characters, and break on any comma inside a name.
data = re.search(r'"data"\s*:\s*', string)
if data is None:
    raise ValueError('no "data" key found in the response')

# raw_decode() parses exactly one JSON value starting at the given index and
# ignores whatever follows it (the rest of the JavaScript). strict=False accepts
# raw control characters, such as a line break, inside string values.
dataArray, _ = json.JSONDecoder(strict=False).raw_decode(string, data.end())
print(dataArray)

Примечания:

`r'(?<=\"data\"\: )\[\[(.*)\]\]'` означает: после `"data": ` захватить всё, что находится между `[[` и `]]`.

Круглые скобки образуют группу, что позволяет извлечь содержимое без внешних `[[` и `]]`.

Дальше остаётся только разбить строку с помощью split.

0 голосов
/ 22 апреля 2020

Самый простой способ — действовать «в лоб», с помощью регулярного выражения. Прочитайте ответ в строку и выполните по ней поиск, чтобы получить нужные данные. Это должно сработать:

import re
# assuming your raw data is in datastr
# Pattern for the `"data": [[...]]` fragment of the response.
# - Raw string: avoids the invalid escape sequences (`\:`, `\]`) that a plain
#   string literal would contain.
# - `(?s)` makes `.` match line breaks too, so a payload spanning lines still matches.
# - `.+?` is non-greedy, so the match ends at the first "]]".
expr = r'(?s)("data":.+?\]\])'
data = re.search(expr, datastr)
# re.search returns None when nothing matches; fail with a clear message
# instead of an opaque TypeError from indexing None.
if data is None:
    raise ValueError('no "data" array found in the response')
print(data[0])    # the extracted data
...