Как извлечь данные JSON из строки в Python? - PullRequest
0 голосов
/ 27 марта 2020

Ниже приведены данные, которые я получил с веб-сайта с помощью скрапинга; они находятся в теге <script>. Я хочу извлечь данные из переменной var dataSet.

Я преобразовал вывод в строку (с помощью Python), но мне не удаётся извлечь dataSet (который, как я полагаю, представляет собой данные JSON); затем я хотел бы преобразовать его в DataFrame.

Я новичок в Python, поэтому буду благодарен за помощь! Приведённый ниже вывод получен при выполнении print(data) в Sublime.

<script charset="utf-8" type="text/javascript">
// `ending` is the upper bound that moveNext() clamps `current` to; `current` is
// the dataSet key of the first of the four columns rendered by updateIt().
// Both are declared explicitly: the chained form `var current = ending = 10`
// only declares `current` and leaves `ending` as an implicit global, which
// throws a ReferenceError in strict mode.
var ending = 10;
var current = ending;
var dataSet = [{"1":{"data":"2007-08"},"2":{"data":"2008-09"},"3":{"data":"2009-10"},"4":{"data":"2010-11"},"5":{"data":"2011-12"},"6":{"data":"2012-13"},"7":{"data":"2013-14"},"8":{"data":"2014-15"},"9":{"data":"2015-16"},"10":{"data":"2016-17"},"11":{"data":"2017-18"},"12":{"data":"2018-19"},"13":{"data":"2019-20"}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":0}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":1},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":0},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":0},"10":{"data":0},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":2},"10":{"data":2},"11":{"data":1},"12":{"data":0},"13":{"data":0}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":0}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":1},"6":{"data":2},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data"
:1},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":1},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":0},"9":{"data":0},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":0},"9":{"data":0},"10":{"data":0},"11":{"data":0},"12":{"data":0},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":0},"9":{"data":0},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":0},"8":{"data":0},"9":{"data":0},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":0},"2":{"data":0},"3":{"data":0},"4":{"data":0},"5":{"data":0},"6":{"data":0},"7":{"data":1},"8":{"data":0},"9":{"data":0},"10":{"data":0},"11":{"data":0},"12":{"data":0},"13":{"data":0}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":0},"6":{"data":1},"7":{"data":1},"8":{"data":0},"9":{"data":1},"10":{"data":1},"11":{"data":2},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":1},"6":{"data":1},"7":{"data":1},"8":{"data":0},"9":{"data":1},"10":{"data":1},"11":{"data":2},"12":{"data":0},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"
5":{"data":1},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":2},"4":{"data":1},"5":{"data":1},"6":{"data":3},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":1},"11":{"data":1},"12":{"data":1},"13":{"data":1}},{"1":{"data":1},"2":{"data":1},"3":{"data":1},"4":{"data":1},"5":{"data":1},"6":{"data":1},"7":{"data":1},"8":{"data":1},"9":{"data":1},"10":{"data":0},"11":{"data":0},"12":{"data":0},"13":{"data":1}}];

// DOM-ready setup: wires the previous/next year buttons, renders the initial
// table contents via updateIt(), hides all ledger tooltips and attaches the
// tooltip and popover event handlers.
$(function () {
    var PREVIOUS_BUTTON = '#previous_years_button';
    var NEXT_BUTTON = '#next_years_button';
    var TIP_TRIGGER = ".ledger-trigger-tip";

    // True when the key event carries key code 13 or 32 (Enter or Space).
    function isActivationKey(event) {
        return event.which == 13 || event.which == 32;
    }

    // Removes the disabled styling from both paging buttons.
    function enableBothButtons() {
        $(PREVIOUS_BUTTON).removeClass('previous_years_button_disabled');
        $(NEXT_BUTTON).removeClass('next_years_button_disabled');
    }

    // Steps one column back (never below 1), re-renders, and marks the
    // "previous" button disabled once the first column is reached.
    function movePrevious() {
        current = Math.max(1, current - 1);
        updateIt();
        enableBothButtons();
        if (current == 1) {
            $(PREVIOUS_BUTTON).addClass('previous_years_button_disabled');
        }
    }

    // Steps one column forward (never above `ending`), re-renders, and marks
    // the "next" button disabled once `ending` is reached.
    function moveNext() {
        current = Math.min(ending, current + 1);
        updateIt();
        enableBothButtons();
        if (current == ending) {
            $(NEXT_BUTTON).addClass('next_years_button_disabled');
        }
    }

    // Finds the tooltip element that shares a parent with the given trigger.
    function tipFor(trigger) {
        return $(trigger).parent().children('.ledger-tip');
    }

    function showTip() {
        tooltipShow(tipFor(this));
    }

    function hideTip() {
        tooltipHide(tipFor(this));
    }

    // The "next" button starts out styled as disabled.
    $(NEXT_BUTTON).addClass('next_years_button_disabled');

    // Mouse and keyboard (Enter/Space) activation for both paging buttons.
    $(PREVIOUS_BUTTON).click(function () {
        movePrevious();
    });
    $(PREVIOUS_BUTTON).keypress(function (event) {
        if (isActivationKey(event)) {
            movePrevious();
        }
    });
    $(NEXT_BUTTON).click(function () {
        moveNext();
    });
    $(NEXT_BUTTON).keypress(function (event) {
        if (isActivationKey(event)) {
            moveNext();
        }
    });

    // Initial render of the table cells.
    updateIt();

    // Tooltips are hidden by default, both visually and for assistive tech.
    $(".ledger-tip").attr("aria-hidden", "true");
    $(".ledger-tip").addClass("hidden");

    // Show the tooltip on focus or hover.
    $(TIP_TRIGGER).focus(showTip);
    $(TIP_TRIGGER).mouseover(showTip);

    // Hide it again on blur or when the pointer leaves.
    $(TIP_TRIGGER).blur(hideTip);
    $(TIP_TRIGGER).mouseleave(hideTip);

    // Key code 27 (Esc) closes the tooltip and swallows the key event.
    $(TIP_TRIGGER).keydown(function (ev) {
        var tip = tipFor(this);
        if (ev.which != 27) {
            return;
        }
        tooltipHide(tip);
        ev.preventDefault();
        return false;
    });

    // Cancel the default click action on popover triggers, then initialise the
    // .popover() plugin on them (presumably Bootstrap's popover; confirm).
    $('.ledger-popover')
        .on('click', function (e) {
            e.preventDefault();
            return true;
        })
        .popover({
            container: 'body'
        });
});

/**
 * Builds the HTML shown in a table cell for one data value.
 *
 * Values that do not coerce to a number (for example the "2007-08" year
 * labels) are returned unchanged. Numeric values get a visually hidden
 * suffix describing the count of authorized courses.
 *
 * @param {*} num - the raw cell value.
 * @returns {*} the input itself when it is not numeric, otherwise an HTML string.
 */
function textIt(num) {
    // The global isNaN coerces its argument, so numeric strings count as numbers.
    if (isNaN(num)) {
        return num;
    }

    var markup;
    if (num == 1) {
        markup = '1<span class="hidden"> authorized course</span>';
    } else if (num > 1) {
        markup = num + '<span class="hidden"> authorized courses</span>';
    } else {
        // Zero and negative values are rendered as a dash.
        markup = '--<span class="hidden"> no authorized courses</span>';
    }
    return markup;
}

/**
 * Re-renders the four dynamic cells (data-dyn 1 to 4) of every table row.
 *
 * For each entry of the global `dataSet`, the cells of the matching
 * `.apcl-row[data-row=<index>]` row are filled with the values stored under
 * the keys `current` to `current + 3`, formatted by textIt().
 */
function updateIt() {
    $.each(dataSet, function (rowIndex, rowData) {
        var rowSelector = '.apcl-row[data-row=' + rowIndex + ']';
        // Keys of the four visible columns, starting at the current one.
        var keys = [current, current + 1, current + 2, current + 3];

        for (var i = 0; i < keys.length; i++) {
            var dyn = i + 1;
            var cellSelector =
                rowSelector + ' th[data-dyn=' + dyn + '], ' +
                rowSelector + ' td[data-dyn=' + dyn + ']';
            $(cellSelector).html(textIt(rowData[keys[i]]['data']));
        }
    });
}

/**
 * Makes a tooltip visible.
 *
 * Sets `aria-hidden` to "false" and removes the "hidden" class.
 *
 * @param {Object} tip - object exposing attr() and removeClass(); the call
 *     sites in this file pass a jQuery collection.
 */
function tooltipShow(tip) {
    var ariaHidden = "false";
    tip.attr("aria-hidden", ariaHidden);
    tip.removeClass("hidden");
}

/**
 * Hides a tooltip.
 *
 * Sets `aria-hidden` to "true" and adds the "hidden" class.
 *
 * @param {Object} tip - object exposing attr() and addClass(); the call
 *     sites in this file pass a jQuery collection.
 */
function tooltipHide(tip) {
    var ariaHidden = "true";
    tip.attr("aria-hidden", ariaHidden);
    tip.addClass("hidden");
}
</script>

1 Ответ

0 голосов
/ 27 марта 2020

IIUC: вы можете использовать регулярные выражения для извлечения нужных данных, а затем разобрать извлечённые данные с помощью встроенного модуля json в Python.

Попробуйте:

import re
import json

# Pull the array literal assigned to `var dataSet` out of the scraped text.
# `data` is the scraped <script> contents as a string, defined by the caller.
# - The pattern is a raw string so `\s` and `\[` reach the regex engine intact
#   instead of being treated as (invalid) string escapes.
# - re.DOTALL lets `.` match newlines, so a literal that spans several lines
#   is still captured.
# - The non-greedy `.*?` stops at the first `]`, which is the end of the
#   literal as long as it contains no nested arrays.
mobj = re.search(r"var\s*dataSet\s*=\s*(\[.*?\])", data, re.DOTALL)
if mobj is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise ValueError("could not find 'var dataSet = [...]' in data")
json_obj = json.loads(mobj.group(1))

print(json.dumps(json_obj, indent=4))

Вывод:

[
    {
        "1": {
            "data": "2007-08"
        },
        "2": {
            "data": "2008-09"
        },
        "3": {
            "data": "2009-10"
        },
        "4": {
            "data": "2010-11"
        },
        "5": {
            "data": "2011-12"
        },
        "6": {
            "data": "2012-13"
        },
        "7": {
            "data": "2013-14"
        },
        "8": {
            "data": "2014-15"
        },
        "9": {
            "data": "2015-16"
        },
        "10": {
            "data": "2016-17"
        },
        "11": {
            "data": "2017-18"
        },
        "12": {
            "data": "2018-19"
        },
        "13": {
            "data": "2019-20"
        }
    },
...
...
]
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...