Как мне извлечь этот конкретный элемент с этой веб-страницы, используя axios, reactjs и cheerio - PullRequest
1 голос
/ 14 июля 2020

Я пытаюсь извлечь текущие цены на товары с сайта CME Group с помощью веб-скрапинга на axios и cheerio. Мне не удаётся подобрать правильный селектор для cheerio, чтобы получить каждый элемент таблицы, которую я парсю. Сейчас я пытаюсь просто получить месяц (например, JLY 20) из тега span в каждой строке.

Ссылка на фактическую веб-страницу: https://www.cmegroup.com/trading/metals/base/copper_quotes_settlements_futures.html

Вот что у меня есть на данный момент:

Server.js

  componentDidMount() {
    axios.get(`https://www.cmegroup.com/trading/metals/base/copper_quotes_settlements_futures.html`)
      .then(response => {
        if(response.status === 200)
          {
            const html = response.data;
            const $ = cheerio.load(html);
            let data = [];
            $('table.cmeTable').each((i, elem) => {
                console.log($(elem).find('span.noWrap').text())
                data.push({
                  Month: $(elem).find('th.cmeFixedColumn').text()
                  // title: $(elem).find('h2.entry-title').text(),
                  // excerpt: $(elem).find('p.hide_xxs').text().trim(),
                  // link: $(elem).find('h2.entry-title a').attr('href')
                })

            });
            console.log(data);
            // fs.writeFile('devtoList.json',
            // JSON.stringify(devtoListTrimmed, null, 4),
            // (err)=> console.log('File successfully written!'))
          }
        }, (error) => console.log('err') );
  }

Вот исходный HTML-код целевой страницы:

<div class="cmeTableBlockWrapper cmeContentSection cmeContentGroup" style=""><div class="cmeTableResponsiveScrollableWrapper">
<table id="settlementsFuturesProductTable" class="cmeTable" border="0" cellpadding="2" cellspacing="0" summary="Settlements Table">
    <thead>
        <tr>
            <th scope="col" class="invisibleElement cmeFixedColumn" style="height: 33px; width: 120px; min-width: 120px;">Month</th>
            <th scope="col">Open</th>
            <th scope="col">High</th>
            <th scope="col">Low</th>
            <th scope="col">Last</th>
            <th scope="col">Change</th>
            <th scope="col">Settle</th>
            <th scope="col">Estimated Volume</th>
            <th scope="col">Prior Day Open Interest</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JLY 20</span></th>
            <td>2.8990</td>
            <td>2.9210</td>
            <td>2.8945</td>
            <td>2.9155</td>
            <td><span>-.0260</span></td>
            <td>2.9160</td>
            <td class="cmeTableRight">818</td>
            <td class="cmeTableRight">3,140</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">AUG 20</span></th>
            <td>2.9105</td>
            <td>2.9330</td>
            <td>2.8980</td>
            <td>2.9270</td>
            <td><span>-.0245</span></td>
            <td>2.9250</td>
            <td class="cmeTableRight">191</td>
            <td class="cmeTableRight">2,994</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">SEP 20</span></th>
            <td>2.9160</td>
            <td>2.9460</td>
            <td>2.8980</td>
            <td>2.9300</td>
            <td><span>-.0225</span></td>
            <td>2.9325</td>
            <td class="cmeTableRight">80,068</td>
            <td class="cmeTableRight">115,684</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">OCT 20</span></th>
            <td>2.9350</td>
            <td>2.9400</td>
            <td>2.9280</td>
            <td>2.9400</td>
            <td><span>-.0220</span></td>
            <td>2.9405</td>
            <td class="cmeTableRight">10</td>
            <td class="cmeTableRight">2,012</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">NOV 20</span></th>
            <td>2.9375</td>
            <td>2.9380</td>
            <td>2.9330</td>
            <td>2.9330</td>
            <td><span>-.0215</span></td>
            <td>2.9470</td>
            <td class="cmeTableRight">10</td>
            <td class="cmeTableRight">2,123</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">DEC 20</span></th>
            <td>2.9340</td>
            <td>2.9630</td>
            <td>2.9150</td>
            <td>2.9480B</td>
            <td><span>-.0205</span></td>
            <td>2.9505</td>
            <td class="cmeTableRight">12,155</td>
            <td class="cmeTableRight">52,370</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JAN 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9465A</td>
            <td>2.9465A</td>
            <td><span>-.0195</span></td>
            <td>2.9560</td>
            <td class="cmeTableRight">4</td>
            <td class="cmeTableRight">592</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">FEB 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9525A</td>
            <td>2.9525A</td>
            <td><span>-.0195</span></td>
            <td>2.9590</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">361</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAR 21</span></th>
            <td>2.9535</td>
            <td>2.9720</td>
            <td>2.9300</td>
            <td>2.9590</td>
            <td><span>-.0185</span></td>
            <td>2.9615</td>
            <td class="cmeTableRight">8,055</td>
            <td class="cmeTableRight">31,345</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">APR 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9575A</td>
            <td>2.9575A</td>
            <td><span>-.0175</span></td>
            <td>2.9650</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">181</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAY 21</span></th>
            <td>2.9665</td>
            <td>2.9720</td>
            <td>2.9480</td>
            <td>2.9655B</td>
            <td><span>-.0165</span></td>
            <td>2.9655</td>
            <td class="cmeTableRight">1,619</td>
            <td class="cmeTableRight">6,208</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JUN 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9610A</td>
            <td>2.9610A</td>
            <td><span>-.0155</span></td>
            <td>2.9685</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">160</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JLY 21</span></th>
            <td>2.9585</td>
            <td>2.9755B</td>
            <td>2.9540</td>
            <td>2.9670B</td>
            <td><span>-.0155</span></td>
            <td>2.9690</td>
            <td class="cmeTableRight">471</td>
            <td class="cmeTableRight">934</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">AUG 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9640A</td>
            <td>2.9640A</td>
            <td><span>-.0160</span></td>
            <td>2.9715</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">114</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">SEP 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9635A</td>
            <td>2.9635A</td>
            <td><span>-.0155</span></td>
            <td>2.9720</td>
            <td class="cmeTableRight">4</td>
            <td class="cmeTableRight">437</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">OCT 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9685A</td>
            <td>2.9685A</td>
            <td><span>-.0160</span></td>
            <td>2.9755</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">79</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">NOV 21</span></th>
            <td>-</td>
            <td>-</td>
            <td>2.9720A</td>
            <td>2.9720A</td>
            <td><span>-.0160</span></td>
            <td>2.9760</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">33</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">DEC 21</span></th>
            <td>2.9795</td>
            <td>2.9795</td>
            <td>2.9520A</td>
            <td>2.9680</td>
            <td><span>-.0155</span></td>
            <td>2.9765</td>
            <td class="cmeTableRight">65</td>
            <td class="cmeTableRight">1,065</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JAN 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0155</span></td>
            <td>2.9795</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">4</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">FEB 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>2.9820</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAR 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0135</span></td>
            <td>2.9830</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">136</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">APR 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0155</span></td>
            <td>2.9910</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAY 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>2.9905</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">5</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JUN 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>2.9930</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JLY 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>2.9935</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">20</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">SEP 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>2.9995</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">DEC 22</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0030</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">25</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAR 23</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0070</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAY 23</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0095</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JLY 23</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0125</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">SEP 23</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0150</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">DEC 23</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0440</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAR 24</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0445</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAY 24</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0450</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">JLY 24</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0455</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">SEP 24</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0460</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">DEC 24</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0465</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAR 25</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0470</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">MAY 25</span></th>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td>-</td>
            <td><span>-.0145</span></td>
            <td>3.0475</td>
            <td class="cmeTableRight">0</td>
            <td class="cmeTableRight">0</td>
        </tr>
        <tr>
            <th scope="row" class="invisibleElement cmeFixedColumn" style="height: 41px; width: 120px;"><span class="noWrap">Total</span></th>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
            <td><span></span></td>
            <td></td>
            <td class="cmeTableRight">103,470</td>
            <td class="cmeTableRight">220,022</td>
        </tr>
    </tbody>
</table>

Благодарю за любую помощь. Спасибо.

1 Ответ

1 голос
/ 14 июля 2020

При просмотре сайта, на который вы ссылаетесь: причина, по которой вы не можете выбрать контент, заключается в том, что таблица данных загружается асинхронно; это означает, что ваш скрипт выполняется до рендеринга HTML.

Если вы откроете инструменты разработчика на странице, на которую вы сослались, вы увидите, что выполняется асинхронный запрос к отдельной конечной точке (endpoint).

Лучшей стратегией было бы собрать данные из URL, на который я ссылался выше.

Правка: при дальнейшем изучении исходного кода страницы выяснилось, что данные, необходимые для построения URL асинхронного запроса, можно получить из window.cmeComponents

...