Соскоб YouTube с кукловодом и node.js - PullRequest
0 голосов
/ 12 января 2020

Я пытаюсь очистить раздел видео на канале YouTube. Пока что мне удалось получить заголовок, просмотры, дату и краткую информацию о некоторых видео. У меня есть пара вопросов:

1- Я думаю, кукловод использует браузер хрома каждый раз, когда я запускаю скрипт. Есть ли способ отправки HTTP-запросов без использования браузера? (Я знаю, что Cheerio делает это, но по какой-то причине не смог выбрать элементы, которые я хочу.) 2- Могу ли я заставить кукловода прокрутить страницу немного перед тем, как делать это, чтобы страница загружала больше видео, чтобы я мог ее почистить. Я могу только очистить первые 30 видео сейчас. 3- По какой-то причине я не могу получить значения атрибута sr c после 12-го видео. Как я могу это исправить? 4 - Я получаю символы на выходе, когда я переключаю язык с Engli sh на другие языки. Как я могу избавиться от этих персонажей? 5- Я знаю, что node.js для серверных приложений, но возможно ли превратить этот скрипт в расширение для браузера или хотя бы создать страницу, похожую на страницу подписки на YouTube? Вот скрипт:

const puppeteer = require('puppeteer');

let scrape = async () => {
    const browser = await puppeteer.launch({headless: true});
    const page = await browser.newPage();

    await page.goto('https://www.youtube.com/user/PewDiePie/videos');
    var links =[]
    for (var i=1; i<=30; i++){ 
        //grab href and src(thumbnail) of each video

        var href = await page.$$eval("ytd-grid-video-renderer.style-scope:nth-child("+i+") > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > h3:nth-child(1) > a:nth-child(2)", el => el.map(x => x.getAttribute("href")));
        //var src = await page.$$eval("ytd-grid-video-renderer.style-scope:nth-child("+i+") > div:nth-child(1) > ytd-thumbnail:nth-child(1) > a:nth-child(1) > yt-img-shadow:nth-child(1) > img:nth-child(1)", el => el.map(x => x.getAttribute("src")));
        var src2 = await page.evaluate('document.querySelector("ytd-grid-video-renderer.style-scope:nth-child('+i+') > div:nth-child(1) > ytd-thumbnail:nth-child(1) > a:nth-child(1) > yt-img-shadow:nth-child(1) > img:nth-child(1)").getAttribute("src")');


        href="https://www.youtube.com"+href;


        links.push({href,src2});
    }

    const result = await page.evaluate(() => {
        let viddata = []; // Create an empty array that will store our data
        let channelName = document.querySelector('ytd-channel-name.ytd-c4-tabbed-header-renderer > div:nth-child(1) > div:nth-child(1) > yt-formatted-string:nth-child(1)').innerHTML;

        var numvids =document.querySelector('div.ytd-grid-renderer:nth-child(2)').childElementCount;
        console.log("THERE ARE "+numvids+" VIDEOS");
        for (var i=1; i<numvids; i++){ // Loop through each video

            var title = document.querySelector('ytd-grid-video-renderer.style-scope:nth-child('+i+') > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > h3:nth-child(1) > a:nth-child(2)').innerHTML; 
            var views = document.querySelector('ytd-grid-video-renderer.style-scope:nth-child('+i+') > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > span:nth-child(1)').innerHTML;
            var date = document.querySelector('ytd-grid-video-renderer.style-scope:nth-child('+i+') > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > span:nth-child(2)').innerHTML;

            viddata.push({title,views,date,channelName});  
        }

        return viddata; // Return our data array
    });

    //merge href and src with other data
    for (var i=0; i<29; i++){
        result[i].links=links[i];
    }


    browser.close();
    return result; // Return the data
};


scrape().then((value) => {
    console.log(value);  
});

Вывод:

[
  {
    title: 'He payed $150 000 to look like BTS JIMIN',
    views: '3,1&nbsp;Mn görüntüleme',
    date: '17 saat önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=Wv1E7AmzUqI',
      src2: 'https://i.ytimg.com/vi/Wv1E7AmzUqI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLB7XV9GtFJFfXFJVX5EmqWisw_j-A'
    }
  },
  {
    title: '5/5 Rated Pewdiepie Fan Game',
    views: '3,5&nbsp;Mn görüntüleme',
    date: '1 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=pz0hWlevaJ8',
      src2: 'https://i.ytimg.com/vi/pz0hWlevaJ8/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLApOpcA7XV5ds6QZmfqwz7fuT9UdA'
    }
  },
  {
    title: 'Designs that will make you MAD!',
    views: '5,2&nbsp;Mn görüntüleme',
    date: '2 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=HcOw8mxVdvQ',
      src2: 'https://i.ytimg.com/vi/HcOw8mxVdvQ/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLA-Kzdbw4TG-8gYUWULxIYxX-LZ2A'
    }
  },
  {
    title: 'You Laugh You DONATE',
    views: '5,2&nbsp;Mn görüntüleme',
    date: '3 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=C8r3GhpWJEI',
      src2: 'https://i.ytimg.com/vi/C8r3GhpWJEI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAv5NQ77j8woabvUxnOAoY5Lx8VmA'
    }
  },
  {
    title: 'NINJA is drafted for WW3...',
    views: '6,2&nbsp;Mn görüntüleme',
    date: '4 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=5WF0D4piAsA',
      src2: 'https://i.ytimg.com/vi/5WF0D4piAsA/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLCQi6Q5XYFRi0qRd5Ge7ShMnDZXMg'
    }
  },
  {
    title: '2020 Memes are gonna be EPIC',
    views: '5,9&nbsp;Mn görüntüleme',
    date: '5 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=4jlDCS-z7TI',
      src2: 'https://i.ytimg.com/vi/4jlDCS-z7TI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLDt5HHFhHYwgRbFpfCD6ZCeL-AYXg'
    }
  },
  {
    title: 'I FAILED the EASIEST Test',
    views: '5,3&nbsp;Mn görüntüleme',
    date: '6 gün önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=olx4XJybNhM',
      src2: 'https://i.ytimg.com/vi/olx4XJybNhM/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLBJIy9j9cEhK95hRX5nxp2DFqUFWA'
    }
  },
  {
    title: 'Pewdiepie NETWORTH revealed! ?PEW NEWS ?',
    views: '4,8&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=8gvxaYH6sO0',
      src2: 'https://i.ytimg.com/vi/8gvxaYH6sO0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAsSXfC88MFfZvVkphMN1iOdxY8mw'
    }
  },
  {
    title: 'Ace of Seafood - The rise of the Anthropods',
    views: '3,6&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=ZddvddGEQg0',
      src2: 'https://i.ytimg.com/vi/ZddvddGEQg0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAFnhCQoJURNUwudBbsHQ1DQTlfzA'
    }
  },
  {
    title: 'Happy Wheels is Cancelled',
    views: '5,9&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=dPjJJxUTr4Y',
      src2: 'https://i.ytimg.com/vi/dPjJJxUTr4Y/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAjOZgpw30Az-_puHR2tp5AdPUJLw'
    }
  },
  {
    title: 'DIWHY top All Reddit - 5 Minute Crafts - Needs to be STOPPED! #59 REDDIT REVIEW',
    views: '6,9&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=xdjj5sAOfBg',
      src2: 'https://i.ytimg.com/vi/xdjj5sAOfBg/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLCQFDJnig9HMnqa2fOTbTInpk-fXQ'
    }
  },
  {
    title: 'Decade of Pewdiepie, photos from my childhood',
    views: '4,4&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: {
      href: 'https://www.youtube.com/watch?v=AAODp5upEF0',
      src2: 'https://i.ytimg.com/vi/AAODp5upEF0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLC5d0P0Jpz2z0IadCv4rUI_LI6bEQ'
    }
  },
  {
    title: 'Addressing the Reddit Controversy - LWIAY #00104',
    views: '5,4&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=pSi5IzMs13o', src2: null }
  },
  {
    title: "YouTube Rewind 2019, but it's actually good",
    views: '14&nbsp;Mn görüntüleme',
    date: '1 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=diT6jc9flkc', src2: null }
  },
  {
    title: 'Answering Very Personal Questions',
    views: '6,3&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=IcJhmhA8tHE', src2: null }
  },
  {
    title: '#59 [REDDIT REVIEW]',
    views: '4,9&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=vhl9wWLv2Yo', src2: null }
  },
  {
    title: 'Pigeon Simulator',
    views: '3,3&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=J5P-7qGkomk', src2: null }
  },
  {
    title: 'Terraria - Part 6  - My wedding 2.0',
    views: '3,7&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=9zH_4RPaCvI', src2: null }
  },
  {
    title: 'Jump King - i HATE this game',
    views: '3,8&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=Sk6_yurXCJg', src2: null }
  },
  {
    title: 'You LAUGH You LAUGH Challenge (Impossible)(NotEasy)  YLYL #0068',
    views: '5,5&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=XEMEYM43Ihk', src2: null }
  },
  {
    title: 'Misery STALKER: Call of Pripyat - Mod - NOT playing this again....................................',
    views: '4,4&nbsp;Mn görüntüleme',
    date: '2 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=ELF-koTSnUM', src2: null }
  },
  {
    title: 'Unboxing 100 MIL Award 2.0 - LWIAY #00103',
    views: '7,4&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=zbgxk5OvpcM', src2: null }
  },
  {
    title: 'Video flagged for: False Information  [MEME REVIEW] ? ?#73',
    views: '6,4&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=K2i-fPWWy4A', src2: null }
  },
  {
    title: 'Terraria - Part 5 - I beat the HARDEST Boss on 1st TRY! world record',
    views: '4,8&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=8kI-CtnWez4', src2: null }
  },
  {
    title: 'World of Tanks - Sweden FINALLY invades the WORLD!',
    views: '3,9&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=LP0MSIfrhHg', src2: null }
  },
  {
    title: "Breaking News: 'Pewdiepie Has QUIT YouTube' ?PEW NEWS ?",
    views: '6,3&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=RoFSqtrivFs', src2: null }
  },
  {
    title: 'I hate twitter',
    views: '5,2&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=1n_cPIhag28', src2: null }
  },
  {
    title: 'I will get in trouble for this (not good) /r/cursedcomments #58 [REDDIT REVIEW]',
    views: '5,2&nbsp;Mn görüntüleme',
    date: '3 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=M6nnYaUjeqw', src2: null }
  },
  {
    title: "YouTube's New Update Has A BIG FLAW! ?PEW NEWS ?",
    views: '5,4&nbsp;Mn görüntüleme',
    date: '4 hafta önce',
    channelName: 'PewDiePie',
    links: { href: 'https://www.youtube.com/watch?v=t9-4eMdBejk', src2: null }
  }
]
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...