Я пытаюсь собрать (спарсить) данные из раздела «Видео» канала YouTube. Пока что мне удалось получить заголовок, количество просмотров, дату и краткую информацию о некоторых видео. У меня есть несколько вопросов:
1. Насколько я понимаю, Puppeteer запускает браузер Chrome каждый раз, когда я запускаю скрипт. Есть ли способ отправлять HTTP-запросы без использования браузера? (Я знаю, что Cheerio так умеет, но с ним мне почему-то не удалось выбрать нужные элементы.) 2. Можно ли заставить Puppeteer немного прокрутить страницу перед сбором данных, чтобы на ней подгрузилось больше видео, которые я смогу собрать? Сейчас я могу собрать только первые 30 видео. 3. По какой-то причине я не могу получить значение атрибута src после 12-го видео. Как это исправить? 4. Когда я переключаю язык с English на другие языки, в выводе появляются посторонние символы. Как от них избавиться? 5. Я знаю, что Node.js предназначен для серверных приложений, но можно ли превратить этот скрипт в расширение для браузера или хотя бы создать страницу, похожую на страницу подписок YouTube? Вот скрипт:
const puppeteer = require('puppeteer');
/**
 * Scrapes the video grid of a YouTube channel "videos" page with Puppeteer.
 *
 * Every grid item that is rendered when extraction runs is returned; nothing
 * is capped at a fixed count, and an empty grid yields an empty array.
 *
 * @param {object} [options]
 * @param {string} [options.url] - Page to open; defaults to the channel the
 *   script was originally written for.
 * @param {number} [options.scrollRounds=0] - How many times to scroll the last
 *   rendered grid item into view before extracting. Presumably this makes
 *   YouTube append further videos - confirm against the live page.
 * @param {number} [options.scrollPauseMs=500] - Pause after each scroll round.
 * @returns {Promise<Array<{title: ?string, views: ?string, date: ?string,
 *   channelName: string, links: {href: ?string, src2: ?string}}>>}
 *   One record per grid item, in document order. A field is `null` when its
 *   element was not found inside that item.
 * @throws Rejects when the browser cannot be launched, navigation fails, or
 *   the channel-name element is missing. The browser is closed in every case.
 */
const scrape = async ({
  url = 'https://www.youtube.com/user/PewDiePie/videos',
  scrollRounds = 0,
  scrollPauseMs = 500,
} = {}) => {
  const SITE_ORIGIN = 'https://www.youtube.com';
  const CHANNEL_NAME_SELECTOR = 'ytd-channel-name.ytd-c4-tabbed-header-renderer > div:nth-child(1) > div:nth-child(1) > yt-formatted-string:nth-child(1)';
  const VIDEO_SELECTOR = 'ytd-grid-video-renderer.style-scope';
  // Same element paths as the original per-index selectors, but anchored on
  // each grid item with :scope so no video index has to be spliced in.
  const FIELD_SELECTORS = {
    title: ':scope > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > h3:nth-child(1) > a:nth-child(2)',
    views: ':scope > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > span:nth-child(1)',
    date: ':scope > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > span:nth-child(2)',
    thumbnail: ':scope > div:nth-child(1) > ytd-thumbnail:nth-child(1) > a:nth-child(1) > yt-img-shadow:nth-child(1) > img:nth-child(1)',
  };

  const browser = await puppeteer.launch({headless: true});
  try {
    const page = await browser.newPage();
    await page.goto(url);

    for (let round = 0; round < scrollRounds; round += 1) {
      // Runs in the page: bring the last grid item into the viewport.
      await page.evaluate((selector) => {
        const items = document.querySelectorAll(selector);
        if (items.length > 0) {
          items[items.length - 1].scrollIntoView();
        }
      }, VIDEO_SELECTOR);
      await new Promise((resolve) => setTimeout(resolve, scrollPauseMs));
    }

    const channelName = await page.$eval(
      CHANNEL_NAME_SELECTOR,
      (node) => node.textContent.trim(),
    );

    // One round-trip for the whole grid. The callback is serialized and run
    // in the page, so the selectors are passed in as an argument.
    const rawVideos = await page.$$eval(
      VIDEO_SELECTOR,
      (renderers, selectors) => {
        const textOf = (root, selector) => {
          const node = root.querySelector(selector);
          return node ? node.textContent.trim() : null;
        };
        const attrOf = (root, selector, attribute) => {
          const node = root.querySelector(selector);
          return node ? node.getAttribute(attribute) : null;
        };
        return renderers.map((renderer) => ({
          title: textOf(renderer, selectors.title),
          views: textOf(renderer, selectors.views),
          date: textOf(renderer, selectors.date),
          href: attrOf(renderer, selectors.title, 'href'),
          src: attrOf(renderer, selectors.thumbnail, 'src'),
        }));
      },
      FIELD_SELECTORS,
    );

    return rawVideos.map(({ title, views, date, href, src }) => {
      const videoUrl = href === null ? null : new URL(href, SITE_ORIGIN);
      const videoId = videoUrl === null ? null : videoUrl.searchParams.get('v');
      // Thumbnails without a `src` (the sample run shows null after the 12th
      // video, presumably lazy loading) fall back to the URL pattern seen in
      // the thumbnails that did load, minus the signed query string.
      const fallbackThumbnail = videoId === null
        ? null
        : `https://i.ytimg.com/vi/${videoId}/hqdefault.jpg`;
      return {
        title,
        views,
        date,
        channelName,
        links: {
          href: videoUrl === null ? null : videoUrl.href,
          src2: src ?? fallbackThumbnail,
        },
      };
    });
  } finally {
    // Always release the Chrome process, including on a failed scrape.
    await browser.close();
  }
};
// Run the scraper and print the collected records. A failed scrape is
// reported on stderr with a non-zero exit code instead of being left as an
// unhandled promise rejection.
scrape()
  .then((value) => {
    console.log(value);
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
Вывод:
[
{
title: 'He payed $150 000 to look like BTS JIMIN',
views: '3,1 Mn görüntüleme',
date: '17 saat önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=Wv1E7AmzUqI',
src2: 'https://i.ytimg.com/vi/Wv1E7AmzUqI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLB7XV9GtFJFfXFJVX5EmqWisw_j-A'
}
},
{
title: '5/5 Rated Pewdiepie Fan Game',
views: '3,5 Mn görüntüleme',
date: '1 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=pz0hWlevaJ8',
src2: 'https://i.ytimg.com/vi/pz0hWlevaJ8/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLApOpcA7XV5ds6QZmfqwz7fuT9UdA'
}
},
{
title: 'Designs that will make you MAD!',
views: '5,2 Mn görüntüleme',
date: '2 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=HcOw8mxVdvQ',
src2: 'https://i.ytimg.com/vi/HcOw8mxVdvQ/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLA-Kzdbw4TG-8gYUWULxIYxX-LZ2A'
}
},
{
title: 'You Laugh You DONATE',
views: '5,2 Mn görüntüleme',
date: '3 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=C8r3GhpWJEI',
src2: 'https://i.ytimg.com/vi/C8r3GhpWJEI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAv5NQ77j8woabvUxnOAoY5Lx8VmA'
}
},
{
title: 'NINJA is drafted for WW3...',
views: '6,2 Mn görüntüleme',
date: '4 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=5WF0D4piAsA',
src2: 'https://i.ytimg.com/vi/5WF0D4piAsA/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLCQi6Q5XYFRi0qRd5Ge7ShMnDZXMg'
}
},
{
title: '2020 Memes are gonna be EPIC',
views: '5,9 Mn görüntüleme',
date: '5 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=4jlDCS-z7TI',
src2: 'https://i.ytimg.com/vi/4jlDCS-z7TI/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLDt5HHFhHYwgRbFpfCD6ZCeL-AYXg'
}
},
{
title: 'I FAILED the EASIEST Test',
views: '5,3 Mn görüntüleme',
date: '6 gün önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=olx4XJybNhM',
src2: 'https://i.ytimg.com/vi/olx4XJybNhM/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLBJIy9j9cEhK95hRX5nxp2DFqUFWA'
}
},
{
title: 'Pewdiepie NETWORTH revealed! ?PEW NEWS ?',
views: '4,8 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=8gvxaYH6sO0',
src2: 'https://i.ytimg.com/vi/8gvxaYH6sO0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAsSXfC88MFfZvVkphMN1iOdxY8mw'
}
},
{
title: 'Ace of Seafood - The rise of the Anthropods',
views: '3,6 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=ZddvddGEQg0',
src2: 'https://i.ytimg.com/vi/ZddvddGEQg0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAFnhCQoJURNUwudBbsHQ1DQTlfzA'
}
},
{
title: 'Happy Wheels is Cancelled',
views: '5,9 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=dPjJJxUTr4Y',
src2: 'https://i.ytimg.com/vi/dPjJJxUTr4Y/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLAjOZgpw30Az-_puHR2tp5AdPUJLw'
}
},
{
title: 'DIWHY top All Reddit - 5 Minute Crafts - Needs to be STOPPED! #59 REDDIT REVIEW',
views: '6,9 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=xdjj5sAOfBg',
src2: 'https://i.ytimg.com/vi/xdjj5sAOfBg/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLCQFDJnig9HMnqa2fOTbTInpk-fXQ'
}
},
{
title: 'Decade of Pewdiepie, photos from my childhood',
views: '4,4 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: {
href: 'https://www.youtube.com/watch?v=AAODp5upEF0',
src2: 'https://i.ytimg.com/vi/AAODp5upEF0/hqdefault.jpg?sqp=-oaymwEZCNACELwBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLC5d0P0Jpz2z0IadCv4rUI_LI6bEQ'
}
},
{
title: 'Addressing the Reddit Controversy - LWIAY #00104',
views: '5,4 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=pSi5IzMs13o', src2: null }
},
{
title: "YouTube Rewind 2019, but it's actually good",
views: '14 Mn görüntüleme',
date: '1 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=diT6jc9flkc', src2: null }
},
{
title: 'Answering Very Personal Questions',
views: '6,3 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=IcJhmhA8tHE', src2: null }
},
{
title: '#59 [REDDIT REVIEW]',
views: '4,9 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=vhl9wWLv2Yo', src2: null }
},
{
title: 'Pigeon Simulator',
views: '3,3 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=J5P-7qGkomk', src2: null }
},
{
title: 'Terraria - Part 6 - My wedding 2.0',
views: '3,7 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=9zH_4RPaCvI', src2: null }
},
{
title: 'Jump King - i HATE this game',
views: '3,8 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=Sk6_yurXCJg', src2: null }
},
{
title: 'You LAUGH You LAUGH Challenge (Impossible)(NotEasy) YLYL #0068',
views: '5,5 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=XEMEYM43Ihk', src2: null }
},
{
title: 'Misery STALKER: Call of Pripyat - Mod - NOT playing this again....................................',
views: '4,4 Mn görüntüleme',
date: '2 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=ELF-koTSnUM', src2: null }
},
{
title: 'Unboxing 100 MIL Award 2.0 - LWIAY #00103',
views: '7,4 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=zbgxk5OvpcM', src2: null }
},
{
title: 'Video flagged for: False Information [MEME REVIEW] ? ?#73',
views: '6,4 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=K2i-fPWWy4A', src2: null }
},
{
title: 'Terraria - Part 5 - I beat the HARDEST Boss on 1st TRY! world record',
views: '4,8 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=8kI-CtnWez4', src2: null }
},
{
title: 'World of Tanks - Sweden FINALLY invades the WORLD!',
views: '3,9 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=LP0MSIfrhHg', src2: null }
},
{
title: "Breaking News: 'Pewdiepie Has QUIT YouTube' ?PEW NEWS ?",
views: '6,3 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=RoFSqtrivFs', src2: null }
},
{
title: 'I hate twitter',
views: '5,2 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=1n_cPIhag28', src2: null }
},
{
title: 'I will get in trouble for this (not good) /r/cursedcomments #58 [REDDIT REVIEW]',
views: '5,2 Mn görüntüleme',
date: '3 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=M6nnYaUjeqw', src2: null }
},
{
title: "YouTube's New Update Has A BIG FLAW! ?PEW NEWS ?",
views: '5,4 Mn görüntüleme',
date: '4 hafta önce',
channelName: 'PewDiePie',
links: { href: 'https://www.youtube.com/watch?v=t9-4eMdBejk', src2: null }
}
]