Вы можете использовать библиотеку Request.js (https://www.npmjs.com/package/request) для загрузки HTML-страницы ресторана, а затем Cheerio.js (https://www.npmjs.com/package/cheerio) для разбора полученного HTML.
Вот пример разбора отзывов.
NB. Помните, что автоматический сбор данных может нарушать условия использования сайта. Например, не запрашивайте страницу каждые 100 мс — вас заблокируют!
"use strict";
const request = require('request');
const fs = require('fs');
const cheerio = require('cheerio');
const _ = require('lodash');
/* URL of the restaurant review page that the script requests and parses. Change to whichever! */
const restaurantUrl = "https://www.tripadvisor.ie/Restaurant_Review-g60745-d1954989-Reviews-Italian_Express_Pizzeria-Boston_Massachusetts.html";
/**
 * Reads the review date from a parsed rating node.
 *
 * The date is taken from the `title` attribute of the node's second child.
 *
 * @param {object} reviewRoot - Parsed DOM node of a single review rating element.
 * @returns {string|null} The `title` attribute value, or null when any step of the lookup is missing.
 */
function getReviewDate(reviewRoot) {
  // Lookup path: second child -> its attributes -> title.
  const datePath = 'children[1].attribs.title';
  const reviewDate = _.get(reviewRoot, datePath, null);
  return reviewDate;
}
/**
 * Reads the review title that belongs to a parsed rating node.
 *
 * The lookup starts at the node's parent, takes the parent's second child and
 * then follows the first child three levels down, returning that node's `data`.
 *
 * @param {object|null|undefined} reviewRoot - Parsed DOM node of a single review rating element.
 * @returns {string|null} The text found at the end of the path, or null when
 *   the root, its parent, or any step of the lookup is missing.
 */
function getReviewTitle(reviewRoot) {
  // `parent` is part of the lookup path so that a missing root yields null
  // instead of throwing, matching the behaviour of getReviewDate.
  return _.get(reviewRoot, 'parent.children[1].children[0].children[0].children[0].data', null);
}
/**
 * Collects the date and title of every review rating element in the page.
 *
 * @param {string} htmlData - HTML markup of the restaurant page.
 * @returns {Array<{date: (string|null), title: (string|null)}>} One entry per
 *   `div.rating.reviewItemInline` element, in document order.
 */
function getReviewDetails(htmlData) {
  const $ = cheerio.load(htmlData);
  const ratingNodes = $('div.rating.reviewItemInline');
  // The selection is array-like (indexed elements plus `length`), so it can be mapped directly.
  return Array.from(ratingNodes, (ratingNode) => ({
    date: getReviewDate(ratingNode),
    title: getReviewTitle(ratingNode),
  }));
}
/**
 * Extracts the rating summary rows (label name and value) from the page.
 *
 * Inspects at most the first five `label.filterLabel` elements. For each one it
 * walks the text nodes two levels below the label: a text node whose parent has
 * class `row_label` supplies `name`, any other text node supplies `value`.
 *
 * @param {string} htmlData - HTML markup of the restaurant page.
 * @returns {Array<{name?: string, value?: string}>} Entries indexed by label
 *   position. An entry is only created once a text node is found for that
 *   label, so the array is empty when the page has no matching labels.
 */
function getReviewSummaries(htmlData) {
  const $ = cheerio.load(htmlData);
  const labels = $('label.filterLabel');
  // Presumably the first five labels are the rating rows (Excellent .. Terrible).
  const maxRows = 5;
  // Never index past the labels actually present: pages with fewer than five
  // (e.g. an error or block page) would otherwise throw a TypeError.
  const rowCount = Math.min(maxRows, labels.length);
  const reviewObj = [];
  for (let i = 0; i < rowCount; i++) {
    for (const child of labels[i].children || []) {
      // Direct text children of the label have no `children` and are skipped.
      if (!child.children) continue;
      for (const grandchild of child.children) {
        if (grandchild.type !== 'text') continue;
        if (reviewObj[i] === undefined) reviewObj[i] = {};
        const isRowLabel = Boolean(
          grandchild.parent &&
          grandchild.parent.attribs &&
          grandchild.parent.attribs.class === 'row_label'
        );
        if (isRowLabel) {
          reviewObj[i].name = grandchild.data;
        } else {
          reviewObj[i].value = grandchild.data;
        }
      }
    }
  }
  return reviewObj;
}
// Request settings: fetch the restaurant page with an HTTP GET.
const options = {
  url: restaurantUrl,
  method: "get"
};
console.log('Requesting page..');
// Download the page, then print the rating summary and the per-review details.
request(options, function (error, response, body) {
  if (error) {
    console.error('error:', error);
    return;
  }
  const statusCode = response && response.statusCode;
  console.log('Response: StatusCode:', statusCode);
  // A non-2xx body is an error page rather than the restaurant markup, so do not parse it.
  if (typeof statusCode === 'number' && (statusCode < 200 || statusCode >= 300)) {
    console.error('Unexpected status code, skipping parsing:', statusCode);
    return;
  }
  const reviews = getReviewSummaries(body);
  console.log('Review summary: \r\n', reviews);
  const details = getReviewDetails(body);
  console.log("\r\n");
  console.log('Review details: \r\n', details);
});
Вы увидите примерно такой результат; теперь отображаются и даты отзывов:
Response: StatusCode: 200
Review summary:
[ { name: 'Excellent', value: '554' },
{ name: 'Very good', value: '92' },
{ name: 'Average', value: '32' },
{ name: 'Poor', value: '9' },
{ name: 'Terrible', value: '6' } ]
Review details:
[ { date: '24 June 2018',
title: 'Whatever you choose, you can\'t go wrong' },
{ date: '23 June 2018', title: 'That\'s Amore!' },
{ date: '20 June 2018', title: 'Amazing pasta' },
{ date: '20 June 2018', title: 'Best Pizza' },
{ date: '18 June 2018', title: 'Italian food' },
{ date: '16 June 2018', title: 'Boston Dinner Adventure' },
{ date: '11 June 2018',
title: 'Delicious food - friendly service' },
{ date: '3 June 2018',
title: 'Hearty, Homemade, and Delicious!!!' },
{ date: '31 May 2018',
title: 'Amazing dinner - YOU WON\'T LEAVE HUNGRY!!!' },
{ date: '31 May 2018', title: 'Homemade ' } ]