Простой вопрос от новичка: как получить только эти данные (в зелёной рамке) из вывода, показанного на этом изображении?
(Извините, я пока не могу опубликовать изображение на Stack Overflow, потому что моя репутация ещё меньше 10. Это мой первый вопрос на SO.)
Я использую laravel 5.7 и пакеты:
Код
app/MyScraper/Bukalapak.php
<?php
namespace App\MyScraper;
use Clue\React\Buzz\Browser;
use function React\Promise\all;
use React\Promise\PromiseInterface;
use Psr\Http\Message\ResponseInterface;
use Symfony\Component\DomCrawler\Crawler;
use App\MyScraper\Objects\BukalapakObject;
/**
 * Fetches Bukalapak product pages through the injected Browser and turns
 * each returned HTML body into a BukalapakObject.
 */
class Bukalapak
{
    /** @var Browser HTTP client used to request the product pages. */
    private $browser;

    /**
     * @param Browser $browser Client whose get() result is chained with then().
     */
    public function __construct(Browser $browser)
    {
        $this->browser = $browser;
    }

    /**
     * Starts one request per URL and combines the per-URL promises with all().
     *
     * @param string ...$urls Product page URLs to scrape.
     *
     * @return PromiseInterface Combined promise returned by all().
     */
    public function scrape(string ...$urls): PromiseInterface
    {
        $promises = array_map(function ($url) {
            return $this->extractFromUrl($url);
        }, $urls);

        return all($promises);
    }

    /**
     * Reads the product fields out of a product page's HTML.
     *
     * The description block is deliberately not read: BukalapakObject has no
     * field for it, and querying it would make extraction fail on pages where
     * that block is absent.
     *
     * @param string $responseBody Raw HTML of a product page.
     *
     * @return BukalapakObject Product data built from the matched nodes.
     */
    private function extract(string $responseBody): BukalapakObject
    {
        $crawler = new Crawler($responseBody);

        $name = $crawler->filter('.c-product-detail__name')->text();
        $price = $crawler->filter('.c-product-detail-price')->attr('data-reduced-price');
        $image = $crawler->filter('.c-product-image-gallery__main img')->attr('src');

        // Stock and weight are reduced to their digits only.
        $stock = preg_replace('/[^0-9]+/', '', $crawler->filter('.qa-pd-stock')->text());
        $weight = preg_replace('/[^0-9]+/', '', $crawler->filter('.qa-pd-weight-value')->text());

        $condition = $crawler->filter('.qa-pd-condition-value > span.c-label')->text();

        // These two values are not scraped; they are fixed for every product.
        $assurance = 'Tidak';
        $courier = 'jner|j&tr';

        return new BukalapakObject($name, $price, $image, $stock, $weight, $condition, $assurance, $courier);
    }

    /**
     * Requests a single URL and maps its response body through extract().
     *
     * @param string $url Product page URL.
     *
     * @return PromiseInterface Promise chained from the browser's get() call.
     */
    private function extractFromUrl(string $url): PromiseInterface
    {
        return $this->browser->get($url)->then(function (ResponseInterface $response) {
            return $this->extract((string) $response->getBody());
        });
    }
}
app/MyScraper/Helpers/BukalapakHelper.php
<?php
namespace App\MyScraper\Helpers;
use Goutte;
/**
 * Static helpers for collecting product links from a shop page and for
 * dumping key/value data to a CSV file.
 */
class BukalapakHelper
{
    /**
     * Collects the product links found on a shop page.
     *
     * Each link is the href of the '.js-tracker-product-link' node inside a
     * '.product-display' node, with its query string removed and the site
     * origin prepended.
     *
     * @param string $shopUrl URL of the shop page to request.
     *
     * @return array List of product URLs, one per '.product-display' node.
     */
    public static function fetchAllProductsLink(string $shopUrl)
    {
        $page = Goutte::request('GET', $shopUrl);

        return $page->filter('.product-display')->each(function ($node) {
            $href = $node->filter('.js-tracker-product-link')->attr('href');

            // Keep only the part before the query string.
            return 'https://www.bukalapak.com' . explode('?', $href)[0];
        });
    }

    /**
     * Writes one CSV row per array entry in the form "key,value".
     *
     * @param string $fileName Path of the file to (over)write.
     * @param array  $data     Entries to write; the array key becomes the first column.
     *
     * @throws \RuntimeException When the file cannot be opened for writing.
     */
    public static function writeToCSV(string $fileName, array $data = [])
    {
        $handle = fopen($fileName, 'w');
        if ($handle === false) {
            throw new \RuntimeException("Unable to open {$fileName} for writing.");
        }

        try {
            // Take the key from the loop itself: key($data) is not advanced by
            // foreach, so it would report the same key for every row.
            foreach ($data as $key => $value) {
                fputcsv($handle, [$key, $value]);
            }
        } finally {
            fclose($handle);
        }
    }
}
app/MyScraper/Objects/BukalapakObject.php
<?php
namespace App\MyScraper\Objects;
/**
 * Plain data holder for the fields of one product; every field is a string
 * supplied through the constructor and exposed as a public property.
 */
final class BukalapakObject
{
    /** @var string */
    public $name;

    /** @var string */
    public $price;

    /** @var string */
    public $image;

    /** @var string */
    public $stock;

    /** @var string */
    public $weight;

    /** @var string */
    public $condition;

    /** @var string */
    public $assurance;

    /** @var string */
    public $courier;

    /**
     * Stores each argument on the property of the same name.
     *
     * @param string $name
     * @param string $price
     * @param string $image
     * @param string $stock
     * @param string $weight
     * @param string $condition
     * @param string $assurance
     * @param string $courier
     */
    public function __construct(
        string $name,
        string $price,
        string $image,
        string $stock,
        string $weight,
        string $condition,
        string $assurance,
        string $courier
    ) {
        $fields = compact(
            'name',
            'price',
            'image',
            'stock',
            'weight',
            'condition',
            'assurance',
            'courier'
        );

        // Parameter names match property names one-to-one.
        foreach ($fields as $property => $value) {
            $this->{$property} = $value;
        }
    }
}
app/Http/Controllers/TestController.php
<?php
namespace App\Http\Controllers;
use Illuminate\Http\Request;
use Clue\React\Buzz\Browser;
use App\MyScraper\Bukalapak;
use App\MyScraper\Helpers\BukalapakHelper;
/**
 * Test controller that scrapes every product of one hard-coded shop and
 * prints the scraped objects.
 */
class TestController extends Controller
{
    /**
     * Collects the shop's product links, scrapes them on a ReactPHP event
     * loop, prints the results and renders the 'backend.test' view.
     *
     * @return mixed Whatever view('backend.test') returns.
     *
     * @throws \Throwable        When the scrape promise is rejected with a throwable.
     * @throws \RuntimeException When it is rejected with a non-throwable reason.
     */
    public function index()
    {
        $shop = 'https://www.bukalapak.com/u/attiqahijab';
        $urls = BukalapakHelper::fetchAllProductsLink($shop);

        $loop = \React\EventLoop\Factory::create();
        $scraper = new Bukalapak(new Browser($loop));

        // then() returns another promise, not the resolved value, so the
        // result has to be captured from inside the callbacks.
        $data = [];
        $failure = null;
        $scraper->scrape(...$urls)->then(
            function ($result) use (&$data) {
                $data = $result;
            },
            function ($reason) use (&$failure) {
                $failure = $reason;
            }
        );

        // The callbacks above only fire while the loop is running.
        $loop->run();

        if ($failure !== null) {
            throw $failure instanceof \Throwable
                ? $failure
                : new \RuntimeException('Scraping failed.');
        }

        print_r($data);

        return view('backend.test');
    }
}
Ожидаемые результаты:
Только данные в зеленой рамке