Я не знаю, как сделать это с помощью Symfony DomCrawler, но в PHP есть вполне приличные встроенные инструменты для разбора HTML — «DOMDocument» и «DOMXPath». С их помощью это выглядит так:
// Scrape the tag and artist links out of the gallery page markup in $html and
// print them as name => absolute URL maps.

// loadHTML() is an instance method: calling it statically is deprecated in
// PHP 7 and throws an Error in PHP 8. Real-world HTML is rarely valid, so
// libxml parse warnings are collected internally (and then discarded) rather
// than being blanket-silenced with "@", which would also hide real failures.
$previous_libxml_setting = libxml_use_internal_errors(true);
$domd = new DOMDocument();
$domd->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

$xp = new DOMXPath($domd);

// URL of the page the markup came from; hrefs in it are resolved against this.
$base_url = 'https://hentaifox.com/gallery/58091/';

$tags = [];
$artists = [];

// Each link wraps its label in a first <span>; the href lives on the parent <a>.
foreach ($xp->query("//a[contains(@href,'/tag/')]/span[1]") as $tag) {
    $tags[trim($tag->textContent)] = merge_relative_absolute_urls($base_url, $tag->parentNode->getAttribute("href"));
}
foreach ($xp->query("//a[contains(@href,'/artist/')]/span[1]") as $artist) {
    $artists[trim($artist->textContent)] = merge_relative_absolute_urls($base_url, $artist->parentNode->getAttribute("href"));
}

print_r([
    'artists' => $artists,
    'tags' => $tags
]);
/**
 * Resolve a link taken from a page against the URL of that page.
 *
 * - A link starting with "http://", "https://", "//" or "://" is returned
 *   unchanged.
 * - A root-relative link ("/path") is attached to the scheme, host and
 *   optional port of the base URL.
 * - Any other link is appended to the directory part of the base URL.
 *
 * This is not a full RFC 3986 resolver: "./" and "../" segments are not
 * collapsed, and query-only or fragment-only links are treated like file names.
 *
 * @param string $base_url     URL of the document the link was found in.
 * @param string $relative_url The link as written in the document.
 * @return string The merged URL. If the link is root-relative and no host can
 *                be determined from the base URL, the link is returned as-is.
 */
function merge_relative_absolute_urls(string $base_url, string $relative_url): string
{
    // Links that already carry their own authority need no merging.
    if (0 === stripos($relative_url, "http://") || 0 === stripos($relative_url, "https://") || 0 === strpos($relative_url, "//") || 0 === strpos($relative_url, "://")) {
        return $relative_url;
    }

    // Drop "?query" and "#fragment" from the base; neither takes part in
    // resolution, and a "/" inside them must not be mistaken for a path slash.
    $base_url = substr($base_url, 0, strcspn($base_url, "?#"));

    // Find where the authority ("host[:port]") begins so that the slashes of
    // "scheme://" are never treated as the end of the directory part.
    $scheme_end = strpos($base_url, "://");
    if (false !== $scheme_end) {
        $authority_start = $scheme_end + 3;
    } elseif (0 === strpos($base_url, "//")) {
        $authority_start = 2;
    } else {
        $authority_start = null;
    }

    $last_slash = strrpos($base_url, "/");
    if (null !== $authority_start && (false === $last_slash || $last_slash < $authority_start)) {
        // Bare authority such as "https://example.com": its directory is the root.
        $base_url .= "/";
    } elseif (false !== $last_slash) {
        // Strip "file.php" from ".../file.php", keeping the trailing slash.
        $base_url = substr($base_url, 0, $last_slash + 1);
    }

    if (substr($relative_url, 0, 1) === "/") {
        $info = parse_url($base_url);
        if (!is_array($info) || !isset($info['host'])) {
            // No usable host in the base: there is nothing to attach the path to.
            return $relative_url;
        }
        // A protocol-relative base stays protocol-relative.
        $url = isset($info['scheme']) ? $info['scheme'] . "://" : "//";
        $url .= $info['host'];
        if (isset($info['port'])) {
            $url .= ":" . $info['port'];
        }
        return $url . $relative_url;
    }

    return $base_url . $relative_url;
}
Вывод:
$ php wtf3.php
Array
(
[artists] => Array
(
[Sahara-wataru] => https://hentaifox.com/artist/sahara-wataru/
)
[tags] => Array
(
[Big-breasts] => https://hentaifox.com/tag/big-breasts/
[Sole-male] => https://hentaifox.com/tag/sole-male/
[Nakadashi] => https://hentaifox.com/tag/nakadashi/
[Blowjob] => https://hentaifox.com/tag/blowjob/
[Full-color] => https://hentaifox.com/tag/full-color/
[Big-ass] => https://hentaifox.com/tag/big-ass/
[Blowjob-face] => https://hentaifox.com/tag/blowjob-face/
)
)