Я создал этот класс для своего проекта подсчета слов в MediaWiki.
// Copyright PHPExperts.pro
// License: Any user on Stackflow may use this code under the BSD License.
/**
 * Web page datatype that holds the various parts of, and information
 * about, a single web page: its URL, the raw response header lines,
 * the raw body and a text rendering of the body.
 */
class WebPage
{
    /** @var string URL of the page; validated by the constructor. */
    public $url;

    /** @var array Raw header lines captured during the most recent fetchURL() call. */
    public $headers = array();

    /** @var string|null Raw response body; populated by fetchURL(). */
    public $body;

    /** @var string|null Result of remove_HTML() applied to the body; populated by fetchURL(). */
    public $text;

    /**
     * Validate and remember the URL. No network request is made here;
     * call fetchURL() to download the page.
     *
     * @param string $url
     * @throws Exception           if the cURL extension is not loaded.
     * @throws WebPageException    if $url is not a valid URL.
     */
    public function __construct($url)
    {
        // 1. Bail out now if the cURL extension is not loaded.
        if (!extension_loaded('curl'))
        {
            throw new Exception(WebPageException::MISSING_CURL);
        }

        // 2. Make sure the URL is valid.
        self::ensureValidURL($url);

        // 3. Store the URL.
        $this->url = $url;
    }

    /**
     * Determine if a URL is valid.
     *
     * @param mixed $url
     * @return bool True if $url is a string that passes FILTER_VALIDATE_URL;
     *              false otherwise.
     */
    public static function isURLValid($url)
    {
        return is_string($url)
            && filter_var($url, FILTER_VALIDATE_URL) !== false;
    }

    /**
     * Throw unless isURLValid() accepts the given URL.
     *
     * @param mixed $url
     * @throws WebPageException with WebPageException::INVALID_URL.
     */
    public static function ensureValidURL($url)
    {
        if (!self::isURLValid($url))
        {
            throw new WebPageException(WebPageException::INVALID_URL, array($url));
        }
    }

    // captureHeader() donated by bendavis78@gmail.com,
    // via http://us.php.net/curl_setopt_array
    /**
     * CURLOPT_HEADERFUNCTION callback: stores one raw header line.
     *
     * @param mixed  $ch     cURL handle passed in by cURL (unused).
     * @param string $header One raw header line.
     * @return int Length of $header in bytes; cURL expects the callback to
     *             report how many bytes it consumed.
     */
    private function captureHeader($ch, $header)
    {
        $this->headers[] = $header;

        return strlen($header);
    }

    /**
     * Download $this->url with cURL (5 second timeout) and populate
     * $headers, $body and $text.
     *
     * @throws WebPageException with WebPageException::BLANK_URL when the
     *                          request fails or returns an empty body.
     */
    public function fetchURL()
    {
        // Start from a clean slate so that calling fetchURL() more than once
        // does not mix header lines from different responses.
        $this->headers = array();

        $ch = curl_init();
        curl_setopt_array($ch, array(
            CURLOPT_URL            => $this->url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_HEADERFUNCTION => array($this, 'captureHeader'),
            CURLOPT_TIMEOUT        => 5,
        ));
        $data = curl_exec($ch);
        curl_close($ch);

        if ($data === false || $data === null || $data === '')
        {
            throw new WebPageException(WebPageException::BLANK_URL, array($this->url));
        }

        // TODO: Need to handle HTTP error messages, such as 404 and 502.
        $this->body = $data;

        // Uses code from php@wizap.dom
        // NOTE(review): remove_HTML() is not defined in this class; it must be
        // provided elsewhere before fetchURL() is called.
        $this->text = remove_HTML($data);
    }
}
После вызова WebPage::fetchURL() (во время запроса cURL сам вызывает captureHeader() для каждой строки заголовка)
просто просмотрите массив $this->headers:
если среди заголовков нет строки «HTTP/1.0 403 Forbidden», всё в порядке.
Это полностью отвечает на ваш вопрос, поэтому я рассчитываю, что ответ будет принят.