<?php
// ------------------------------------------------------------------------- //
// Contrôle la validité d'une URL et récupère le statut //
// ------------------------------------------------------------------------- //
// Auteur: Pierre Crevoisier <pierre.crevoisier@epfl.ch> //
// Web: //
// ------------------------------------------------------------------------- //
?>
<?php
// urlCodes.inc.php
// ----------------
// Lookup table mapping an HTTP status code to a human-readable description.
// This section has to be saved in its own file, named "urlCodes.inc.php",
// because the checker script loads it under that name.
$codes = array(
    // 2xx: the request succeeded
    "200" => "Client Request Successful: OK",
    "201" => "Client Request Successful: Created",
    "202" => "Client Request Successful: Accepted",
    "203" => "Client Request Successful: Non-Authoritative Information",
    "204" => "Client Request Successful: No Content",
    "205" => "Client Request Successful: Reset Content",
    "206" => "Client Request Successful: Partial Content",

    // 3xx: the client has to look elsewhere
    "300" => "Redirection: Multiple Choice",
    "301" => "Redirection: Moved Permanently",
    "302" => "Redirection: Moved Temporarily",
    "303" => "Redirection: See Other",
    "304" => "Redirection: Not Modified",
    "305" => "Redirection: Use Proxy",

    // 4xx: the request itself was at fault
    "400" => "Client Request Incomplete: Bad Request",
    "401" => "Client Request Incomplete: Unauthorized",
    "402" => "Client Request Incomplete: Payment Required",
    "403" => "Client Request Incomplete: Forbidden",
    "404" => "Client Request Incomplete: Not Found",
    "405" => "Client Request Incomplete: Method Not Allowed",
    "406" => "Client Request Incomplete: Not Acceptable",
    "407" => "Client Request Incomplete: Proxy Authentication Required",
    "408" => "Client Request Incomplete: Request Time-Out",
    "409" => "Client Request Incomplete: Conflict",
    "410" => "Client Request Incomplete: Gone",
    "411" => "Client Request Incomplete: Length Required",
    "412" => "Client Request Incomplete: Precondition Failed",
    "413" => "Client Request Incomplete: Request Entity Too Large",
    "414" => "Client Request Incomplete: Request-URI Too Long",
    "415" => "Client Request Incomplete: Unsupported Media Type",

    // 5xx: the server failed to fulfil the request
    "500" => "Server Errors: Internal Server Error",
    "501" => "Server Errors: Not Implemented",
    "502" => "Server Errors: Bad Gateway",
    "503" => "Server Errors: Service Unavailable",
    "504" => "Server Errors: Gateway Time-Out",
    "505" => "Server Errors: HTTP Version not supported",

    // Pseudo-code (not part of HTTP) for a check that got no usable answer
    "999" => "Request timed out or server does not exist.",
);
?>
<?php
/*
# check_url.php
# -------------
# argument: $url
#
# annex: "urlCodes.inc.php"
# this file allows a nice display of the returned http status code
# for more information, have a look at the W3C site
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1
# This function performs the url check by:
# 1) opening a socket
# 2) sending a GET request (HEAD didn't work with some servers)
# 3) collecting data
# usage:
// call the function
$http = link_check($url);
// print the result
echo "<TABLE border='1' cellpadding=3 cellspacing=0>\n";
echo "<TR align='left' valign='top'><TD>\n";
echo "Current check: <b>".$url."</b>";
echo "</TD></TR>\n";
echo "<TR align='left' valign='top'><TD>\n";
foreach ($http as $k => $value) {
if ($k=="code") echo "[".$k."]: ".$codes[$value]."<br>";
else echo "[".$k."]: ".$value."<br>";
}
echo "</TD></TR>\n";
echo "<TR>\n";
echo "<TD align='left'>\n";
echo "More informations:<br><a
href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1\"
TARGET='_blank'> <small>Hypertext Transfer Protocol -- HTTP/1.1</a><br>";
echo "<a href=\"http://www.w3.org/\">World Wide Web Consortium
[W3C]</a></TD>\n";
echo "</TR>\n";
echo "</TABLE>\n";
*/
require_once 'urlCodes.inc.php';
/**
 * Check a URL by sending an HTTP GET over a raw socket and reading the
 * response headers.
 *
 * A GET request is used rather than HEAD. Reading stops as soon as the
 * outcome is known: right after the status line for non-redirect answers,
 * or at the Location header (or the end of the headers) for 3xx answers.
 * The response body is never downloaded.
 *
 * @param string $url Absolute URL to check. "https" URLs are contacted through
 *                    the ssl:// transport; any other scheme is sent as plain
 *                    HTTP. The default port is 443 for https and 80 otherwise,
 *                    unless the URL names one.
 *
 * @return array Associative array. On a parsed response it holds:
 *               - "version":  HTTP version from the status line (e.g. "1.1"),
 *                             or "" when the server omitted it
 *                             ("HTTP 200 Document follows");
 *               - "code":     the three-digit status code, as a string;
 *               - "status":   the reason phrase;
 *               - "location": the Location (or, failing that,
 *                             Content-Location) header of a redirect,
 *                             otherwise $url itself.
 *               When the URL has no host, the connection fails, the read
 *               times out or no status line is received, it holds only
 *               "code" => 999 (integer, arbitrary) and "location" => $url.
 */
function link_check($url)
{
    // Lift the script execution limit; each check is bounded by the socket
    // timeouts below instead.
    set_time_limit(0);
    $connect_timeout  = 30;     // seconds allowed to open the connection
    $read_timeout     = 30;     // seconds allowed for each read on the socket
    $max_header_lines = 10000;  // safety cap on the number of lines examined

    $failure = array("code" => 999, "location" => $url);

    $parts = parse_url($url);
    if (!is_array($parts) || empty($parts["host"])) {
        // Malformed URL or no host to connect to.
        return $failure;
    }

    $host         = $parts["host"];
    $secure       = isset($parts["scheme"]) && strtolower($parts["scheme"]) === "https";
    $default_port = $secure ? 443 : 80;
    $port         = empty($parts["port"]) ? $default_port : (int) $parts["port"];

    // A request needs a path, so addresses like http://www.hotbot.com get "/".
    $path = empty($parts["path"]) ? "/" : $parts["path"];
    if (isset($parts["query"]) && $parts["query"] !== "") {
        $path .= "?" . $parts["query"];
    }

    $errno  = 0;
    $errstr = "";
    $sockd  = fsockopen(($secure ? "ssl://" : "") . $host, $port, $errno, $errstr, $connect_timeout);
    if (!$sockd) {
        return $failure;
    }

    stream_set_blocking($sockd, true);
    stream_set_timeout($sockd, $read_timeout);

    // The Host header carries the port only when it is not the default one.
    $host_header = ($port === $default_port) ? $host : $host . ":" . $port;
    fputs($sockd, "GET " . $path . " HTTP/1.0\r\n");
    fputs($sockd, "Host: " . $host_header . "\r\n\r\n");

    $http       = array();
    $lines_read = 0;
    while (!feof($sockd) && $lines_read < $max_header_lines) {
        $line = fgets($sockd, 8192);
        if ($line === false) {
            // Read timeout or connection closed.
            break;
        }
        $lines_read++;
        $line = rtrim($line, "\r\n");

        if (!isset($http["code"])) {
            // Still waiting for the status line. The version part is optional
            // so that "HTTP 200 Document follows" is understood as well as
            // "HTTP/1.1 200 OK".
            if (preg_match('/^HTTP(?:\/(\d+(?:\.\d+)?))?\s+(\d{3})(?:\s+(.*))?$/i', $line, $m)) {
                $http["version"] = $m[1];
                $http["code"]    = $m[2];
                $http["status"]  = isset($m[3]) ? trim($m[3]) : "";

                // Only redirections need further headers (the Location).
                $code = (int) $m[2];
                if ($code < 300 || $code >= 400) {
                    break;
                }
            }
            continue;
        }

        if ($line === "") {
            // Blank line: end of the headers, the body is of no interest.
            break;
        }

        if (preg_match('/^(Content-)?Location:\s*(.*)$/i', $line, $m)) {
            $target = trim($m[2]);
            if ($m[1] === "") {
                // A real Location header settles the redirect target.
                $http["location"] = $target;
                break;
            }
            if (!isset($http["location"])) {
                // Content-Location is kept as a fallback while the search
                // for a Location header continues.
                $http["location"] = $target;
            }
        }
    }
    fclose($sockd);

    if (!isset($http["code"])) {
        // Nothing resembling a status line arrived.
        return $failure;
    }
    if (empty($http["location"])) {
        $http["location"] = $url;
    }
    return $http;
}
?>