Here is the phonifier code that broke a few months ago:
<?php
/**
* Phonifier - Websites and RSS mobile accessible!
*
* Copyright (C) 2005 Peter de Blieck, Paragin
*
* Licensed under the Creative Commons Attribution-ShareAlike License.
* http://creativecommons.org/licenses/by-sa/2.0/nl/
*
* For further information visit:
* http://www.phonifier.com/
*
* File Name: Phonifier.php
*
* This Class was made originally for ShotCode.com
*
* This is a PHP-class that allows users to access webpages
* on small screen-devices by removing unnecessary code.
*
* Features:
* - HTTP + HTTPS
* - Follow redirects
* - Forms (GET AND POST)
* - Cookies
* - Url-rewriting (../, /, //, mailto:, javascript:, #, <base href='#'>)
* - Frames
* - RSS+ATOM-feeds
* - Removing lots of unnecessary code
* - Optionally replacing images with their alt-text
* - Wap-pages passthru
* - Follow <meta refresh>-tags
*
* Limitations:
* - Pages that require an Authorization username and password (not implemented for safety reasons)
* - If you come across other limitations, please let me know
*
**/
// Phonifier stores the cookies of visited hosts in $_SESSION, so a session has to be
// active. Only open one when the including script has not already done so, because
// calling session_start() on an active session raises a notice.
if (session_id() === '') {
    session_start();
}
class Phonifier {
    public $baseurl = ""; //Base url of the Phonifier installation (no trailing slash)
    public $user_agent = 'Mozilla/5.0 (compatible; Phonifier; +http://www.phonifier.com)'; //The name of the user-agent
    public $time_out = 5; //Maximum time (seconds) for connecting to and reading from the remote host
    public $data = ""; //The output data
    public $header = ""; //The response headers of the last request
    public $scheme = ""; //Transport prefix for fsockopen: "" for HTTP, "ssl://" for HTTPS
    public $port = 80; //HTTP=80 (default), HTTPS=443
    public $times = 0; //Number of redirects/retries followed
    public $iswap = false; //Is the accessed page a wml-page?
    public $post = ""; //Url-encoded post-values
    public $method = "GET"; //GET (default), POST
    public $url = "http://"; //The url the parser has to access
    public $img = true; //Show images in the result or replace them with their alt-text
    public $urlinfo; //Result of parse_url() (empty array when the url cannot be parsed)
    public $contenttype = ""; //Content-type of the result
    public $contentlength = 0; //Content-length of the result
    public $starttime = 0; //Start-time, used for benchmarking
    public $error = false; //No errors yet :-)
    public $errortype = ""; //The errortype (for debugging)
    public $feed = false; //Is this a feed?
    protected $rewritecontext = array(); //base, path, baseuri and img-flag used while rewrite() runs

    //The template used for displaying the result
    //Placeholders: 1=title 2=content-type 3=baseurl 4=escaped url 5/6=checked-state images on/off 7=body
    public $template ="<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>%1\$s</title>
<meta http-equiv='Content-Type' content='%2\$s' />
<link rel='stylesheet' type='text/css' media='all' href='%3\$s/phonify.css'/>
<base href='%3\$s/' />
</head>
<body>
<a name='phonifiertop'></a>
<form action='index.php' method='get'>
<input type='hidden' name='l' value='1' />
<input type='text' name='u' value='%4\$s' size='30' />
<input type='submit' value='go' /><br />
images <input type='radio' name='i' value='1'%5\$s/>on /<input type='radio' name='i' value='0'%6\$s/>off
<hr />
</form>
%7\$s
<br /><a href='#phonifiertop'>top</a>
</body>
</html>
";

    /**
    * Make an instance of the Phonifier-class
    *
    * Determines the base url of the installation from the server variables and,
    * when the current request carries POST data, prepares it for forwarding.
    *
    * $url (string) : The url that you want to access
    **/
    public function __construct($url)
    {
        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
        $script = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '';
        //use https for our own assets when the phonifier itself is served over https
        $own = (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off') ? 'https' : 'http';
        //get the baseurl of the phonifier
        $this->baseurl = preg_replace("/\/$/", "", str_replace('\\', '/', dirname($own . "://" . $host . $script)));
        $this->starttime = $this->microtime_float();
        $this->url = is_scalar($url) ? (string)$url : '';
        //If there is a POST, set the variables right
        if (!empty($_POST)) {
            //http_build_query also copes with array-valued fields (name[]=...)
            $this->post = http_build_query($_POST, '', '&');
            $this->method = "POST";
        }
    }

    /**
    * PHP 4 style constructor, kept so that code calling it explicitly
    * (for example parent::Phonifier($url) in a subclass) keeps working.
    *
    * $url (string) : The url that you want to access
    **/
    public function Phonifier($url)
    {
        $this->__construct($url);
    }

    /**
    * This function activates the Phonifier
    * Run this after you set values like $img
    **/
    public function run()
    {
        if ($this->hasHttpScheme($this->url)) {
            //initiate vars
            $this->init();
            if (!$this->error) {
                //get the contents of the url
                $this->get();
            }
            return;
        }
        //the url doesnt start with http(s)
        $message = empty($this->url) ? "Insert url in textfield and press 'go'" : "Error: the url is not valid";
        $this->fail("Url not valid, no http(s)://", "", $message);
    }

    /**
    * Initiate function
    * Parses the url, refuses urls that point back at this server and
    * selects the transport (plain or ssl) and port for fsockopen
    **/
    public function init()
    {
        //url seems valid...
        $this->data = "";
        $this->header = "";
        $info = @parse_url($this->url);
        $this->urlinfo = is_array($info) ? $info : array();
        $target = isset($this->urlinfo['host']) ? $this->urlinfo['host'] : '';
        $targetscheme = isset($this->urlinfo['scheme']) ? strtolower($this->urlinfo['scheme']) : '';
        //get our own hostname without www and without port
        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
        $host = preg_replace('/:\d+$/', '', preg_replace('/^www\./i', '', $host));
        $addr = isset($_SERVER['SERVER_ADDR']) ? $_SERVER['SERVER_ADDR'] : '';
        //NOTE(review): this is a simple substring guard against self-requests, not a complete SSRF filter
        if ($this->hostContains($target, $host) || $this->hostContains($target, $addr) || $this->hostContains($target, "127.0.0.1") || $this->hostContains($target, "localhost")) {
            //don't call yourself
            $this->fail("Own domain", "", "Error: choose a domain outside " . htmlspecialchars($host, ENT_QUOTES));
        }
        //when the scheme is https fsockopen wants a 'ssl://'- url
        //and off course a different port
        if ($targetscheme == 'https') {
            $this->port = 443;
            $this->scheme = "ssl://";
        } else if ($this->scheme == "ssl://") {
            //redirected from https back to http: undo the ssl settings
            $this->port = 80;
            $this->scheme = "";
        }
    }

    /**
    * returns time
    * Used for benchmarking
    **/
    public function microtime_float()
    {
        return microtime(true);
    }

    /**
    * The get function
    * These are the 'brains' of the Phonifier: request the page, inspect the
    * content-type, rewrite the urls and strip everything a phone doesn't need
    **/
    public function get()
    {
        //do the request (cookies of every response are stored by request() itself)
        $this->request();
        $requesttime = round($this->microtime_float() - $this->starttime, 5);
        //Check contenttype, but only when request() did not already produce an error page
        if (!$this->error && preg_match('/^Content-Type:([^\r\n]*)/mi', $this->header, $content_type)) {
            $this->contenttype = trim($content_type[1]);
            $this->iswap = (strpos($this->contenttype, "text/vnd.wap.wml") !== false);
            if (preg_match('#^(application|text)/(atom\+|rss\+)?xml#i', $this->contenttype)) {
                //rss and atom feeds; the error-flag keeps rewrite() and tiny() below from running again
                $this->error = true;
                $this->feed = true;
                $this->data = $this->feedToHtml();
            } else if (!preg_match('#^text/html#i', $this->contenttype) && !preg_match('#application/xhtml\+xml#i', $this->contenttype) && !$this->iswap) {
                //anything except (x)html or wap
                $this->fail("Wrong Content-type", "Wrong Content-type", "The page that I found cannot be optimized for use on a mobile device." . $this->directLink());
            }
        }
        //rewrite the urls
        if (!$this->error) {
            $this->rewrite();
        }
        $rewritetime = round($this->microtime_float() - $this->starttime, 5);
        //clean non-used tags and meta-data
        if (!$this->iswap && !$this->error) {
            $this->tiny();
        }
        $cleantime = round($this->microtime_float() - $this->starttime, 5);
        //collapse every run of spaces and linebreaks into a single space
        $collapsed = preg_replace("/[\r\n ]+/", " ", $this->data);
        if ($collapsed !== null) {
            $this->data = $collapsed;
        }
        if (!$this->iswap) {
            $this->data .= "\r\n<!-- Request: {$requesttime}|{$rewritetime}|{$cleantime} -->"; //benchmarks
        }
        if ($this->feed) {
            $this->error = false;
        }
        $this->contentlength = strlen($this->data);
        $this->contenttype = empty($this->contenttype) ? "text/html; charset=iso-8859-1" : $this->contenttype;
    }

    /**
    * Recalculates and returns the length (in bytes) of the outputdata
    **/
    public function contentlength()
    {
        $this->contentlength = strlen($this->data);
        return $this->contentlength;
    }

    /**
    * The request-function. It checks the url and handles the request
    *
    * Fills $header and $data with the response, follows Location-headers and
    * meta-refresh tags (at most 4 hops) and turns the known failures into an error page
    **/
    public function request()
    {
        if ($this->url == "" || $this->url == "http://" || $this->url == "https://") {
            $this->fail("Insert url in textfield and press 'go'", "", "Insert url in textfield and press 'go'");
            return;
        }
        //every time we call this function we count it... we don't want to run forever...
        if ($this->times >= 4) {
            $this->fail("Too many redirects", "", "Error: redirected too many times.. quitting." . $this->directLink());
            return;
        }
        if (!isset($this->urlinfo['host']) || !isset($this->urlinfo['scheme'])) {
            $this->fail("No optimization", "", "Error: the url could not be optimized." . $this->directLink());
            return;
        }
        //open the socket
        $fp = @fsockopen($this->scheme . $this->urlinfo['host'], $this->port, $errno, $errstr, $this->time_out);
        if (!$fp) {
            $this->fail("Timeout", "", "Error: the request timed out." . $this->directLink());
            return;
        }
        //get the data
        $this->data = "";
        $this->header = '';
        $this->urlinfo['path'] = isset($this->urlinfo['path']) ? $this->urlinfo['path'] : "";
        fputs($fp, $this->buildRequest());
        //the connect-timeout does not cover reading, so limit that too
        stream_set_timeout($fp, (int)$this->time_out);
        $receivingheaders = true;
        while (!feof($fp)) {
            $raw = @fgets($fp, 8192);
            if ($raw === false) {
                //read error or timeout: stop instead of spinning until feof
                break;
            }
            $line = trim($raw);
            if ($line === '') {
                //the first empty line separates the headers from the body
                $receivingheaders = false;
            } else if ($receivingheaders) {
                $this->header .= $line . "\r\n";
            } else {
                $this->data .= $line . "\r\n";
            }
        }
        fclose($fp);
        //remember the cookies of this response, also when it is only a redirect
        $this->storeCookies();
        //now we have the result. but there can be problems...
        //check if we get a new Location (header first, meta-refresh second)
        $location = "";
        if (preg_match('/^Location:[ \t]*([^\r\n]*)/mi', $this->header, $loc)) {
            $location = trim($loc[1]);
        } else if (preg_match("/<meta.+http-equiv=('|\")refresh('|\").*content=.*; url=(.*)('|\").*>/Uis", $this->data, $loc2) && sizeof($loc2) == 5) {
            $location = trim($loc2[3]);
        }
        if (!empty($location)) {
            $this->followRedirect($location);
        }
        //or is there is a bad request...try again
        else if (stripos($this->header, "Bad Request") !== false || stripos($this->header, "302 Found") !== false || strlen(trim($this->header)) == 0) {
            //possible error of missing the last /.. change it and try again
            $this->url .= '/';
            $this->urlinfo['path'] .= '/';
            $this->post = "";
            $this->method = "GET"; //the body is gone, so don't send an empty POST
            $this->times++;
            $this->request();
        }
        //or is the page moved?
        else if (stripos($this->header, "301 Moved Permanently") !== false) {
            //we checked this first but if there isn't a
            //Location-header we can't do anything
            $this->fail("Redirect misunderstood", "", "Error: Redirect misunderstood." . $this->directLink());
        }
        //or doesn't the page exist?
        else if (stripos($this->header, "404 Not Found") !== false) {
            $this->fail("404", "404 File Not Found", "Error 404: The page you want to visit does not exist");
        }
        //or is the page password protected?
        else if (stripos($this->header, "401 Authorization Required") !== false) {
            $link = htmlspecialchars($this->url, ENT_QUOTES);
            $this->fail("Authorization required", "401 Authorization Required", "The page you want to visit is password-protected. Click <a href='{$link}'>here</a> to access this page.");
        }
    }

    /**
    * Builds the raw HTTP/1.0 request (request-line, headers and optional post-body)
    * for the url described by $urlinfo
    **/
    protected function buildRequest()
    {
        $host = $this->urlinfo['host'];
        //an empty path is not a valid request-target, ask for the root instead
        $target = (isset($this->urlinfo['path']) && $this->urlinfo['path'] !== "") ? $this->urlinfo['path'] : "/";
        if (isset($this->urlinfo['query']) && $this->urlinfo['query'] !== "") {
            $target .= "?" . $this->urlinfo['query'];
        }
        $target = str_replace(' ', '%20', $target);
        $accept = isset($_SERVER['HTTP_ACCEPT']) ? $_SERVER['HTTP_ACCEPT'] : "*/*";
        $head = "{$this->method} {$target} HTTP/1.0\r\nHost: {$host}\r\nUser-Agent: {$this->user_agent}\r\nAccept: {$accept}\r\n";
        if (isset($_SESSION[$host]) && sizeof($_SESSION[$host]) > 0) {
            $pairs = array();
            foreach ($_SESSION[$host] as $cookieKey => $cookieVal) {
                //send the value back exactly as received (like a browser does); only
                //linebreaks are removed so a stored value can never add a header
                $pairs[] = $cookieKey . "=" . preg_replace('/[\r\n]+/', '', $cookieVal);
            }
            $head .= 'Cookie: ' . implode("; ", $pairs) . "\r\n";
        }
        if (!empty($this->post)) {
            $length = strlen($this->post);
            $head .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $head .= "Content-Length: $length\r\n";
            $head .= "\r\n";
            $head .= $this->post;
        }
        $head .= "\r\n";
        return $head;
    }

    /**
    * Stores the cookies of the last response in the session, grouped per host.
    * A cookie whose value starts with 'deleted' removes a previously stored cookie
    **/
    protected function storeCookies()
    {
        if (!isset($this->urlinfo['host'])) {
            return;
        }
        if (!preg_match_all('/^Set-Cookie:[ \t]*([^=;\r\n]+)=([^;\r\n]*)/mi', $this->header, $cookies)) {
            return;
        }
        $host = $this->urlinfo['host'];
        if (!isset($_SESSION[$host])) {
            $_SESSION[$host] = array();
        }
        foreach ($cookies[1] as $i => $name) {
            $name = trim($name);
            $value = trim($cookies[2][$i]);
            if (stripos($value, "deleted") === 0 && isset($_SESSION[$host][$name])) {
                unset($_SESSION[$host][$name]);
            } else {
                $_SESSION[$host][$name] = $value;
            }
        }
    }

    /**
    * Makes the redirect-target absolute and requests it
    *
    * $location (string) : value of the Location-header or the meta-refresh url
    **/
    protected function followRedirect($location)
    {
        $scheme = $this->urlinfo['scheme'];
        $host = $this->urlinfo['host'];
        $next = preg_replace('#^\./#', '/', $location);
        //not a valid redirect... but we take care of it
        if (substr($next, 0, 2) == "//") {
            //protocol-relative: keep the current scheme
            $next = "{$scheme}:{$next}";
        } else if (substr($next, 0, 1) == "/") {
            $next = "{$scheme}://{$host}{$next}";
        } else if (!$this->hasHttpScheme($next)) {
            $next = "{$scheme}://{$host}/{$next}";
        }
        $this->url = $next;
        if ($this->hasHttpScheme($this->url)) {
            //re-initiate vars
            $this->init();
            $this->post = "";
            $this->method = "GET"; //the body is gone, so don't send an empty POST
            $this->times++;
            //init() reports an error when the redirect points at this server: stop there
            if (!$this->error) {
                $this->request();
            }
        }
    }

    /**
    * Rewrite the urls in de responsedata
    * Every href/action/src/;url value is made absolute; links and forms are
    * pointed back at the phonifier
    **/
    public function rewrite()
    {
        $scheme = isset($this->urlinfo['scheme']) ? $this->urlinfo['scheme'] : "";
        $host = isset($this->urlinfo['host']) ? $this->urlinfo['host'] : "";
        $path = isset($this->urlinfo['path']) ? $this->urlinfo['path'] : "";
        $base = "{$scheme}://{$host}";
        $baseuri = "";
        $img = $this->img ? 1 : 0;
        if (substr($path, -1) != "/") {
            //the path names a file: continue with its directory (without trailing slash)
            $path = preg_replace("/\/$/", "", str_replace('\\', '/', dirname($path)));
        }
        //if there is a base-url specified.. use it in every link
        preg_match("/<base.+href=('|\")(.*)('|\").*>/Ui", $this->data, $baseurl);
        if (sizeof($baseurl) == 4) {
            $baseuri = $baseurl[2];
        }
        //the callback receives only the match, the rest travels via this property
        $this->rewritecontext = array($base, $path, $baseuri, $img);
        $rewritten = preg_replace_callback("/(href|action|src|;url)+=(['\"])?+(.+)([\"'> ])/Ui", array($this, 'replaceUrlMatch'), $this->data);
        //preg_replace_callback returns null on a pcre-error; keep the page in that case
        if ($rewritten !== null) {
            $this->data = $rewritten;
        }
    }

    /**
    * Callback for rewrite(): hands one regex-match to replaceUrl()
    *
    * $match (array) : 1=attribute name, 2=opening quote (may be empty), 3=url, 4=closing character
    **/
    protected function replaceUrlMatch($match)
    {
        list($base, $path, $baseuri, $img) = $this->rewritecontext;
        return $this->replaceUrl($base, $path, $baseuri, $img, $match[1], $match[2], $match[3], $match[4]);
    }

    /**
    * The function that replaces the urls
    *
    * $base (string)    : scheme://host of the current page
    * $path (string)    : directory of the current page
    * $baseuri (string) : href of a <base>-tag, empty when the page has none
    * $img (int)        : 1 = show images, 0 = replace images by their alt-text
    * $type (string)    : attribute name (href, action, src or ;url)
    * $a (string)       : opening quote, $b (string) : the url, $c (string) : closing character
    *
    * Returns the rewritten attribute (string)
    **/
    public function replaceUrl($base, $path, $baseuri, $img, $type, $a, $b, $c)
    {
        $type = strtolower($type);
        $current = !empty($baseuri) ? $baseuri : $base . $path;
        $glue = (substr($current, -1) == "/") ? "" : "/";
        if (substr($b, 0, 1) == "/") {
            if (substr($b, 0, 2) != "//") {
                $b = (empty($baseuri) ? $base : $baseuri) . $b;
            } else {
                //protocol-relative url: use the scheme of the page itself
                $colon = strpos($base, ":");
                $b = ($colon ? substr($base, 0, $colon) : "http") . ":" . $b;
            }
        } else if (substr($b, 0, 3) == "../") {
            if ($base == $current) {
                //already at the root, there is no parent to go to
                $b = $current . substr($b, 2);
            } else {
                $b = $current . $glue . $b;
            }
        } else if (stripos($b, "mailto:") === 0 || stripos($b, "javascript:") === 0) {
            //useless on a proxied page
            $b = "#";
        } else if (substr($b, 0, 1) == "#") {
            //anchors stay as they are
        } else if (!$this->hasHttpScheme($b)) {
            $b = $current . $glue . $b;
        }
        $isanchor = (substr($b, 0, 1) == "#");
        if ($type == "href" && !$isanchor) {
            //links go through the phonifier again
            return $type . '=' . $a . "?i={$img}&u=" . urlencode(html_entity_decode($b)) . $c;
        }
        if ($type == "action" && !$isanchor) {
            //forms are posted to index.php/<img>/<scheme>/<host and path>
            $slash = strpos($b, "/");
            $scheme = substr($b, 0, $slash - 1);
            $rest = trim(substr($b, $slash + 2));
            return $type . '=' . $a . "index.php/{$img}/{$scheme}/" . $rest . $c;
        }
        return $type . '=' . $a . $b . $c;
    }

    /**
    * Clean all tags you don't want
    * Takes the charset and title from the page, strips the markup a phone can't
    * use and wraps what is left in the template
    **/
    public function tiny()
    {
        if (preg_match("/text\/html\; charset=(.*)['\"]/iU", $this->data, $ct)) {
            $this->contenttype = "text/html; charset={$ct[1]}";
        }
        $title = preg_match("/<title>(.*)<\/title>/i", $this->data, $titlecontents) ? $titlecontents[1] : "";
        //search and replace are parallel lists: entry n of $replace belongs to entry n of $search
        $search = array(
            '@<!--(.*)-->@Usi' //html-comments
            ,'@\/\/<!\[CDATA\[.*?\/\/\]\]>@ism' //cdata-sections
            ,'@on(click|mouseover|mouseout|blur|error|focus|load|unload|submit|reset|abort|change|select)=(\'|").*(["\'])+( |>)@Usi' //all javascript-triggers that results in an js-error
            ,'@<link(.*)media=(\'|")(screen|print)(\'|").*?'.'>\r\n@Ui' //remove stylesheets that a mobile phone doesn't use
            ,'@<(noedit|iframe|script)[^>]*?'.'>.*?<\/(noedit|iframe|script)>@ism' //script and iframe-tags
            ,'@<(head|object|style|map)[^>]*?'.'>.*?<\/(head|object|style|map)>@ism' //head, object, style and map-tags
            ,'@<(body|p)[^>]*?'.'>@ism' //drop the attributes of body and p
            ,'@.*<html[^>]*?'.'>@ism' //everything up to the html-tag
            ,'@</tr>@ism' //a table-row ends with a linebreak
            ,'@<(\/)?(html|body|div|span|link|meta|font|center|noscript|frameset|noframes|table|tr|th|td|tbody|thead|tfoot)[^>]*?'.'>@ism' //remove tables,div,span,link,meta
            ,'@<\/p>(\r\n)+\|(\r\n)+<p>@ism'
            ,'@((<br ?/?'.'>)+((\r)?\n)*)+@i' //a row of linebreaks becomes one
            ,'@(\r\n)+@'
            ,'@(style)=(\'|").*(["\'])+( |>)@Usi' //inline styles
            ,'@<frame.*src=(\'|")(.*)(\'|").*>@Uis' //frames become links
            ,'@(target)=(\'|").*(["\'])+( |>)@Usi' //target-attributes
        );
        $replace = array(
            ''
            ,''
            ,'\4'
            ,''
            ,''
            ,''
            ,'<\1>'
            ,''
            ,'<br />'
            ,''
            ,' | '
            ,'<br />'
            ,' '
            ,'\4'
            ,'Frame: <a href=\'?i=1&u=\2\'>\2</a><br />'
            ,''
        );
        if (!$this->img) { //filter out images
            //images with alt
            $search[] = "/<img[^>]* alt=(\"([^\"]+)\"|'([^']+)'|([^\"'> ]+))[^>]*>/i";
            $replace[] = "[$2$3$4] ";
            //images without alt
            $search[] = '/<img.*[^>]*?'.'>/Ui';
            $replace[] = '[img]';
        }
        $cleaned = preg_replace($search, $replace, $this->data);
        //preg_replace returns null on a pcre-error; rather show the uncleaned page than nothing
        if ($cleaned !== null) {
            $this->data = $cleaned;
        }
        $this->data = $this->render($title, $this->data);
    }

    /**
    * When accessing a RSS or ATOM feed this function is called to make the feed readable
    *
    * Returns the complete html-page (string); $data is left holding the rewritten feed-body
    **/
    public function feedToHtml()
    {
        $outputhtml = "";
        $atom = false;
        $channelCount = preg_match_all("|(xml.*encoding=['\"](.*)['\"].*>.*)?<channel>.*<title>(.*)</title>.*<link>(.*)</link>.*</channel>|iUs", $this->data, $channels, PREG_SET_ORDER);
        if ($channelCount == 0) {
            $channelCount = preg_match_all("|(xml.*encoding=['\"](.*)['\"].*>.*)?<feed.*>.*<title>(.*)</title>.*<link.*href=['\"](.*)['\"].*<author>|iUs", $this->data, $channels, PREG_SET_ORDER);
            $atom = true;
        }
        //only the first channel is used; fall back to empty values when nothing matched
        $channel = (is_array($channels) && sizeof($channels) > 0) ? reset($channels) : array();
        $feedtitle = isset($channel[3]) ? $channel[3] : "";
        $feedlink = isset($channel[4]) ? $channel[4] : "";
        $charset = empty($channel[2]) ? "UTF-8" : $channel[2];
        $outputhtml .= "<h1><a href='{$feedlink}'>{$feedtitle}</a></h1><br />";
        $itemmatch = $atom ? "|<entry>(.*)</entry>|iUs" : "|<item>(.*)</item>|iUs";
        $linkmatch = $atom ? "|<link.*rel=['\"]alternate['\"].*href=['\"](.*)['\"].*/>|iUs" : "|<link.*>(.*)</link>|iUs";
        $descrmatch = $atom ? "|<summary.*>(.*)</summary>|iUs" : "|<description.*>(.*)</description>|iUs";
        $itemCount = preg_match_all($itemmatch, $this->data, $items, PREG_SET_ORDER);
        if ($itemCount > 0) {
            foreach ($items as $item) {
                $str_title = "";
                $str_link = "";
                $str_descr = "";
                if (preg_match_all("|<title.*>(.*)</title>|iUs", $item[0], $title, PREG_SET_ORDER)) {
                    $str_title = $title[0][1];
                }
                if (preg_match_all($linkmatch, $item[0], $link, PREG_SET_ORDER)) {
                    $str_link = $link[0][1];
                }
                if (preg_match_all($descrmatch, $item[0], $descr, PREG_SET_ORDER)) {
                    $str_descr = str_replace("<![CDATA[", "", str_replace("]]>", "", $descr[0][1]));
                }
                $outputhtml .= "<h2><a href='{$str_link}'>{$str_title}</a></h2>";
                $outputhtml .= "{$str_descr}<br /><br />";
            }
        } else {
            $outputhtml .= "Feed couldn't be parsed";
        }
        $this->data = $outputhtml;
        $this->contenttype = "text/html; charset=$charset";
        $this->rewrite();
        return $this->render("{$feedtitle}", $this->data);
    }

    /**
    * Puts the phonifier in the error-state and replaces the outputdata by an error page
    *
    * $errortype (string) : short code for debugging
    * $title (string)     : title of the page
    * $message (string)   : html shown to the user
    **/
    protected function fail($errortype, $title, $message)
    {
        $this->error = true;
        $this->errortype = $errortype;
        $this->contenttype = "text/html; charset=iso-8859-1";
        $this->data = $this->render($title, $message);
    }

    /**
    * Fills the template with the given title and body, using the current
    * content-type, baseurl, url (html-escaped) and image-setting
    **/
    protected function render($title, $body)
    {
        return sprintf($this->template, $title, $this->contenttype, $this->baseurl, htmlspecialchars($this->url, ENT_QUOTES), $this->img ? " checked='checked'" : "", $this->img ? "" : " checked='checked'", $body);
    }

    /**
    * Returns the 'open without optimization' link for the current url.
    * The url is escaped because it is user input that ends up inside an attribute
    **/
    protected function directLink()
    {
        $link = htmlspecialchars($this->url, ENT_QUOTES);
        return "<br />Click <a href='{$link}'>here</a> to access the address without optimization.";
    }

    /**
    * Does the url start with http:// or https:// (case-insensitive)?
    **/
    protected function hasHttpScheme($url)
    {
        return preg_match('#^https?://#i', (string)$url) === 1;
    }

    /**
    * Case-insensitive 'contains'; an empty needle never matches
    **/
    protected function hostContains($haystack, $needle)
    {
        return $needle !== '' && stripos((string)$haystack, (string)$needle) !== false;
    }
}
?>