Scripts
PHP Proxy Script
Een script waarmee een proxy gedraaid kan worden naar een door jou gekozen site. Volg de instructies en vind het script op GitHub.
php-proxy.php
<?php
/**
* Copyright (C) 2012 Pim de Haan
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* The Proxy class
* Allows the user to easily create a proxy to any site.
* Simply copy both the .htaccess and this file to any directory on the web server
* and that's it.
*/
class Proxy
{
    /**
     * cURL handle used for every upstream request.
     * @var resource|CurlHandle
     */
    protected $ch;

    /**
     * URI to add before root-relative urls; also the default URL that is
     * fetched when no url is requested.
     * @var string
     */
    protected $prefix = 'http://google.com';

    /**
     * Array of domains whose links will be routed through the proxy.
     * A link is rewritten when its host contains one of these entries.
     * @var array
     */
    protected $blockedDomains = array('google.com');

    /**
     * Lower-cased mime type of the fetched page (without parameters).
     * @var string
     */
    protected $pageType;

    /**
     * Callback for setting a specific banner. The DOM is given as argument.
     * @var callback
     */
    protected $bannerCallback;

    /**
     * Url of the directory that holds the proxy script (with trailing slash).
     * @var string
     */
    protected $baseUrl;

    /**
     * Create the cURL handle and work out the proxy's own base url.
     */
    public function __construct()
    {
        $this->ch = curl_init();
        curl_setopt($this->ch, CURLOPT_HEADER, true);
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->ch, CURLOPT_USERAGENT, 'Opera/9.23 (Windows NT 5.1; U; en)');
        // Redirect options are configured in curlExecFollow() right before
        // every request, so they are not set here.

        // Never let a request (or a manually followed redirect) leave HTTP(S),
        // e.g. towards file:// urls on the proxy host.
        if (defined('CURLOPT_PROTOCOLS')) {
            curl_setopt($this->ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
            curl_setopt($this->ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        }

        // URL without the script name: everything up to and including the
        // last slash, so the script does not have to be called proxy.php.
        $self = isset($_SERVER['PHP_SELF']) ? (string) $_SERVER['PHP_SELF'] : '';
        $lastSlash = strrpos($self, '/');
        $this->baseUrl = ($lastSlash === false) ? '' : substr($self, 0, $lastSlash + 1);
    }

    /**
     * @see $bannerCallback
     * @param callback $callback
     * @throws InvalidArgumentException When the argument is not callable
     */
    public function setBannerCallback($callback)
    {
        if (!is_callable($callback)) {
            throw new InvalidArgumentException('Argument should be callable');
        }
        $this->bannerCallback = $callback;
    }

    /**
     * Run
     * @param string $url  Proxied url without scheme; empty for the default
     * @param array  $get  $_GET global var
     * @param array  $post $_POST global var
     * @return string Response body, rewritten when it is HTML or CSS
     * @throws Exception When the upstream request fails
     */
    public function run($url, $get, $post)
    {
        // Use default, or decode the requested url
        if (empty($url)) {
            $url = $this->prefix;
        } else {
            $url = $this->decodeUrl($url);
        }

        // Append get params to request, respecting an existing query string
        if ($get) {
            $separator = (strpos($url, '?') === false) ? '?' : '&';
            $url .= $separator . http_build_query($get, '', '&');
        }
        curl_setopt($this->ch, CURLOPT_URL, $url);

        // Set optional post params. They are sent url-encoded: handing the raw
        // array to cURL would switch to multipart, break on nested arrays and
        // (on old PHP versions) treat values starting with "@" as file uploads.
        if ($post) {
            $fields = is_array($post) ? http_build_query($post, '', '&') : $post;
            curl_setopt($this->ch, CURLOPT_POSTFIELDS, $fields);
            curl_setopt($this->ch, CURLOPT_POST, true);
        }

        $return = $this->curlExecFollow($this->ch);

        // Throw exception on error
        if ($return === false) {
            $message = $this->error();
            throw new Exception($message !== '' ? $message : 'Request failed');
        }

        // Separate the final response header from the body
        $headerSize = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE);
        list($header, $body) = $this->splitResponse($return, $headerSize);

        // Set response headers
        $this->setResponseHeaders($header);

        // Rewrite links according to page type
        if ($this->pageType == 'text/html') {
            $body = $this->rewriteHtml($body);
        } elseif ($this->pageType == 'text/css') {
            $body = $this->rewriteLinksInCss($body);
        }
        return $body;
    }

    /**
     * Split a raw cURL response (headers included) into the header block of
     * the final response and the body. Header blocks of earlier responses,
     * such as followed redirects, are dropped.
     * @param string $response   Raw response as returned by curl_exec()
     * @param int    $headerSize Total size in bytes of all header blocks
     * @return array array(string $header, string $body)
     */
    protected function splitResponse($response, $headerSize)
    {
        $response = (string) $response;
        $headerSize = (int) $headerSize;
        if ($headerSize <= 0 || $headerSize > strlen($response)) {
            return array('', $response);
        }
        $rawHeaders = trim(substr($response, 0, $headerSize));
        $body = (string) substr($response, $headerSize);
        $blocks = explode("\r\n\r\n", $rawHeaders);
        return array(end($blocks), $body);
    }

    /**
     * Forward the whitelisted upstream headers to the client and remember the
     * page type.
     * @param string $header Raw header block of the upstream response
     */
    protected function setResponseHeaders($header)
    {
        // Headers that should be mapped to client
        $mappedHeaders = array(
            'Set-Cookie',
            'Expires',
            'Last-Modified',
            'Cache-Control',
            'Content-Type',
            'Pragma'
        );

        $headers = $this->parseHeaders($header);
        foreach ($headers as $name => $value) {
            // If header isn't mapped, don't set it. in_array() is used because
            // a found index of 0 (Set-Cookie) must not count as "not found".
            if (!in_array($name, $mappedHeaders, true)) {
                continue;
            }
            // Support for multiple values with same name
            if (is_array($value)) {
                foreach ($value as $part) {
                    header($name . ': ' . $part, false);
                }
            } else {
                header($name . ': ' . $value);
            }
        }

        // Set page type: the mime type without parameters such as charset
        $contentType = isset($headers['Content-Type']) ? $headers['Content-Type'] : '';
        if (is_array($contentType)) {
            $contentType = reset($contentType);
        }
        list($type) = explode(';', (string) $contentType);
        $this->pageType = strtolower(trim($type));
    }

    /**
     * Parse a raw header block into an array keyed by normalised header name
     * (e.g. "Content-Type"). A header that occurs more than once yields a flat
     * array with all of its values.
     * @param string $header
     * @return array
     */
    protected function parseHeaders($header)
    {
        $retVal = array();
        // Unfold continuation lines before splitting into fields
        $unfolded = (string) preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', (string) $header);
        foreach (explode("\r\n", $unfolded) as $field) {
            if (!preg_match('/^([^:]+):[\x09\x20]*(.*)$/', $field, $match)) {
                continue;
            }
            $name = $this->normalizeHeaderName($match[1]);
            $value = trim($match[2]);
            if ($name === '' || $value === '') {
                continue;
            }
            if (!isset($retVal[$name])) {
                $retVal[$name] = $value;
            } elseif (is_array($retVal[$name])) {
                $retVal[$name][] = $value;
            } else {
                $retVal[$name] = array($retVal[$name], $value);
            }
        }
        return $retVal;
    }

    /**
     * Normalise a header name to Dash-Separated-Capitals.
     * @param string $name
     * @return string
     */
    protected function normalizeHeaderName($name)
    {
        $parts = explode('-', strtolower(trim($name)));
        return implode('-', array_map('ucfirst', $parts));
    }

    /**
     * Convert html for use in proxy
     * @param string $source HTML input
     * @return string HTML output
     */
    protected function rewriteHtml($source)
    {
        // Nothing to parse; DOMDocument refuses empty input
        if (trim((string) $source) === '') {
            return $source;
        }

        // Use dom to easily find links; collect parser warnings silently
        $dom = new DOMDocument;
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($source);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Map tag name to the attribute that holds its link
        $map = array(
            'form' => 'action',
            'a' => 'href',
            'img' => 'src',
            'script' => 'src',
            'link' => 'href'
        );

        // Rewrite each type
        foreach ($map as $tagName => $attributeName) {
            foreach ($dom->getElementsByTagName($tagName) as $node) {
                if ($node->hasAttribute($attributeName)) {
                    $attribute = $node->getAttributeNode($attributeName);
                    $attribute->value = $this->rewriteLink($attribute->value);
                }
            }
        }

        // Rewrite links in <style> tag
        foreach ($dom->getElementsByTagName('style') as $node) {
            $node->nodeValue = $this->rewriteLinksInCss($node->nodeValue);
        }

        // Call bannerCallback to update DOM. A callback that only modifies
        // the document in place and returns nothing is accepted as well.
        if (isset($this->bannerCallback)) {
            $result = call_user_func($this->bannerCallback, $dom);
            if ($result instanceof DOMDocument) {
                $dom = $result;
            }
        }

        $html = $dom->saveHTML();

        // Allow this ajax call to be routed trough the proxy as well
        $html = str_replace("new Ajax.Updater('artistDetails', '/ajax_details_artinfo.php', {",
            "new Ajax.Updater('artistDetails', '".$this->rewriteLink('/ajax_details_artinfo.php')."', {", $html);
        return $html;
    }

    /**
     * Update links in (inline) css
     * @param string $css
     * @return string
     */
    protected function rewriteLinksInCss($css)
    {
        // Match url('x'), url("x") and url(x), optionally padded with
        // whitespace. The branch-reset group keeps the link in $matches[1].
        $pattern = '#url\(\s*(?|\'([^\']+)\'|"([^"]+)"|([^\'"\)\s]+))\s*\)#i';
        $result = preg_replace_callback($pattern, array($this, 'rewritleLinksInCssCallback'), (string) $css);
        // Keep the stylesheet untouched if the regex engine gave up
        return ($result === null) ? $css : $result;
    }

    /**
     * Helper function: builds the replacement for one url(...) match.
     * @param array $matches Regex match; index 1 holds the link
     * @return string
     */
    protected function rewritleLinksInCssCallback($matches)
    {
        return "url('".$this->rewriteLink($matches[1])."')";
    }

    /**
     * Update url so that it routes trough the proxy
     *
     * NOTE(review): the result is always entity-encoded, also when it ends up
     * in a standalone CSS response - confirm whether that is intended.
     * @param string $url
     * @return string
     */
    public function rewriteLink($url)
    {
        $url = (string) $url;

        // Add http: to protocol-relative links
        if (strpos($url, '//') === 0) {
            $url = 'http:' . $url;
        // Make root-relative links absolute
        } elseif (strpos($url, '/') === 0) {
            $url = $this->prefix . $url;
        }

        // Only rewrite http(s) links that point at a blocked domain
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        $host = parse_url($url, PHP_URL_HOST);
        if (($scheme === 'http' || $scheme === 'https') && is_string($host) && $host !== '') {
            foreach ($this->blockedDomains as $domain) {
                // Host names are case-insensitive
                if ($domain !== '' && stripos($host, $domain) !== false) {
                    $url = $this->encodeUrl($url);
                    break;
                }
            }
        }
        return htmlentities($url);
    }

    /**
     * Turn an absolute http(s) url into a url that points at the proxy.
     * @param string $url
     * @return string
     */
    protected function encodeUrl($url)
    {
        // Strip the scheme, whatever its length (http:// or https://)
        return $this->baseUrl . preg_replace('#^https?://#i', '', $url);
    }

    /**
     * Turn the url part received by the proxy back into an absolute url.
     * @param string $url
     * @return string
     */
    protected function decodeUrl($url)
    {
        return 'http://'.str_replace(' ', '%20', $url);
    }

    /**
     * Get error message
     * @return string
     */
    protected function error()
    {
        return curl_error($this->ch);
    }

    /**
     * Allow redirects under safe mode
     *
     * When cURL may follow redirects itself it is simply told to do so.
     * Otherwise redirects are resolved by hand with header-only requests on a
     * copy of the handle, after which the final url is fetched.
     * @param resource|CurlHandle $ch
     * @return string|false Raw response, or false on failure
     */
    protected function curlExecFollow($ch)
    {
        // Maximum number of redirects to follow
        $mr = 5;

        $safeMode = strtolower((string) ini_get('safe_mode'));
        $safeModeOff = in_array($safeMode, array('', '0', 'off'), true);

        if (ini_get('open_basedir') == '' && $safeModeOff) {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
            return curl_exec($ch);
        }

        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

        // Probe for redirects on a copy, asking for headers only
        $rch = curl_copy_handle($ch);
        curl_setopt($rch, CURLOPT_HEADER, true);
        curl_setopt($rch, CURLOPT_NOBODY, true);
        curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
        curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);

        $redirectCodes = array(301, 302, 303, 307, 308);
        do {
            if (strpos($newurl, '//') === 0) {
                $newurl = 'http:' . $newurl;
            } elseif (strpos($newurl, '/') === 0) {
                $newurl = $this->prefix . $newurl;
            }
            curl_setopt($rch, CURLOPT_URL, $newurl);
            $header = curl_exec($rch);

            // $code stays 0 unless there is another redirect to follow
            $code = 0;
            if (!curl_errno($rch)) {
                $status = (int) curl_getinfo($rch, CURLINFO_HTTP_CODE);
                if (in_array($status, $redirectCodes, true)
                    && preg_match('/^Location:\s*(.*?)\s*$/mi', (string) $header, $matches)
                    && $matches[1] !== ''
                ) {
                    $newurl = str_replace(' ', '%20', $matches[1]);
                    $code = $status;
                }
            }
        } while ($code && --$mr);
        curl_close($rch);

        if (!$mr) {
            trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING);
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $newurl);
        return curl_exec($ch);
    }
}
// Entry point: fetch the requested url through the proxy and print the result.
try {
    // Use '' as default; a non-string value (e.g. url[]=x) is ignored as well
    $url = '';
    if (isset($_GET['url'])) {
        if (is_string($_GET['url'])) {
            $url = $_GET['url'];
        }
        // The remaining GET params are forwarded to the proxied site
        unset($_GET['url']);
    }
    $proxy = new Proxy();
    echo $proxy->run($url, $_GET, $_POST);
} catch(Exception $e) {
    // Exceptions reaching this point come from Proxy::run(), i.e. the
    // upstream request could not be completed.
    if (!headers_sent()) {
        header('HTTP/1.1 502 Bad Gateway', true, 502);
    }
    // The message can contain parts of the requested url, so escape it
    echo 'Error: '.htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
}
?>
Reacties
0