]*)>(.*?)<\/title>!is', $content, $buff);
return trim($buff[2]);
}
/**
* @brief Extract header script
*
* The visible statements strip the title element and any meta tags from
* $content before further processing.
*
* NOTE(review): the rest of this body looks truncated in this copy. Text that
* resembles markup appears to have been stripped out of the regex literals
* and the for-loop conditions, and the closing statements ($body_buff,
* return $body_script) look like they belong to a separate body-extraction
* routine rather than to this function. Restore the block from the upstream
* file before editing or relying on it -- TODO confirm.
**/
function getHeadScript($content) {
// remove the title tag
$content = preg_replace('!
]*)>(.*?)<\/title>!is','', $content);
// Remove meta tags
$content = preg_replace('!<(\/){0,1}meta([^>]*)>!is','', $content);
// Extract information such as ]*)>!is', $content, $link_buff);
for($i=0;$i!is', $content, $script_buff);
for($i=0;$i]*)>(.*?)<\/body>!is', $content, $body_buff);
// Second capture group of the match stored in $body_buff
$body_script = $body_buff[2];
// Remove link, style, script, etc.
$body_script = preg_replace('!]*)>!is', '', $body_script);
$body_script = preg_replace('!<(style|script)(.*?)<\/(style|script)>!is', '', $body_script);
return $body_script;
}
/**
 * @brief Change the value of src, href in the content
 *
 * Derives two base URLs from $path and stores them on the object so that
 * _replacePath() can use them:
 *   - $this->host : "scheme://host[:port]/" (just "/" when $path has no host)
 *   - $this->path : the directory of $path, always ending with "/"
 * Every src=, href= and url( value found in $content is then passed through
 * _replacePath().
 *
 * @param string $content markup whose src/href/url() values are rewritten
 * @param string $path    URL the content was loaded from
 * @return string the content with rewritten references
 **/
function replaceSrc($content, $path) {
    $url_info = parse_url($path);
    // parse_url() returns false on seriously malformed input
    if(!is_array($url_info)) $url_info = array();

    // Build "scheme://host[:port]" only from the components that are present
    $host = '';
    if(isset($url_info['host'])) {
        $host = (isset($url_info['scheme']) ? $url_info['scheme'].':' : '').'//'.$url_info['host'];
        if(!empty($url_info['port'])) $host .= ':'.$url_info['port'];
    }
    $this->host = $host.'/';

    // The base directory is everything before the last "/" of the URL path:
    // the final piece is either a file name or, for a path that already ends
    // with "/", an empty string. Either way it is dropped.
    $segments = explode('/', isset($url_info['path']) ? $url_info['path'] : '');
    array_pop($segments);

    $dirs = array();
    foreach($segments as $segment) {
        $segment = trim($segment);
        // Compare against '' explicitly so a directory named "0" is kept
        if($segment === '') continue;
        $dirs[] = $segment;
    }

    $path = $host.'/'.implode('/', $dirs);
    if(substr($path, -1) != '/') $path .= '/';
    $this->path = $path;

    $content = preg_replace_callback('/(src=|href=|url\()("|\')?([^"\'\)]+)("|\'\))?/is', array($this, '_replacePath'), $content);
    return $content;
}
/**
 * @brief preg_replace_callback handler used by replaceSrc()
 *
 * Turns a relative reference into an absolute one using the bases stored by
 * replaceSrc(): $this->host for root-relative values ("/x"), $this->path for
 * everything else ("x", "./x").
 *
 * @param array $matches [0] whole match, [1] prefix (src=, href=, url( ),
 *                       [2] opening quote, [3] the value,
 *                       [4] closing quote part (absent when it did not match)
 * @return string the replacement for the whole match
 **/
function _replacePath($matches) {
    $val = trim($matches[3]);

    // PCRE omits trailing groups that did not participate in the match, so
    // index 4 may not exist (e.g. for src='x' the closing quote stays outside).
    $tail = isset($matches[4]) ? $matches[4] : '';

    // Leave untouched anything that is not a relative path: values carrying a
    // URI scheme (http:, mailto:, data:, javascript:, ...), protocol-relative
    // URLs (//host/...) and same-document fragments (#anchor).
    if(preg_match('/^(?:[a-z][a-z0-9+.\-]*:|\/\/|#)/i', $val)) return $matches[0];

    if(substr($val, 0, 2) == './') {
        $path = $this->path.substr($val, 2);
    } elseif(substr($val, 0, 1) == '/') {
        // $this->host already ends with "/", so drop the leading one
        $path = $this->host.substr($val, 1);
    } else {
        $path = $this->path.$val;
    }

    return sprintf("%s%s%s%s", $matches[1], $matches[2], $path, $tail);
}
}
?>