php正则替换处理HTML页面的方法
本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:
<?php
// Refuse to run when this file is requested directly: BASEPATH is expected to
// be defined by the framework bootstrap before any library file is loaded.
if (!defined('BASEPATH')) {
    exit('No direct script access allowed');
}
/**
 * HTML rewriting library.
 *
 * Rewrites relative resource references found in an HTML document so that they
 * point at the application's own "cdms/<module>/<action>/<id>?pid=<project>"
 * URLs. A reference is rewritten only when the loaded model resolves its path
 * to an id; every other reference is left exactly as it appeared in the input.
 *
 * References that are handled:
 *   1. <img src="...">
 *   2. <a href="...">
 *   3. <iframe src="...">
 *   4. <frame src="...">
 *   5. JavaScript: window.open('...')   (first argument only)
 *   6. CSS:        background ... url(...)
 */
class Myreplace
{
    /** Module identifiers accepted by my_replace(). */
    private $moudle_array = array('udata', 'tdata', 'tresult', 'dresult');

    /** Model that is loaded (under the alias "model") for each module. */
    private $moudle_models = array(
        'udata'   => 'mupload_data',
        'tdata'   => 'taskdata',
        'tresult' => 'taskresult',
        'dresult' => 'dmsresult',
    );

    /** HTML passed to the most recent my_replace() call. */
    private $content;

    /** Directory that relative references are resolved against. */
    private $relative_dirname;

    /** Project id used for lookups and appended to generated URLs. */
    private $projectid;

    /** Lower-cased module identifier of the current run. */
    private $moudle;

    /**
     * Object returned by get_instance(). Declared explicitly instead of being
     * created as a dynamic property; kept public because a dynamic property
     * was publicly accessible.
     */
    public $CI;

    function __construct()
    {
        $this->CI =& get_instance();
    }

    /**
     * Rewrite every resolvable reference in an HTML document.
     *
     * @param string $content   HTML content to process
     * @param string $relative  directory the document's relative paths are based on
     * @param int    $projectid project id
     * @param string $moudle    module identifier: udata, tdata, tresult or dresult
     *                          (case-insensitive)
     * @return string the HTML with resolvable references rewritten
     */
    public function my_replace($content, $relative, $projectid, $moudle)
    {
        // Normalise once so validation, model selection and the generated URLs
        // all see the same value.
        $moudle = strtolower($moudle);
        if (!in_array($moudle, $this->moudle_array, true)) {
            // Unknown module: terminate, as this method has always done.
            exit;
        }

        $this->content = $content;
        $this->relative_dirname = $relative;
        $this->projectid = $projectid;
        $this->moudle = $moudle;

        $this->CI->load->model($this->moudle_models[$moudle], 'model');

        // callback name => pattern, applied in this order
        $passes = array(
            'image_replace'  => $this->attribute_pattern('img', 'src'),
            'html_replace'   => $this->attribute_pattern('a', 'href'),
            'iframe_replace' => $this->attribute_pattern('iframe', 'src'),
            'frame_replace'  => $this->attribute_pattern('frame', 'src'),
            // 1: "window.open(" 2: quote 3: first argument
            'js_replace'     => '/(window\.open\s*\(\s*)(["\'])(.*?)\2/i',
            // 1: "background...url(" 2: optional quote 3: url 4: ")"
            'css_replace'    => '/(background[^;{}]*?url\(\s*)(["\']?)([^"\'()]+?)\2(\s*\))/i',
        );

        foreach ($passes as $callback => $pattern) {
            $result = preg_replace_callback($pattern, array($this, $callback), $content);
            // preg_replace_callback() returns null on a PCRE error; keep the
            // text from the previous pass rather than losing the document.
            if ($result !== null) {
                $content = $result;
            }
        }

        return $content;
    }

    /**
     * Build the pattern that finds one attribute of one tag.
     *
     * Only the text up to the end of the attribute value is matched, so the
     * remaining attributes and the closing ">" are never touched.
     * Groups: 1 = everything up to the value, 2 = double-quoted value,
     * 3 = single-quoted value, 4 = unquoted value.
     */
    private function attribute_pattern($tag, $attribute)
    {
        return '/(<' . $tag . '\b[^>]*?\s' . $attribute
            . '\s*=\s*)(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
    }

    /** Callback for <img src>: picture lookup, fragments are not split off. */
    private function image_replace($matches)
    {
        return $this->replace_attribute($matches, true, false);
    }

    /** Callback for <a href>: file lookup, "#fragment" is preserved. */
    private function html_replace($matches)
    {
        return $this->replace_attribute($matches, false, true);
    }

    /** Callback for <iframe src>: file lookup, "#fragment" is preserved. */
    private function iframe_replace($matches)
    {
        return $this->replace_attribute($matches, false, true);
    }

    /** Callback for <frame src>: file lookup, "#fragment" is preserved. */
    private function frame_replace($matches)
    {
        return $this->replace_attribute($matches, false, true);
    }

    /** Callback for window.open('...'): only the first argument is rewritten. */
    private function js_replace($matches)
    {
        // URLs carrying an anchor are deliberately left untouched here.
        if (strpos($matches[3], '#') !== false) {
            return $matches[0];
        }

        $url = $this->resolve_reference($matches[3], false, false);
        if ($url === null) {
            return $matches[0];
        }

        return $matches[1] . $matches[2] . $url . $matches[2];
    }

    /** Callback for CSS background ... url(...). */
    private function css_replace($matches)
    {
        $url = $this->resolve_reference($matches[3], true, false);
        if ($url === null) {
            return $matches[0];
        }

        return $matches[1] . $matches[2] . $url . $matches[2] . $matches[4];
    }

    /**
     * Shared body of the four tag callbacks.
     *
     * @param array $matches        groups produced by attribute_pattern()
     * @param bool  $is_picture     true to build a picture URL, false for a file URL
     * @param bool  $split_fragment true to keep a trailing "#fragment" out of the lookup
     * @return string replacement text for the matched span
     */
    private function replace_attribute($matches, $is_picture, $split_fragment)
    {
        // Trailing groups that did not take part in the match are absent,
        // which tells us which quoting style was used.
        if (isset($matches[4])) {
            $quote = '';
            $value = $matches[4];
        } elseif (isset($matches[3])) {
            $quote = "'";
            $value = $matches[3];
        } else {
            $quote = '"';
            $value = $matches[2];
        }

        $url = $this->resolve_reference($value, $is_picture, $split_fragment);
        if ($url === null) {
            return $matches[0];
        }

        return $matches[1] . $quote . $url . $quote;
    }

    /**
     * Turn a reference from the document into an application URL.
     *
     * @return string|null the new URL, or null when the reference must be
     *                     left unchanged (empty, absolute, or unknown to the model)
     */
    private function resolve_reference($reference, $is_picture, $split_fragment)
    {
        $path = $reference;
        $fragment = '';

        if ($split_fragment) {
            $hash = strpos($path, '#');
            if ($hash !== false) {
                $fragment = substr($path, $hash);
                $path = substr($path, 0, $hash);
            }
        }

        // Nothing to look up for pure anchors, and absolute URLs
        // (scheme: or //host) never refer to a project file.
        if ($path === '' || preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $path)) {
            return null;
        }

        $id = $this->find_id($path);
        if (empty($id)) {
            return null;
        }

        if ($this->moudle == 'dresult') {
            $action = $is_picture ? 'readpic' : 'readfile';
        } else {
            $action = $is_picture ? 'picfile' : 'txtfile';
        }

        return $this->CI->config->item('base_url') . 'cdms/' . $this->moudle . '/'
            . $action . '/' . $id . '?pid=' . $this->projectid . $fragment;
    }

    /**
     * Resolve a relative path against the base directory and ask the model
     * for the id stored under the resulting path.
     */
    private function find_id($path)
    {
        // Drop a trailing slash first so each "../" really removes a directory.
        $base = rtrim($this->relative_dirname, '/');

        $levels_up = substr_count($path, '../');
        for ($i = 0; $i < $levels_up; $i++) {
            $slash = strrpos($base, '/');
            $base = ($slash === false) ? '' : substr($base, 0, $slash);
        }

        $relativepath = rtrim($base, '/') . '/' . ltrim($path, './');

        return $this->CI->model->get_id_by_path_and_project($relativepath, $this->projectid);
    }
}
/* End of file Myreplace.php */
/* Location: ./application/libraries/Myreplace.php */
PS:这里再为大家提供2款非常方便的正则表达式工具供大家参考使用:
JavaScript正则表达式在线测试工具:
http://tools.jb51.net/regex/javascript
正则表达式在线生成工具:
http://tools.jb51.net/regex/create_reg
希望本文所述对大家的php程序设计有所帮助。