欢迎您访问我爱IT技术网,今天小编为你分享的编程技术是:【PHP采集程序中常用的函数】,下面是详细的分享!
PHP采集程序中常用的函数
// Return the URL (path plus query string) of the currently running script.
//
// Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise falls back to
// $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"] when the
// query string is non-empty.
function get_php_url(){
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    $path = $_SERVER["PHP_SELF"];
    if (empty($_SERVER["QUERY_STRING"])) {
        return $path;
    }
    return $path."?".$_SERVER["QUERY_STRING"];
}
// Convert full-width digits to ASCII digits and reduce the value to a number-like string.
//
// $fnum: string (or number) that may contain the full-width digits ０-９.
// Returns the input with full-width digits mapped to 0-9, every character
// other than 0-9 and "." removed, and a run of zeros at the very start
// stripped. Returns integer 0 when nothing is left.
function GetAlabNum($fnum){
    // Full-width forms; these literals must be saved in the same encoding as the data being converted.
    $wide  = array("０","１","２","３","４","５","６","７","８","９");
    $ascii = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($wide, $ascii, $fnum);

    // preg_replace is used because ereg_replace was removed in PHP 7.0.
    // "^0+" only matches at the start of the string, so inner zeros are kept.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === null || $fnum === '') {
        $fnum = 0;
    }
    return $fnum;
}
// Convert plain text into markup-safe text suitable for embedding in an HTML page.
//
// $txt: plain text.
// Returns $txt with "<" and ">" replaced by the entities &lt; and &gt;, and
// each run of CR/LF characters replaced by a single "<br/>\r\n".
// Spaces and "&" are left as they are, so entities already present in the
// input pass through unchanged.
function Text2Html($txt){
    $txt = str_replace(array('<', '>'), array('&lt;', '&gt;'), $txt);

    // Greedy "+" so that "\r\n" (or several blank lines) yields one <br/>;
    // an ungreedy quantifier would emit one <br/> per CR or LF character.
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);

    return $txt;
}
// Neutralise HTML markup by escaping the tag delimiters.
//
// $str: text that may contain HTML tags.
// Returns $str with every "<" replaced by &lt; and every ">" replaced by &gt;,
// so tags are shown as text instead of being rendered.
function ClearHtml($str){
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
// Rewrite root-relative href/src attributes in $content into absolute URLs.
//
// $content:  HTML fragment to rewrite.
// $feed_url: URL the fragment was fetched from; its scheme (http, https or
//            ftp) and host are used as the prefix.
// Returns $content with every double-quoted href="/..." and src="/..."
// prefixed by scheme and host. $content is returned unchanged when $feed_url
// has no recognised scheme or no host part.
function relative_to_absolute($content, $feed_url) {
    // Without a scheme there is nothing to build an absolute URL from.
    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    // Strip the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);
    if ((string)$server_url === '') {
        return $content;
    }

    // Escape "\" and "$" so the prefix is inserted literally and is never
    // read as a back-reference inside the replacement string.
    $base = addcslashes($protocol[0].$server_url, '\\$');

    // (?!/) skips protocol-relative URLs such as href="//cdn.example.com/x",
    // which already carry their own host.
    return preg_replace('#(href|src)="/(?!/)#', '$1="'.$base.'/', $content);
}
// Extract every <a href=...>text</a> link from an HTML string.
//
// $code: HTML source to scan.
// Returns array('name' => list of link texts, 'url' => list of href values),
// with matching indexes. The href value may be double-quoted, single-quoted
// or unquoted. Only links whose text contains no ">" (i.e. no nested tags)
// are matched. Both lists are empty when nothing matches.
function get_all_url($code){
    // "#" is the delimiter so that "/" needs no escaping inside the pattern.
    $pattern = '#<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)</a>#i';

    if (!preg_match_all($pattern, $code, $arr)) {
        return array('name'=>array(),'url'=>array());
    }
    return array('name'=>$arr[2],'url'=>$arr[1]);
}
// Return the text found between a start marker and an end marker.
//
// $str:   text to search.
// $start: marker that precedes the wanted text.
// $end:   marker that follows the wanted text.
// Returns the text after the first $start, up to the next $start or the
// first $end, whichever comes first. Returns "" when $start does not occur
// in $str, and null when either marker is empty.
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }

    $parts = explode($start, $str);
    // $start not present: there is no second segment to read from.
    if (!isset($parts[1])) {
        return '';
    }

    $inner = explode($end, $parts[1], 2);
    return $inner[0];
}
// Convert each row of an HTML table into one CSV-style string.
//
// $table: HTML source of a table.
// Returns a list with one string per </tr>-terminated row; each cell is
// wrapped in double quotes and cells are separated by commas, e.g. "a","b".
// All other tags, line breaks, spaces and &nbsp; entities are removed,
// including spaces inside cell text.
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote, </td> the closing quote plus comma.
    $table = preg_replace('#<td[^>]*?>#si', '"', $table);
    // Closing tags are matched case-insensitively, like the opening tags.
    $table = str_ireplace('</td>', '",', $table);
    $table = str_ireplace('</tr>', '{tr}', $table);

    // Remove all remaining HTML tags.
    $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);

    // Remove every line break together with any whitespace that follows it,
    // so that a row ending in "</td>\n</tr>" still yields the ",{tr}" separator.
    $table = preg_replace('#[\r\n]\s*#', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    $rows = explode(',{tr}', $table);
    // The last piece is whatever follows the final row, not a row itself.
    array_pop($rows);
    return $rows;
}
// Convert an HTML table into a two-dimensional array of cell texts.
//
// $table: HTML source of a table.
// Returns a list of rows, each row being a list of the texts of its
// </td>-terminated cells. All tags, line breaks, spaces and &nbsp; entities
// are removed, including spaces inside cell text. Returns an empty array
// when no </tr>-terminated row is found.
function get_td_array($table) {
    // Drop the opening structural tags; only the closing tags carry the row/cell boundaries.
    $table = preg_replace('#<table[^>]*?>#si', '', $table);
    $table = preg_replace('#<tr[^>]*?>#si', '', $table);
    $table = preg_replace('#<td[^>]*?>#si', '', $table);
    // Closing tags are matched case-insensitively, like the opening tags.
    $table = str_ireplace('</tr>', '{tr}', $table);
    $table = str_ireplace('</td>', '{td}', $table);

    // Remove all remaining HTML tags.
    $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);

    // Remove every line break together with any whitespace that follows it.
    $table = preg_replace('#[\r\n]\s*#', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    $rows = explode('{tr}', $table);
    // The last piece is whatever follows the final row, not a row itself.
    array_pop($rows);

    $td_array = array();
    foreach ($rows as $row) {
        $cells = explode('{td}', $row);
        // Same for the piece after the final cell of the row.
        array_pop($cells);
        $td_array[] = $cells;
    }
    return $td_array;
}
// Return all English words (runs of the letters a-z / A-Z) found in a string, sorted.
//
// $str:      text to scan.
// $distinct: when true (the default) duplicate words are removed.
// Returns a re-indexed, sorted list of the words.
function split_en_str($str,$distinct=true) {
    $found = array();
    preg_match_all('/([a-zA-Z]+)/', $str, $found);

    $words = $found[1];
    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes the list from 0.
    sort($words);
    return $words;
}
以上所分享的是关于PHP采集程序中常用的函数,下面是编辑为你推荐的有价值的用户互动:
相关问题:php采集程序问题
答:$opts = array('http'=>array('method'=>"GET",'timeout'=>60,'user_agent'=>'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36'));$context = stream_context_create($opts);$c... >>详细
相关问题:PHP抓取/采集网页信息有哪些好的工具
答:你好。 PHP Simple HTML DOM, 目前兼容最好. 也可以用file_get_content这个函数 希望回答对你有帮助,如果有疑问,请继续追问 答题不易,互相理解,您的采纳是我前进的动力,感谢您。 >>详细
相关问题:想实现网页部分内容根据标题实时采集,如何用jquer...
答: >>详细
- 评论列表(网友评论仅供网友表达个人看法,并不表明本站同意其观点或证实其描述)
-
