如果开启了打开远程内容的选项(allow_url_fopen),PHP 用 fopen 或 file_get_contents 就能获取一个网页的内容。但这些默认函数有个不足之处:无法直接获取 HTTP 应答头,这在一些特殊的应用中很不方便。例如,有这样一个链接:
http://www.abc.com/showvd.asp?id=18
假如它返回的是一张图片,用默认函数就很难识别;但如果通过 HTTP 应答头来判断就简单多了。此外,如果对方通过 Referer 请求头来防盗链,用默认函数也无法获取内容。用下面的 HTTP 类就能解决这些问题,而且速度也相差无几。
使用方法:
$hd = new dedehttpdown();
$hd->openurl("http://www.dedecms.com");
echo $hd->gethtml(); // 如果保存为文件则用 $hd->savetobin("dede.html");
$hd->close();
获得 HTTP 应答头用 $hd->gethead("key");设置请求头用 $hd->sethead(key,value);(必须在调用 openurl 之前设定)
代码如下:
<?php
/*
---------------------------------------------------------------------
 DedeCMS HTTP download class v1.0
 Source : DedeCMS (织梦之旅) http://www.dedecms.com
 Author : IT Plato (it柏拉图)
 Date   : 2005-11-13 12:39
 Notice : first published on the Luowuzhe (落伍者) site; keep this
          copyright notice when redistributing.
---------------------------------------------------------------------
*/

/**
 * Small HTTP client that issues a GET request over a raw socket and keeps
 * the parsed response headers, so callers can inspect the status code and
 * content type before reading or saving the body.
 *
 * Typical use:
 *   $hd = new dedehttpdown();
 *   $hd->sethead("refer", "http://www.example.com/"); // optional, before openurl()
 *   $hd->openurl("http://www.example.com/");
 *   echo $hd->gethtml();          // or $hd->savetobin("page.html");
 *   $hd->close();
 *
 * Failures are reported through $m_error / printerror() and falsy return
 * values; no exceptions are thrown.
 */
class dedehttpdown
{
    /** @var string URL handed to privateinit(). */
    public $m_url = "";
    /** @var string Path plus "?query", as written on the request line. */
    public $m_urlpath = "";
    /** @var string Scheme parsed from the URL; recorded only, the socket is always opened without a transport prefix. */
    public $m_scheme = "http";
    /** @var string Host parsed from the URL. */
    public $m_host = "";
    /** @var string|int Port parsed from the URL. */
    public $m_port = "80";
    /** @var string User name parsed from the URL. */
    public $m_user = "";
    /** @var string Password parsed from the URL. */
    public $m_pass = "";
    /** @var string Path parsed from the URL. */
    public $m_path = "/";
    /** @var string Query string parsed from the URL. */
    public $m_query = "";
    /** @var resource|null Open socket, or null when no connection is held. */
    public $m_fp = null;
    /** @var string Accumulated error text. */
    public $m_error = "";
    /** @var array Response headers keyed by lower-cased name, plus http-edition / http-state / http-describe. */
    public $m_httphead = array();
    /** @var string Cached response body read by gethtml(). */
    public $m_html = "";
    /** @var array Request headers to send; kept across openurl() calls. */
    public $m_puthead = array();
    /** @var string Host plus directory of the current URL, without scheme or trailing slash. */
    public $baseurlpath = "";
    /** @var string Host of the current URL. */
    public $homeurl = "";
    /** @var int Redirects followed so far; guards against endless redirect loops. */
    public $jumpcount = 0;

    /**
     * Parses $url and fills the connection properties and the base paths
     * used later by fillurl().
     *
     * @param string $url Absolute URL; an empty string is ignored.
     */
    public function privateinit($url)
    {
        if ($url == "") {
            return;
        }
        $urls = parse_url($url);
        $this->m_url = $url;
        if (!is_array($urls)) {
            return;
        }

        // parse_url() omits keys that are absent from the URL.
        $host = isset($urls["host"]) ? $urls["host"] : "";
        $path = isset($urls["path"]) ? $urls["path"] : "";

        $this->m_host = $host;
        if (!empty($urls["scheme"])) {
            $this->m_scheme = $urls["scheme"];
        }
        if (!empty($urls["user"])) {
            $this->m_user = $urls["user"];
        }
        if (!empty($urls["pass"])) {
            $this->m_pass = $urls["pass"];
        }
        if (!empty($urls["port"])) {
            $this->m_port = $urls["port"];
        }
        if (!empty($path)) {
            $this->m_path = $path;
        }
        $this->m_urlpath = $this->m_path;
        if (!empty($urls["query"])) {
            $this->m_query = $urls["query"];
            $this->m_urlpath .= "?" . $this->m_query;
        }

        $this->homeurl = $host;
        // Drop a trailing "file.ext" segment (only the last segment, so
        // dotted directory names survive), then the trailing slash.
        $base = preg_replace('#/[^/]*\.[^/]*$#', "/", $host . $path);
        $this->baseurlpath = preg_replace('#/$#', "", $base);
    }

    /**
     * Opens $url: resets per-request state, connects, sends the request and
     * reads the response headers. Redirects are followed automatically.
     *
     * @param string $url Absolute http URL.
     */
    public function openurl($url)
    {
        $this->jumpcount = 0;
        $this->privatereset();
        $this->privateinit($url);
        $this->privatestartsession();
    }

    /**
     * Same as openurl() but counts the call as one redirect hop instead of
     * resetting the hop counter.
     *
     * @param string $url Absolute URL taken from a redirect response.
     */
    public function jumpopenurl($url)
    {
        $this->jumpcount++;
        $this->privatereset();
        $this->privateinit($url);
        $this->privatestartsession();
    }

    /**
     * Clears everything that belongs to the previous request and closes the
     * socket. Request headers in $m_puthead are intentionally kept.
     */
    private function privatereset()
    {
        $this->m_url = "";
        $this->m_urlpath = "";
        $this->m_scheme = "http";
        $this->m_host = "";
        $this->m_port = "80";
        $this->m_user = "";
        $this->m_pass = "";
        $this->m_path = "/";
        $this->m_query = "";
        $this->m_error = "";
        $this->m_httphead = array();
        $this->m_html = "";
        $this->close();
    }

    /**
     * Echoes the accumulated error text followed by every response header.
     */
    public function printerror()
    {
        echo "错误信息:" . $this->m_error;
        echo "具体返回头:<br>";
        foreach ($this->m_httphead as $k => $v) {
            echo "$k => $v <br>\r\n";
        }
    }

    /**
     * Tells whether the response status is 2xx; otherwise appends the
     * status code and reason phrase to $m_error.
     *
     * @return bool
     */
    public function isgetok()
    {
        if (preg_match('/^2/', $this->gethead("http-state"))) {
            return true;
        }
        // The separator keeps the line break that the original literal contained.
        $this->m_error .= $this->gethead("http-state") . " \n- " . $this->gethead("http-describe") . "<br>";
        return false;
    }

    /**
     * Tells whether the response is 2xx with a text/* content type;
     * otherwise appends a message to $m_error.
     *
     * @return bool
     */
    public function istext()
    {
        if (preg_match('/^2/', $this->gethead("http-state"))
            && preg_match('/^text/i', $this->gethead("content-type"))) {
            return true;
        }
        $this->m_error .= "内容为非文本类型或网址重定向<br>";
        return false;
    }

    /**
     * Tells whether the response is 2xx and its content type equals $ctype.
     * The comparison ignores case and accepts either the full header value
     * or just its media-type part (the text before any ";" parameter).
     *
     * @param string $ctype Expected content type, e.g. "image/gif".
     * @return bool
     */
    public function iscontenttype($ctype)
    {
        $wanted = strtolower($ctype);
        $actual = strtolower($this->gethead("content-type"));
        $pieces = explode(";", $actual, 2);
        $mediatype = trim($pieces[0]);

        if (preg_match('/^2/', $this->gethead("http-state"))
            && ($actual == $wanted || $mediatype == $wanted)) {
            return true;
        }
        $this->m_error .= "类型不对 " . $this->gethead("content-type") . "<br>";
        return false;
    }

    /**
     * Writes the rest of the response body to a file and closes the socket.
     *
     * @param string $savefilename Destination path.
     * @return bool False when the status is not 2xx, the connection is
     *              already closed, or the file cannot be written.
     */
    public function savetobin($savefilename)
    {
        if (!$this->isgetok()) {
            return false;
        }
        if (!is_resource($this->m_fp) || feof($this->m_fp)) {
            $this->m_error = "连接已经关闭!";
            return false;
        }
        // Binary mode so the payload is not altered on platforms that
        // translate line endings.
        $out = fopen($savefilename, "wb");
        if (!$out) {
            $this->m_error = "无法打开要写入的文件!";
            return false;
        }
        $copied = stream_copy_to_stream($this->m_fp, $out);
        fclose($out);
        $this->close();
        return $copied !== false;
    }

    /**
     * Saves the body to a file, but only for text responses.
     *
     * @param string $savefilename Destination path.
     * @return bool|string Result of savetobin(), or "" when the response is not text.
     */
    public function savetotext($savefilename)
    {
        if ($this->istext()) {
            return $this->savetobin($savefilename);
        }
        return "";
    }

    /**
     * Returns the body of a text response. The body is read once, cached in
     * $m_html, and the socket is closed afterwards.
     *
     * @return string Body text, or "" when the response is not text or no
     *                data is available.
     */
    public function gethtml()
    {
        if (!$this->istext()) {
            return "";
        }
        if ($this->m_html != "") {
            return $this->m_html;
        }
        if (!is_resource($this->m_fp) || feof($this->m_fp)) {
            return "";
        }
        $body = stream_get_contents($this->m_fp);
        if ($body !== false) {
            $this->m_html .= $body;
        }
        $this->close();
        return $this->m_html;
    }

    /**
     * Connects, sends the GET request with the configured headers, parses
     * the status line and response headers, and follows 3xx redirects.
     *
     * @return bool|null False when the connection cannot be opened;
     *                   otherwise nothing is returned.
     */
    public function privatestartsession()
    {
        if (!$this->privateopenhost()) {
            $this->m_error .= "打开远程主机出错!";
            return false;
        }

        // Response headers are cleared before each session by openurl() /
        // jumpopenurl(), so this normally selects HTTP/1.0.
        if (strcasecmp($this->gethead("http-edition"), "HTTP/1.1") == 0) {
            $httpv = "HTTP/1.1";
        } else {
            $httpv = "HTTP/1.0";
        }

        // Request line: method and version tokens are case-sensitive.
        fputs($this->m_fp, "GET " . $this->m_urlpath . " $httpv\r\n");

        if (!is_array($this->m_puthead)) {
            $this->m_puthead = array();
        }
        // A non-default port is part of the Host header.
        $hostheader = $this->m_host;
        if ((int) $this->m_port != 80) {
            $hostheader .= ":" . $this->m_port;
        }
        $this->m_puthead["host"] = $hostheader;

        // Defaults apply only when the caller has not set the header
        // (in any letter case) through sethead().
        if (!$this->privatehasputhead("accept")) {
            $this->m_puthead["accept"] = "*/*";
        }
        if (!$this->privatehasputhead("user-agent")) {
            $this->m_puthead["user-agent"] = "mozilla/4.0 (compatible; msie 6.0; windows nt 5.2)";
        }
        if (!$this->privatehasputhead("refer") && !$this->privatehasputhead("referer")) {
            $this->m_puthead["refer"] = "http://" . $hostheader;
        }

        $linebreaks = array("\r", "\n");
        foreach ($this->m_puthead as $k => $v) {
            // Strip embedded line breaks so a value cannot start a new header.
            $k = trim(str_replace($linebreaks, "", (string) $k));
            $v = trim(str_replace($linebreaks, "", (string) $v));
            // The "refer" key is kept for callers; the header on the wire
            // is spelled "Referer".
            if (strcasecmp($k, "refer") == 0) {
                $k = "Referer";
            }
            if ($k !== "" && $v !== "") {
                fputs($this->m_fp, "$k: $v\r\n");
            }
        }

        // With HTTP/1.1 the server must be told to close the connection,
        // otherwise feof() cannot be used to detect the end of the body.
        if ($httpv == "HTTP/1.1") {
            fputs($this->m_fp, "Connection: close\r\n\r\n");
        } else {
            fputs($this->m_fp, "\r\n");
        }

        // Status line: "<version> <code> <reason phrase...>".
        $httpstas = explode(" ", (string) fgets($this->m_fp));
        $this->m_httphead["http-edition"] = trim($httpstas[0]);
        $this->m_httphead["http-state"] = isset($httpstas[1]) ? trim($httpstas[1]) : "";
        $this->m_httphead["http-describe"] = "";
        foreach (array_slice($httpstas, 2) as $word) {
            $this->m_httphead["http-describe"] .= " " . trim($word);
        }

        // Header lines up to the first blank line; each is split at its
        // first colon. Whole lines are read so long headers stay intact.
        while (!feof($this->m_fp)) {
            $line = trim((string) fgets($this->m_fp));
            if ($line === "") {
                break;
            }
            $parts = explode(":", $line, 2);
            $hkey = trim($parts[0]);
            $hvalue = isset($parts[1]) ? trim($parts[1]) : "";
            if ($hkey !== "") {
                $this->m_httphead[strtolower($hkey)] = $hvalue;
            }
        }

        // 3xx: follow the Location header, at most a few hops.
        if (preg_match('/^3/', $this->m_httphead["http-state"])) {
            if ($this->jumpcount > 3) {
                return;
            }
            if (isset($this->m_httphead["location"])) {
                $newurl = $this->m_httphead["location"];
                if (!preg_match('/^http/i', $newurl)) {
                    $newurl = $this->fillurl($newurl);
                }
                $this->jumpopenurl($newurl);
            } else {
                $this->m_error = "无法识别的转移应答!";
            }
        }
    }

    /**
     * Tells whether a request header with this name has been set,
     * ignoring letter case.
     */
    private function privatehasputhead($name)
    {
        foreach ($this->m_puthead as $k => $v) {
            if (strcasecmp(trim((string) $k), $name) == 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns one response header.
     *
     * @param string $headname Header name, case-insensitive; also
     *                         "http-edition", "http-state", "http-describe".
     * @return string Header value, or "" when absent.
     */
    public function gethead($headname)
    {
        $headname = strtolower($headname);
        if (isset($this->m_httphead[$headname])) {
            return $this->m_httphead[$headname];
        }
        return "";
    }

    /**
     * Sets a request header; call before openurl().
     *
     * @param string $skey   Header name.
     * @param string $svalue Header value.
     */
    public function sethead($skey, $svalue)
    {
        $this->m_puthead[$skey] = $svalue;
    }

    /**
     * Opens the socket to $m_host:$m_port with a 10 second connect timeout.
     *
     * @return bool False when no host is set or the connection fails
     *              (the socket error text is stored in $m_error).
     */
    public function privateopenhost()
    {
        if ($this->m_host == "") {
            return false;
        }
        $errno = 0;
        $errstr = "";
        // Warnings are suppressed on purpose: failure is reported via $m_error.
        $this->m_fp = @fsockopen($this->m_host, (int) $this->m_port, $errno, $errstr, 10);
        if (!$this->m_fp) {
            $this->m_error = $errstr;
            return false;
        }
        return true;
    }

    /**
     * Closes the socket if one is open; safe to call repeatedly.
     */
    public function close()
    {
        if (is_resource($this->m_fp)) {
            fclose($this->m_fp);
        }
        $this->m_fp = null;
    }

    /**
     * Turns a relative URL into an absolute http URL using the host and
     * directory of the current request.
     *
     * @param string $surl Relative or absolute URL; a "#fragment" is dropped.
     * @return string Absolute URL, or "" when it cannot be resolved
     *                (empty input, bare "." / "..", or too many ".." steps).
     */
    public function fillurl($surl)
    {
        $surl = trim($surl);
        if ($surl == "") {
            return "";
        }
        $pos = strpos($surl, "#");
        if ($pos !== false && $pos > 0) {
            $surl = substr($surl, 0, $pos);
        }

        if ($surl[0] == "/") {
            // Host-relative path.
            $okurl = "http://" . $this->homeurl . "/" . $surl;
        } elseif ($surl[0] == ".") {
            if (strlen($surl) <= 2) {
                return "";
            }
            // Count ".." steps, skip "." segments, keep everything else.
            $segments = explode("/", $surl);
            $last = count($segments) - 1;
            $pathstep = 0;
            $dstr = "";
            foreach ($segments as $i => $segment) {
                if ($segment === "..") {
                    $pathstep++;
                } elseif ($segment === ".") {
                    continue;
                } elseif ($i < $last) {
                    $dstr .= $segment . "/";
                } else {
                    $dstr .= $segment;
                }
            }
            // The first base segment is the host, so it can never be climbed past.
            $basesegments = explode("/", $this->baseurlpath);
            if (count($basesegments) <= $pathstep) {
                return "";
            }
            $keep = count($basesegments) - $pathstep;
            $pstr = "http://";
            for ($i = 0; $i < $keep; $i++) {
                $pstr .= $basesegments[$i] . "/";
            }
            $okurl = $pstr . $dstr;
        } elseif (strlen($surl) >= 7 && strtolower(substr($surl, 0, 7)) == "http://") {
            $okurl = $surl;
        } else {
            // Plain relative path, resolved against the current directory.
            $okurl = "http://" . $this->baseurlpath . "/" . $surl;
        }

        // Collapse repeated slashes everywhere except in the scheme prefix.
        $okurl = preg_replace('#^http://#i', "", $okurl);
        $okurl = preg_replace('#/+#', "/", $okurl);
        return "http://" . $okurl;
    }
}
?>
申明:本教程内容由威凡网编辑整理并提供IT程序员分享学习,如文中有侵权行为,请与站长联系(QQ:254677821)!
|