Asp.Net、asp实现的搜索引擎网址收录检查程序
使用asp.net或者asp检查某个url地址(例如某篇文章的网址)是否被百度,谷歌,搜狗等搜索引擎收录。
实现原理:直接搜索你那篇文章的url地址(可以不带协议;带上协议也行,代码会自动去掉协议内容),如果已被索引会返回搜索结果,否则会提示找不到信息。
Asp.Net检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码:
usingSystem;
usingSystem.Net;
usingSystem.Text;
usingSystem.IO;
usingSystem.Web;
/// <summary>
/// Checks whether a URL has been indexed by a search engine (Baidu, Google or Sogou).
/// The URL itself is submitted as the search query; the page that comes back is then
/// scanned for that engine's "nothing found" marker text.
/// </summary>
public class SearchEngineIndex
{
    /// <summary>Search URL prefixes, indexed by engine id (0: Baidu, 1: Google, 2: Sogou).</summary>
    public static string[] urls =
    {
        "http://www.baidu.com/s?ie=utf-8&wd=",      // Baidu
        "https://www.google.com.hk/search?q=",      // Google
        "http://www.sogou.com/web?ie=utf8&query="   // Sogou
    };

    /// <summary>
    /// Text each engine prints when the queried URL is not indexed; same index order as <see cref="urls"/>.
    /// </summary>
    public static string[] noFindKeyword = { "抱歉,没有找到与", "找不到和您的查询", "未收录?" };

    /// <summary>
    /// Picks the text encoding for a response from its Content-Type header value.
    /// </summary>
    /// <param name="contenttype">Content-Type header value; may be null or empty.</param>
    /// <returns>
    /// Code page 936 when the header mentions gb2312 or gbk, code page 950 when it mentions big5,
    /// UTF-8 otherwise.
    /// </returns>
    private static Encoding GetEncoding(string contenttype)
    {
        if (string.IsNullOrEmpty(contenttype))
        {
            return Encoding.UTF8;
        }

        // Ordinal, case-insensitive matching: a culture-sensitive ToLower() turns "BIG5" into
        // "bıg5" under a Turkish locale and the charset would no longer be recognised.
        if (contenttype.IndexOf("gb2312", StringComparison.OrdinalIgnoreCase) != -1
            || contenttype.IndexOf("gbk", StringComparison.OrdinalIgnoreCase) != -1)
        {
            return Encoding.GetEncoding(936);
        }

        if (contenttype.IndexOf("big5", StringComparison.OrdinalIgnoreCase) != -1)
        {
            return Encoding.GetEncoding(950);
        }

        return Encoding.UTF8;
    }

    /// <summary>
    /// Removes a leading "http://" or "https://" (in any letter case) from a URL.
    /// Only the prefix is removed, so a URL embedded later in the string (for example inside
    /// a query parameter) is left untouched.
    /// </summary>
    /// <param name="url">URL with or without a scheme; must not be null.</param>
    /// <returns>The URL without its leading scheme.</returns>
    private static string StripScheme(string url)
    {
        string[] schemes = { "https://", "http://" };
        foreach (string scheme in schemes)
        {
            if (url.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
            {
                return url.Substring(scheme.Length);
            }
        }

        return url;
    }

    /// <summary>
    /// Downloads a page with HttpWebRequest and decodes it using the charset named in the
    /// response's Content-Type header.
    /// </summary>
    /// <param name="url">Absolute URL to fetch.</param>
    /// <param name="addUseragent">
    /// When true, a crawler-like User-Agent header is sent so the target site is less likely
    /// to block the request.
    /// </param>
    /// <returns>The page text, or null when the request or the read fails.</returns>
    public static string GetHtml(string url, bool addUseragent)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        if (addUseragent)
        {
            request.UserAgent = "Googlebot|Feedfetcher-Google|Baiduspider";
        }

        try
        {
            // using-blocks release the connection and the reader even when the read throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, GetEncoding(response.ContentType)))
            {
                return reader.ReadToEnd();
            }
        }
        catch (WebException)
        {
            // DNS failure, refused connection, timeout or HTTP error status.
            return null;
        }
        catch (IOException)
        {
            // Connection dropped while the body was being read.
            return null;
        }
        catch (NotSupportedException)
        {
            // The code page requested by GetEncoding is not available on this runtime.
            return null;
        }
    }

    /// <summary>
    /// Checks whether a URL is indexed by the chosen search engine.
    /// </summary>
    /// <param name="url">
    /// URL to look up. A leading http:// or https:// is removed and the rest is lower-cased
    /// before it is sent as the search query.
    /// </param>
    /// <param name="engin">
    /// 0: Baidu, 1: Google, 2: Sogou. Any other value falls back to 0 (Baidu).
    /// </param>
    /// <returns>
    /// true when a result page was fetched and it does not contain the engine's "nothing found"
    /// marker; false when the URL is null, empty or only a scheme, when the page could not be
    /// fetched, or when the marker is present.
    /// </returns>
    public static bool CheckIndex(string url, int engin)
    {
        if (string.IsNullOrEmpty(url))
        {
            return false;
        }

        if (engin < 0 || engin > 2)
        {
            engin = 0;
        }

        string target = StripScheme(url);
        if (target.Length == 0)
        {
            // Nothing left to search for (input was just "http://"); an empty query would
            // return a page without the marker and be misreported as indexed.
            return false;
        }

        string searchUrl = urls[engin] + HttpUtility.UrlEncode(target.ToLowerInvariant());
        string html = GetHtml(searchUrl, true);

        return html != null
            && html.IndexOf(noFindKeyword[engin], StringComparison.Ordinal) == -1;
    }
}
// Usage example: look the same article URL up in each supported search engine.
const string articleUrl = "www.nhooo.com/article/20101014/2902.aspx";
SearchEngineIndex.CheckIndex(articleUrl, 0); // Baidu
SearchEngineIndex.CheckIndex(articleUrl, 1); // Google
SearchEngineIndex.CheckIndex(articleUrl, 2); // Sogou
Asp检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码:
<%
' Checks whether a URL has been indexed by a search engine (Baidu, Google or Sogou).
' The URL itself is submitted as the search query; the returned page is then scanned
' for that engine's "nothing found" marker text.
Class SearchEnginIndex
    ' urls(i): search URL prefix for engine i.
    ' noFindKeyword(i): text engine i prints when the queried URL is not indexed.
    ' Engine ids: 0 = Baidu, 1 = Google, 2 = Sogou.
    Dim urls, noFindKeyword

    Private Sub Class_Initialize
        urls = Array("http://www.baidu.com/s?ie=utf-8&wd=", "https://www.google.com.hk/search?q=", "http://www.sogou.com/web?ie=utf8&query=")
        noFindKeyword = Array("抱歉,没有找到与", "找不到和您的查询", "未收录?")
    End Sub

    ' Maps a Content-Type header value to the charset name used to decode the body:
    ' "gb2312" when the header mentions gb2312 or gbk, "big5" when it mentions big5,
    ' "utf-8" otherwise.
    Private Function GetEncoding(contenttype)
        Dim lowered
        lowered = LCase(contenttype)
        ' Either name selects the Chinese charset. The previous "And" required both names
        ' in the same header, so this branch was never taken.
        If InStr(lowered, "gb2312") <> 0 Or InStr(lowered, "gbk") <> 0 Then
            GetEncoding = "gb2312"
        ElseIf InStr(lowered, "big5") <> 0 Then
            GetEncoding = "big5"
        Else
            GetEncoding = "utf-8"
        End If
    End Function

    ' Converts binary data to a string using the given charset name.
    Private Function BinToString(bin, encoding)
        Dim stream
        Set stream = Server.CreateObject("Adodb.Stream")
        ' Write the bytes in binary mode (Type 1), then rewind and re-read them as text (Type 2).
        stream.Type = 1
        stream.Mode = 3
        stream.Open
        stream.Write bin
        stream.Position = 0
        stream.Type = 2
        stream.Charset = encoding
        BinToString = stream.ReadText
        stream.Close
        Set stream = Nothing
    End Function

    ' Removes a leading "http://" or "https://" (in any letter case) from a URL.
    ' Only the prefix is removed; the rest of the URL is returned unchanged.
    Private Function StripProtocol(url)
        Dim result
        result = url & ""
        If LCase(Left(result, 7)) = "http://" Then
            result = Mid(result, 8)
        ElseIf LCase(Left(result, 8)) = "https://" Then
            result = Mid(result, 9)
        End If
        StripProtocol = result
    End Function

    ' Fetches url with a synchronous GET and returns the body decoded with the charset
    ' named in the response's Content-Type header. Returns "" when the body is empty.
    ' Side effect kept from the original: Response.CharSet of the current page is set
    ' to the detected charset.
    ' NOTE(review): "microsoft.xmlhttp" is the client-side component; Microsoft recommends
    ' MSXML2.ServerXMLHTTP for server-side requests - confirm before switching.
    Public Function GetHtml(url)
        Dim xhr, encoding, body
        GetHtml = ""
        Set xhr = Server.CreateObject("microsoft.xmlhttp")
        xhr.open "get", url, False
        xhr.send
        encoding = GetEncoding(xhr.getResponseHeader("content-type"))
        Response.CharSet = encoding
        body = xhr.responseBody
        ' Skip decoding when there is nothing to decode.
        If LenB(body) > 0 Then
            GetHtml = BinToString(body, encoding)
        End If
        Set xhr = Nothing
    End Function

    ' Checks whether url is indexed by the chosen search engine.
    '   url   - URL to look up; a leading http:// or https:// is removed first.
    '   engin - 0: Baidu, 1: Google, 2: Sogou. Any other value falls back to 0 (Baidu).
    ' Returns True when a result page was fetched and it does not contain the engine's
    ' "nothing found" marker; False when url is empty or only a protocol, when the result
    ' page is empty, or when the marker is present.
    Public Function CheckIndex(url, engin)
        Dim target, engineId, html
        CheckIndex = False

        ' Work on local copies: VBScript passes arguments ByRef, so assigning to url or
        ' engin would overwrite the caller's variables.
        target = StripProtocol(url)
        If Len(target) = 0 Then Exit Function

        engineId = engin
        If engineId < 0 Or engineId > 2 Then engineId = 0

        html = GetHtml(urls(engineId) & Server.URLEncode(target))
        ' An empty page carries no marker and would otherwise be misreported as indexed.
        If Len(html) = 0 Then Exit Function

        CheckIndex = (InStr(html, noFindKeyword(engineId)) = 0)
    End Function
End Class
' Usage example: write the index status of one article URL for each engine,
' in engine-id order (0 = Baidu, 1 = Google, 2 = Sogou).
Set sei = New SearchEnginIndex
For engineId = 0 To 2
    ' The URL is passed as a literal on purpose: arguments are ByRef, so a shared
    ' variable could be overwritten by the call.
    Response.Write sei.CheckIndex("www.nhooo.com/article/20101014/2902.aspx", engineId)
Next
Set sei = Nothing
%>