以下是引用片段:
以下文件保存为:test.asp 后运行
<% on error resume next time1=timer dim reg,vUrl,VBody,temp1,temp2,code,time1,time2,title vUrl=trim(request.form("url")) reg="\<meta.+ charset= {0,}([^\""| |\>|\/]*).+\/{0,1}\>" if vUrl<>"" then VBody=GetResStr(trim(request.form("url"))) temp1=VBody:temp2=VBody code=GetCode(temp1,reg) title=GetCode(temp2,"\<title\>(.*)\<\/title\>") else vUrl="http://" end if time2=timer %> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=gb2312"> <title>抓取页面</title> <%if err.number<>0 then%> <script language="javascript">alert('发生错误!\n您输入的URL为\"<%=vUrl%>\"\n请检查您输入的URL是否合法!');</script> <%end if%> </head> <body style="font-size:12px;margin:20px 0 0 20px;"> <form name="geturl" action="test.asp" method="post"> 请输入合法URL(必须以http://开头):<br /><input name="url" type="text" size=60 value="<%=vUrl%>"/><br /> <input type="submit" value="抓取" /><br /> </form> 所用时间:<font color=green><%=formatnumber((time2-time1)*1000,2)%>MS</font> <br /> 页面标题:<font color=green><%=title%></font> 页面编码:<font color=green><%=code%></font> <br /> <textarea cols=150 rows=30><%=VBody%></textarea> </body> </html>
<% function GetResStr(URL) dim ResBody,ResStr,PageCode Set Http=server.createobject("msxml2.serverxmlhttp.3.0") Http.setTimeouts 10000, 10000, 10000, 10000 Http.open "GET",URL,False Http.Send() If Http.Readystate =4 Then If Http.status=200 Then ResStr=http.responseText ResBody=http.responseBody PageCode=GetCode(ResStr,reg) GetResStr=BytesToBstr(http.responseBody,PageCode) End If End If End Function
'函数名:BytesToBstr '作用:转换二进制数据为字符 '参数:Body-二进制数据,Cset-文本编码方式 Function BytesToBstr(Body,Cset) Dim Objstream Set Objstream = Server.CreateObject("adodb.stream") objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function '函数名:GetCode '作用:转换二进制为字符 '参数:str-待查询字符串,regstr-正则表达式 Function GetCode(str,regstr) Dim Reg set Reg= new RegExp Reg.IgnoreCase = True Reg.MultiLine = True Reg.Pattern =regstr Set Cols = Reg.Execute(str) str=Cols(0).SubMatches(0) GetCode=str end function %>
|