asp+XMLHTTP组件做采集常用函数1
当前位置:点晴教程→知识管理交流
→『 技术文档交流 』
'==================================================
'函数名:GetHttpPage '作 用:获取网页源码 '参 数:HttpUrl ------网页地址 '================================================== Function GetHttpPage(HttpUrl) If IsNull(HttpUrl)=True or Len(HttpUrl)<18 or HttpUrl="$False$" Then GetHttpPage="$False$" Exit Function End If Dim Http Set Http=server.createobject("MSXML2.XMLHTTP") Http.open "GET",HttpUrl,False Http.Send() If Http.Readystate<>4 then Set Http=Nothing GetHttpPage="$False$" Exit function End if GetHTTPPage=bytesToBSTR(Http.responseBody,"GB2312") Set Http=Nothing If Err.number<>0 then Err.Clear End If End Function '================================================== '函数名:BytesToBstr '作 用:将获取的源码转换为中文 '参 数:Body ------要转换的变量 '参 数:Cset ------要转换的类型 '================================================== Function BytesToBstr(Body,Cset) Dim Objstream Set Objstream = Server.CreateObject("adodb.stream") objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function '================================================== '函数名:PostHttpPage '作 用:登录 '================================================== Function PostHttpPage(RefererUrl,PostUrl,PostData) Dim xmlHttp Dim RetStr Set xmlHttp = CreateObject("Msxml2.XMLHTTP") xmlHttp.Open "POST", PostUrl, False XmlHTTP.setRequestHeader "Content-Length",Len(PostData) xmlHttp.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" xmlHttp.setRequestHeader "Referer", RefererUrl xmlHttp.Send PostData If Err.Number <> 0 Then Set xmlHttp=Nothing PostHttpPage = "$False$" Exit Function End If PostHttpPage=bytesToBSTR(xmlHttp.responseBody,"GB2312") Set xmlHttp = nothing End Function '================================================== '函数名:UrlEncoding '作 用:转换编码 '================================================== Function UrlEncoding(DataStr) Dim StrReturn,Si,ThisChr,InnerCode,Hight8,Low8 StrReturn = "" For Si = 1 To Len(DataStr) ThisChr = Mid(DataStr,Si,1) If Abs(Asc(ThisChr)) < &HFF Then StrReturn = StrReturn & ThisChr Else InnerCode = Asc(ThisChr) If InnerCode < 0 Then InnerCode = InnerCode + &H10000 End If Hight8 = (InnerCode And &HFF00)\ &HFF Low8 = InnerCode And &HFF StrReturn = StrReturn & "%" & Hex(Hight8) & "%" & Hex(Low8) End If Next UrlEncoding = StrReturn End Function '================================================== '函数名:URLDECODE '作 用:反转换编码 '================================================== Function URLDECODE(ENSTR) Dim DESTR, STRSPECIAL Dim C, I, V DESTR = "" STRSPECIAL = "!""#$%&’()*+,.-_/:;<=>?@[\]^`~%" For I = 1 To Len(ENSTR) C = Mid(ENSTR, I, 1) If C = "%" Then V = EVAL("&H" + Mid(ENSTR, I + 1, 2)) If InStr(STRSPECIAL, Chr(V)) > 0 Then DESTR = DESTR & Chr(V) I = I + 2 Else V = EVAL("&H" + Mid(ENSTR, I + 1, 2) + Mid(ENSTR, I + 4, 2)) DESTR = DESTR & Chr(V) I = I + 5 End If Else If C = "+" Then DESTR = DESTR & " " Else DESTR = DESTR & C End If End If Next URLDECODE = DESTR End Function '================================================== '函数名:GetBody '作 用:截取字符串 '参 数:ConStr ------将要截取的字符串 '参 数:StartStr ------开始字符串 '参 数:OverStr ------结束字符串 '参 数:IncluL ------是否包含StartStr '参 数:IncluR ------是否包含OverStr '================================================== Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR) If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True or StartStr="" or IsNull(StartStr)=True or OverStr="" or IsNull(OverStr)=True Then GetBody="$False$" Exit Function End If Dim ConStrTemp Dim Start,Over ConStrTemp=Lcase(ConStr) StartStr=Lcase(StartStr) OverStr=Lcase(OverStr) Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare) If Start<=0 then GetBody="$False$" Exit Function Else If IncluL=False Then Start=Start+LenB(StartStr) End If End If Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare) If Over<=0 or Over<=Start then GetBody="$False$" Exit Function Else If IncluR=True Then Over=Over+LenB(OverStr) End If End If GetBody=MidB(ConStr,Start,Over-Start) End Function '================================================== '函数名:GetArray '作 用:提取链接地址,以$Array$分隔 '参 数:ConStr ------提取地址的原字符 '参 数:StartStr ------开始字符串 '参 数:OverStr ------结束字符串 '参 数:IncluL ------是否包含StartStr '参 数:IncluR ------是否包含OverStr '================================================== Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR) If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True or StartStr="" or OverStr="" or IsNull(StartStr)=True or IsNull(OverStr)=True Then GetArray="$False$" Exit Function End If Dim TempStr,TempStr2,objRegExp,Matches,Match TempStr="" Set objRegExp = New Regexp objRegExp.IgnoreCase = True objRegExp.Global = True objRegExp.Pattern = "("&StartStr&").+?("&OverStr&")" Set Matches =objRegExp.Execute(ConStr) For Each Match in Matches TempStr=TempStr & "$Array$" & Match.Value Next Set Matches=nothing If TempStr="" Then GetArray="$False$" Exit Function End If TempStr=Right(TempStr,Len(TempStr)-7) If IncluL=False then objRegExp.Pattern =StartStr TempStr=objRegExp.Replace(TempStr,"") End if If IncluR=False then objRegExp.Pattern =OverStr TempStr=objRegExp.Replace(TempStr,"") End if Set objRegExp=nothing TempStr=Replace(TempStr,"""","") TempStr=Replace(TempStr,"'","") TempStr=Replace(TempStr," ","") If TempStr="" then GetArray="$False$" Else GetArray=TempStr End if End Function '================================================== '函数名:DefiniteUrl '作 用:将相对地址转换为绝对地址 '参 数:PrimitiveUrl ------要转换的相对地址 '参 数:ConsultUrl ------当前网页地址 '================================================== Function DefiniteUrl(Byval PrimitiveUrl,Byval ConsultUrl) Dim ConTemp,PriTemp,Pi,Ci,PriArray,ConArray If PrimitiveUrl="" or ConsultUrl="" or PrimitiveUrl="$False$" or ConsultUrl="$False$" Then DefiniteUrl="$False$" Exit Function End If If Left(Lcase(ConsultUrl),7)<>"http://" Then ConsultUrl= "http://" & ConsultUrl End If ConsultUrl=Replace(ConsultUrl,"\","/") ConsultUrl=Replace(ConsultUrl,"://",":\\") PrimitiveUrl=Replace(PrimitiveUrl,"\","/") If Right(ConsultUrl,1)<>"/" Then If Instr(ConsultUrl,"/")>0 Then If Instr(Right(ConsultUrl,Len(ConsultUrl)-InstrRev(ConsultUrl,"/")),".")>0 then Else ConsultUrl=ConsultUrl & "/" End If Else ConsultUrl=ConsultUrl & "/" End If End If ConArray=Split(ConsultUrl,"/") If Left(LCase(PrimitiveUrl),7) = "http://" then DefiniteUrl=Replace(PrimitiveUrl,"://",":\\") ElseIf Left(PrimitiveUrl,1) = "/" Then DefiniteUrl=ConArray(0) & PrimitiveUrl ElseIf Left(PrimitiveUrl,2)="./" Then PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-2) If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl End If ElseIf Left(PrimitiveUrl,3)="../" then Do While Left(PrimitiveUrl,3)="../" PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-3) Pi=Pi+1 Loop For Ci=0 to (Ubound(ConArray)-1-Pi) If DefiniteUrl<>"" Then DefiniteUrl=DefiniteUrl & "/" & ConArray(Ci) Else DefiniteUrl=ConArray(Ci) End If Next DefiniteUrl=DefiniteUrl & "/" & PrimitiveUrl Else If Instr(PrimitiveUrl,"/")>0 Then PriArray=Split(PrimitiveUrl,"/") If Instr(PriArray(0),".")>0 Then If Right(PrimitiveUrl,1)="/" Then DefiniteUrl="http:\\" & PrimitiveUrl Else If Instr(PriArray(Ubound(PriArray)-1),".")>0 Then DefiniteUrl="http:\\" & PrimitiveUrl Else DefiniteUrl="http:\\" & PrimitiveUrl & "/" End If End If Else If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl End If End If Else If Instr(PrimitiveUrl,".")>0 Then If Right(ConsultUrl,1)="/" Then If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then DefiniteUrl="http:\\" & PrimitiveUrl & "/" Else DefiniteUrl=ConsultUrl & PrimitiveUrl End If Else If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then DefiniteUrl="http:\\" & PrimitiveUrl & "/" Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl End If End If Else If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl & "/" Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl & "/" End If End If End If End If If Left(DefiniteUrl,1)="/" then DefiniteUrl=Right(DefiniteUrl,Len(DefiniteUrl)-1) End if If DefiniteUrl<>"" Then DefiniteUrl=Replace(DefiniteUrl,"//","/") DefiniteUrl=Replace(DefiniteUrl,":\\","://") Else DefiniteUrl="$False$" End If End Function '================================================== '函数名:ReplaceSaveRemoteFile '作 用:替换、保存远程图片 '参 数:ConStr ------ 要替换的字符串 '参 数:SaveTf ------ 是否保存文件,False不保存,True保存 '参 数: TistUrl------ 当前网页地址 '================================================== Function ReplaceSaveRemoteFile(ConStr,strInstallDir,strChannelDir,SaveTf,TistUrl) If ConStr="$False$" or ConStr="" or strInstallDir="" or strChannelDir="" Then ReplaceSaveRemoteFile=ConStr Exit Function End If Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2 Dim Start1,Start2 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True Re.Pattern ="<img.+?[^\>]>" Set Matches =Re.Execute(ConStr) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next If TempStr<>"" Then TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) Re.Pattern ="src\s*=\s*.+?\.(gif|jpg|bmp|jpeg|psd|png|svg|dxf|wmf|tiff)" Set Matches =Re.Execute(TempArray(Tempi)) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next Next End if If TempStr<>"" Then Re.Pattern ="src\s*=\s*" TempStr=Re.Replace(TempStr,"") End If Set Matches=nothing Set Re=nothing If TempStr="" or IsNull(TempStr)=True Then ReplaceSaveRemoteFile=ConStr Exit function End if TempStr=Replace(TempStr,"""","") TempStr=Replace(TempStr,"'","") TempStr=Replace(TempStr," ","") Dim RemoteFileurl,SavePath,PathTemp,DtNow,strFileName,strFileType,ArrSaveFileName,RanNum,Arr_Path DtNow=Now() If SaveTf=True then SavePath=strInstallDir & strChannelDir & "/UploadFiles/" & year(DtNow) & right("0" & month(DtNow),2) & "/" Arr_Path=Split(SavePath,"/") PathTemp="" For Tempi=0 To Ubound(Arr_Path) If Tempi=0 Then PathTemp=Arr_Path(0) & "/" ElseIf Tempi=Ubound(Arr_Path) Then Exit For Else PathTemp=PathTemp & Arr_Path(Tempi) & "/" End If If CheckDir(PathTemp)=False Then If MakeNewsDir(PathTemp)=False Then SaveTf=False Exit For End If End If Next End If '去掉重复图片开始 TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then TempStr=TempStr & "$Array$" & TempArray(Tempi) End If Next TempStr=Right(TempStr,Len(TempStr)-7) TempArray=Split(TempStr,"$Array$") '去掉重复图片结束 '转换相对图片地址开始 TempStr="" For Tempi=0 To Ubound(TempArray) TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl) Next TempStr=Right(TempStr,Len(TempStr)-7) TempStr=Replace(TempStr,Chr(0),"") TempArray2=Split(TempStr,"$Array$") TempStr="" '转换相对图片地址结束 '图片替换/保存 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True For Tempi=0 To Ubound(TempArray2) RemoteFileUrl=TempArray2(Tempi) If RemoteFileUrl<>"$False$" And SaveTf=True Then'保存图片 ArrSaveFileName = Split(RemoteFileurl,".") strFileType=Lcase(ArrSaveFileName(Ubound(ArrSaveFileName)))'文件类型 If strFileType="asp" or strFileType="asa" or strFileType="aspx" or strFileType="cer" or strFileType="cdx" or strFileType="exe" or strFileType="rar" or strFileType="zip" then UploadFiles="" ReplaceSaveRemoteFile=ConStr Exit Function End If Randomize RanNum=Int(900*Rnd)+100 strFileName = year(DtNow) & right("0" & month(DtNow),2) & right("0" & day(DtNow),2) & right("0" & hour(DtNow),2) & right("0" & minute(DtNow),2) & right("0" & second(DtNow),2) & ranNum & "." & strFileType Re.Pattern =TempArray(Tempi) If SaveRemoteFile(SavePath & strFileName,RemoteFileUrl)=True Then PathTemp=Replace(SavePath &strFileName,strInstallDir & strChannelDir & "/","[InstallDir_ChannelDir]") ConStr=Re.Replace(ConStr,PathTemp) Re.Pattern=strInstallDir & strChannelDir & "/" UploadFiles=UploadFiles & "|" & Re.Replace(SavePath &strFileName,"") Else PathTemp=RemoteFileUrl ConStr=Re.Replace(ConStr,PathTemp) 'UploadFiles=UploadFiles & "|" & RemoteFileUrl End If ElseIf RemoteFileurl<>"$False$" and SaveTf=False Then'不保存图片 Re.Pattern =TempArray(Tempi) ConStr=Re.Replace(ConStr,RemoteFileUrl) UploadFiles=UploadFiles & "|" & RemoteFileUrl End If Next Set Re=nothing If UploadFiles<>"" Then UploadFiles=Right(UploadFiles,Len(UploadFiles)-1) End If ReplaceSaveRemoteFile=ConStr End function 该文章在 2013/11/28 11:37:45 编辑过 |
关键字查询
相关文章
正在查询... |