vba xmlhttp 抓取网页(我是一家VR公司的实习生,因为我无法为狗屎编码)

优采云 发布时间: 2021-10-02 18:01

  vba xmlhttp 抓取网页(我是一家VR公司的实习生,因为我无法为狗屎编码)

  我在 StackOverflow文章 上的第一篇文章,所以请放松我:)

  我是一家 VR 公司的实习生,我正在尝试使用互联网搜索数据。我使用XMLHTTP成功提取了正确的数据(因为搜索的数据量很大),但是除非我登录,否则我会遇到无法读取信息的问题。

  EG 搜索词:7980 Sunset Blvd 结果:公司 ***** ***** *****,***** *****。

  如果我手动登录并搜索此位置,则可以通过星号检查所有信息。

  我的问题是:如何通过 MSXML2.XMLHTTP(类似于 Google 登录表单的脚本)登录

  研究了很多,遇到了这个问题文章,但是我不能把他例子的登录翻译成我的,因为我不会编码狗屎!哈哈

  这是我用于网络爬行的工作代码:

  Sub GetOwners()

Dim URL As String, lastRow As Long

Dim XMLHttp As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object

Dim start_time As Date

Dim end_time As Date

Dim var As String

Dim var1 As Object

lastRow = Range("A" & Rows.Count).End(xlUp).Row

Dim cookie As String

Dim result_cookie As String

start_time = Time

Debug.Print "start_time:" & start_time

For i = 2 To lastRow

URL = "https://www.spokeo.com/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)

Set XMLHttp = CreateObject("MSXML2.serverXMLHTTP")

XMLHttp.Open "GET", URL, False

XMLHttp.setRequestHeader "Content-Type", "text/xml"

XMLHttp.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"

XMLHttp.send

Set html = CreateObject("htmlfile")

html.body.innerHTML = XMLHttp.responseText

Set objResultDiv = html.GetElementById("profile_selection")

Set var1 = html.GetElementById("property_owners")

Cells(i, 2).Activate

If html.GetElementById("property_owners") Is Nothing Then

Cells(i, 2).Value = "-"

Else

Cells(i, 2).Value = var1.innerText

End If

DoEvents

Next

end_time = Time

Debug.Print "end_time:" & end_time

Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)

MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)

  这是我的 Frankenstein 试图获取 Vitor Prado 的代码并申请我的申请:

  Sub testXMLHTTP()

Dim xml As Object

Dim html As HTMLDocument

Dim dados As Object

Dim text As Object

Dim html2 As HTMLDocument

Dim xml2 As Object

Set xml = CreateObject("Msxml2.ServerXMLHTTP.6.0")

Set html = CreateObject("htmlFile")

With xml

.Open "GET", "https://www.spokeo.com/login?", False

.send

End With

strCookie = xml.getResponseHeader("Set-Cookie")

html.body.innerHTML = xml.responseText

Set objvstate = html.GetElementById("__VIEWSTATE")

Set objvstategen = html.GetElementById("__VIEWSTATEGENERATOR")

Set objeventval = html.GetElementById("__EVENTVALIDATION")

vstate = objvstate.Value

vstategen = objvstategen.Value

eventval = objeventval.Value

'URL Encode ViewState

Dim ScriptEngine As ScriptControl

Set ScriptEngine = New ScriptControl

ScriptEngine.Language = "JScript"

ScriptEngine.AddCode "function encode(vstate) {return encodeURIComponent(vstate);}"

Dim encoded As String

encoded = ScriptEngine.Run("encode", vstate)

vstate = encoded

'URL Encode Event Validation

ScriptEngine.AddCode "function encode(eventval) {return encodeURIComponent(eventval);}"

encoded = ScriptEngine.Run("encode", eventval)

eventval = encoded

'URL Encode ViewState Generator

ScriptEngine.AddCode "function encode(vstategen) {return encodeURIComponent(vstategen);}"

encoded = ScriptEngine.Run("encode", vstategen)

vstategen = encoded

Postdata = "__EVENTTARGET=" & "&__EVENTARGUMENT=" & "&__VIEWSTATE=" & vstate & "&__VIEWSTATEGENERATOR=" & vstategen & "&__EVENTVALIDATION=" & eventval & "&ctl00$ddlTipoUsuario=#rdBtnNaoContribuinte" & "&ctl00$UserNameAcessivel=Digite+o+Usuário" & "&ctl00$PasswordAcessivel=x" & "&ctl00$ConteudoPagina$Login1$rblTipo=rdBtnNaoContribuinte" & "&ctl00$ConteudoPagina$Login1$UserName=MYUSERNAME" & "&ctl00$ConteudoPagina$Login1$Password=MYPASSWORD" & "&ctl00$ConteudoPagina$Login1$Login=Acessar" & "&ctl00$ConteudoPagina$Login1$txtCpfCnpj=Digite+o+Usuário"

Set xml2 = CreateObject("Msxml2.ServerXMLHTTP.6.0")

Set html2 = CreateObject("htmlFile")

With xml2

.Open "POST", "https://www.spokeo.com/login?", False

.setRequestHeader "Cookie", strCookie

.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

.setRequestHeader "Content-Lenght", Len(Postdata)

.send (Postdata)

End With

html2.body.innerHTML = xml2.responseText

Set objResult = html2.GetElementById("dadosDoUsuario")

GetElementById = objResult.innerText

MsgBox GetElementById

End Sub

  有很多参考我没有完全改变,因为我不知道在哪里看。

  我仍然会将这两个代码合并为一个登录。首先,我知道它会分开工作。

  任何帮助将不胜感激,我提前为我的编码无知道歉。

  干杯!

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线