0
Follow
0
View

Vba getting rid of table while web scraping

anxiaoli520528 注册会员
2023-01-25 03:35

With help of @JNevill, I solved and improved the code. Now if anyone interested can easily put the numbers as a date then the result of rates will be put in a table form in xl sheet.

Sub gettingTablesfromCBR2()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate, DateR As String
Dim TableCellCount, RowNum As Integer
Dim inputdate_from, inputdate_to, inputcurr_val As Variant

inputdate_from = ThisWorkbook.Worksheets("PrimoPagina").Range("A3").Value
inputdate_to = ThisWorkbook.Worksheets("PrimoPagina").Range("E3").Value
inputcurr_val = ThisWorkbook.Worksheets("PrimoPagina").Range("B1").Value
If inputcurr_val = "BLG" Then
inputcurr_val = "R01100"
Else
inputcurr_val = "R01239" 'EUR
End If

IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=" & inputcurr_val & "&UniDbQuery.From=" & inputdate_from & "&UniDbQuery.To=" & inputdate_to


    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
   
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
    For Each TableSection In HTMLTABLE.Children 'Body
            For Each TableRow In TableSection.Children
                RowText = ""
                TableCellCount = 1
                For Each TableCell In TableRow.Children
                    If TableCellCount = 1 Then DateR = TableCell.innerText
                    If TableCellCount = 2 Then Unit = TableCell.innerText
                    If TableCellCount = 3 Then Rate = TableCell.innerText
                    TableCellCount = TableCellCount + 1
                Next TableCell
                Debug.Print Unit, Rate
                
                RowNum = RowNum + 1
                ThisWorkbook.Worksheets("Results").Cells(RowNum, 3).Value = Unit
                ThisWorkbook.Worksheets("Results").Cells(RowNum, 2).Value = Rate
               ThisWorkbook.Worksheets("Results").Cells(RowNum, 1).Value = DateR
                
            Next TableRow
    Next TableSection
Next


End Sub
dlstonefly 注册会员
2023-01-25 03:35

Updated: You can add a counter to your innermost For loop that is reset to 1 before that loop starts. Then you can test this counter to see if it's the first cell of the row and skip it.

Example of this with some small tweaks to your code to just target the table (that has a classname of data):

Sub gettingTablesfromCBR()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLTABLES As MSHTML.IHTMLElementCollection
Dim HTMLTABLE As MSHTML.IHTMLElement
Dim HTMLDIV As MSHTML.IHTMLElement
Dim TableSection As MSHTML.IHTMLElement
Dim TableRow As MSHTML.IHTMLElement
Dim TableCell As MSHTML.IHTMLElement
Dim Unit As String
Dim Rate As String
Dim TableCellCount As Integer

IE.Visible = True
IE.navigate "https://www.cbr.ru/eng/currency_base/dynamics/?UniDbQuery.Posted=True&UniDbQuery.so=1&UniDbQuery.mode=1&UniDbQuery.date_req1=&UniDbQuery.date_req2=&UniDbQuery.VAL_NM_RQ=R01100&UniDbQuery.From=07.08.2021&UniDbQuery.To=05.11.2021"

    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
   
Set HTMLDOC = IE.Document
Set HTMLTABLES = HTMLDOC.getElementsByClassName("data")
For Each HTMLTABLE In HTMLTABLES 'Table
    For Each TableSection In HTMLTABLE.Children 'Body
            For Each TableRow In TableSection.Children
                RowText = ""
                TableCellCount = 1
                For Each TableCell In TableRow.Children
                    If TableCellCount = 2 Then Unit = TableCell.innerText
                    If TableCellCount = 3 Then Rate = TableCell.innerText
                    TableCellCount = TableCellCount + 1
                Next TableCell
                Debug.Print Unit, Rate
            Next TableRow
    Next TableSection
Next


End Sub

About the Author

Question Info

Publish Time
2023-01-25 03:35
Update Time
2023-01-25 03:35