Thread: [Solved] Extract Webdata from word
View Single Post
 
Old 10-15-2015, 12:55 AM
macropod's Avatar
macropod macropod is offline Windows 7 64bit Office 2010 32bit
Administrator
 
Join Date: Dec 2010
Location: Canberra, Australia
Posts: 22,387
macropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond repute
Default

I've never done much web data retrieval, but here's some code to get you started:
Code:
Sub AddGPHLink()
Dim Rng As Range, StrTxt As String, Tbl As Table, i As Long
Const StrLnk As String = "https://www.google.co.in/patents/"
Dim strLink As String
With ActiveDocument
  With .Range
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .Format = False
      .Forward = True
      .Wrap = wdFindStop
      .MatchWildcards = True
      .Text = "[UECW][SPNO] [0-9]{4,} [A-Z0-9]{1,2}"
      .Replacement.Text = ""
      .Execute
    End With
    Do While .Find.Found
      Set Rng = .Duplicate
      With Rng
        StrTxt = .Text
        .Delete
      End With
      .Hyperlinks.Add Anchor:=Rng, TextToDisplay:=StrTxt, _
        Address:=StrLnk & Replace(StrTxt, " ", "") & "?cl=en"
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
    .Start = ActiveDocument.Range.Start
    With .Find
      .Text = "IN [0-9A-Z]{7,}"
      .Execute
    End With
    Do While .Find.Found
      Set Rng = .Duplicate
      With Rng
        StrTxt = .Text
        .Delete
      End With
      .Hyperlinks.Add Anchor:=Rng, TextToDisplay:=StrTxt, _
        Address:=StrLnk & Replace(StrTxt, " ", "") & "?cl=en"
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
  End With
  For Each Tbl In .Tables
    With Tbl
      For i = 1 To .Rows.Count
        StrTxt = ""
        With .Cell(i, 2).Range
          If .Hyperlinks.Count > 0 Then
            StrTxt = Get_URL_Data(.Hyperlinks(1).Address)
          End If
        End With
        .Cell(i, 5).Range.Text = StrTxt
      Next
    End With
  Next
End With
Set Rng = Nothing
End Sub
 
Function Get_URL_Data(StrUrl As String) As String
'References to Internet Explorer & Microsoft HTML required
Dim Browser As SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim StrTmp As String, StrTxt As String
Set Browser = New SHDocVw.InternetExplorer
'Open the web page
Browser.navigate StrUrl
Do While Browser.Busy
  DoEvents
Loop
Set HTMLDoc = Browser.Document
Do While Browser.Busy
  DoEvents
Loop
'Get the data
On Error Resume Next
StrTmp = Split(HTMLDoc.Title, " - ")(1)
Get_URL_Data = StrTmp
'Close the browser
Browser.Quit
Set HTMLDoc = Nothing: Set Browser = Nothing
Application.ScreenUpdating = True
End Function
__________________
Cheers,
Paul Edstein
[Fmr MS MVP - Word]
Reply With Quote