![]() |
|
#1
|
|||
|
|||
|
Hello
I'm looking for a macro that will extract data from an HTML file, Here is the code that did the job, but in the HTML file the search code is changed and now the macro doesn't work. The macro worked pretty fast, if any one can fix this i would really appreciate it, attached is the input file with raw data and the output file that shows what it should look like. Below are the unique search keys where the data appears. <span itemprop="streetAddress"> <a href="/name/ <dt class="col-md-4">Phone Number</dt> <dt class="col-md-4">Email Address Thanks. Macro that was written earlier =============================== Dim oSource As Document Dim oDoc As Document Dim oRng As Range, oParaRng As Range Dim lngP As Long Dim sName As String, sAdd As String, sPhone As String, sExtract As String Set oSource = ActiveDocument Set oRng = oSource.Range Set oDoc = Documents.Add oDoc.Range.Font.Name = "Courier New" oDoc.Range.Font.Size = 10 With oRng.Find Do While .Execute(FindText:="<div class=" & Chr(34) & "c-people-result__address" & Chr(34) & ">") oRng.MoveEnd wdParagraph, 2 oRng.MoveStart wdParagraph, -4 For lngP = 1 To oRng.Paragraphs.Count Select Case lngP Case 1 Set oParaRng = oRng.Paragraphs(lngP).Range oParaRng.End = oParaRng.End - 1 sName = Trim(Replace(oParaRng.Text, "</a>", "")) sExtract = sName Case 4 Set oParaRng = oRng.Paragraphs(lngP).Range oParaRng.End = oParaRng.End - 1 oParaRng.MoveStartUntil ">" oParaRng.Start = oParaRng.Start + 1 sAdd = Replace(oParaRng.Text, "</div>", "") sExtract = sExtract & vbTab & sAdd Case 5 Set oParaRng = oRng.Paragraphs(lngP).Range oParaRng.End = oParaRng.End - 1 oParaRng.MoveStartUntil "(" sPhone = Replace(oParaRng.Text, "</div>", "") sExtract = sExtract & vbTab & sPhone End Select Next lngP oDoc.Range.InsertAfter Trim(sExtract) & vbCr oRng.Collapse 0 Loop End With |
|
#2
|
||||
|
||||
|
I can't help, but noticed closing quote missing in…
<a href="/name/ …just in case it's not irrelevant. |
|
#3
|
|||
|
|||
|
Its just a key to search on, doesn't have to be exact, since as soon as that part is found the name appears in that line and that name just has to be extracted.
|
|
#4
|
||||
|
||||
|
This type of data recovery is highly document dependent but based on your sample, the following will recover the data and write it to your output document. If the document layout changes then it won't work.
Code:
Option Explicit
Sub Macro1()
Const strPath As String = "C:\Path\output data 0825.doc" 'The path where the data collection document is stored
Dim oRng As Range, oRng2 As Range
Dim oColl As Collection
Dim lngCol As Long
Dim strName As String
Dim strAddress As String
Dim strPostCode As String
Dim strPhone As String
Dim strEmail As String
Dim oTarget As Document
Dim vAdd As Variant
Dim oSource As Document
Set oSource = ActiveDocument
Const sNum As String = "0123456789"
Set oRng = oSource.Range
Set oColl = New Collection
With oRng.Find
Do While .Execute(FindText:="<a href=""/name/")
On Error GoTo lbl_Exit
If InStr(oRng, "<a href=""/name/") > 0 Then
With oRng
.Collapse 0
.MoveEndUntil Chr(34)
strName = Replace(.Text, "-", " ")
.Collapse 0
.End = oSource.Range.End
.End = .Start + InStr(oRng, "<span itemprop=""streetAddress"">")
.MoveEndUntil sNum
.Collapse 0
.MoveEndUntil "<"
strAddress = .Text
.End = .Paragraphs(1).Range.End - 1
.MoveEndUntil sNum, wdBackward
.Collapse 0
.MoveStartUntil ">", wdBackward
strPostCode = .Text
.End = oSource.Range.End
.End = .Start + InStr(oRng, "<span itemprop=""telephone"">") + 26
.Collapse 0
.MoveEndWhile sNum & "-"
strPhone = .Text
.End = oSource.Range.End
.End = .Start + InStr(oRng, "<span itemprop=""email"">") + 22
.Collapse 0
.MoveEndUntil "<"
strEmail = .Text
oColl.Add strAddress & "|" & strPostCode & "|" & strName & "|" & strPhone & "|" & strEmail
.Collapse 0
End With
End If
Loop
End With
If oColl.Count > 0 Then
Set oTarget = Documents.Open(strPath)
Set oRng2 = oTarget.Range
For lngCol = 1 To oColl.Count
vAdd = Split(oColl(lngCol), "|")
oRng2.Collapse 0
oRng2.Text = vbCr & vAdd(0) & " , " & vAdd(1) & vbTab & vAdd(2) & vbTab & vAdd(3) & vbTab & vAdd(4)
Next lngCol
End If
lbl_Exit:
Set oSource = Nothing: Set oTarget = Nothing
Set oRng = Nothing: Set oRng2 = Nothing
Set oColl = Nothing
Exit Sub
End Sub
__________________
Graham Mayor - MS MVP (Word) (2002-2019) Visit my web site for more programming tips and ready made processes www.gmayor.com |
|
|
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Extract data from HTML File. | donlincolnmsof | Word VBA | 5 | 08-26-2019 08:36 PM |
| Extract data from HTML File. | donlincolnmsof | Word VBA | 0 | 03-07-2019 12:17 PM |
| Need help to extract specific data from text file using vba | victor92 | Excel Programming | 0 | 12-01-2017 12:53 AM |
Extract Data From Text file based on Pattern
|
PRA007 | Word VBA | 13 | 11-01-2015 11:20 PM |
| Macro to highlight repeated words in word file and extract into excel file | aabri | Word VBA | 1 | 06-14-2015 07:20 AM |