I'd suggest saving the PDF as a text file, then opening it Word and running the following Word macro:
Code:
Sub ParsePDFData()
Application.ScreenUpdating = False
With ActiveDocument.Range
.Paragraphs.First.Range.Delete
.Paragraphs.First.Range.Delete
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = True
.Text = "^13[!^13]@^13[!^13]@^13[!^13]@^13^12^13[!^13]@^13"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "^13[!^13]@^13[!^13]@^13[!^13]@^13^12^13"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "[ ]{1,}^13"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "^13{2,}"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "(^13[0-9]{1,}>)([!$]@)($[!^13]{1,})"
.Replacement.Text = "\1^t\2^t^t\3"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "[0-9]{1,2}/[0-9]{1,2}/[0-9]{2,4}"
.Replacement.Text = "^t^&"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "(EACH )(^t)(^t$)"
.Replacement.Text = "\2\1\3"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "([0-9]{3,}^t[!^t]@^t)([!0-9])"
.Replacement.Text = "\1^t^t\2"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "($[0-9.]{4,}) ($[!^13]{1,})"
.Replacement.Text = "\1^t\2"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "^t[ ]{1,}"
.Replacement.Text = "^t"
.Execute Replace:=wdReplaceAll
DoEvents
.Text = "[ ]{1,}^t"
.Replacement.Text = "^t"
.Execute Replace:=wdReplaceAll
End With
.Copy
End With
Call Export
Application.ScreenUpdating = True
End Sub
Sub Export()
Dim xlApp As Object, xlWkBk As Object
Set xlApp = CreateObject("Excel.Application")
xlApp.Visible = True
xlApp.ScreenUpdating = False
Set xlWkBk = xlApp.Workbooks.Add
With xlWkBk.Sheets(1)
.Range("A1").PasteSpecial Paste:=-4163 'xlPasteValues
.Columns.AutoFit
.Columns("A:A").ColumnWidth = 8
.Range("A1").Select
End With
xlApp.ScreenUpdating = True
Set xlWkBk = Nothing: Set xlApp = Nothing
End Sub
The result will be an Excel worksheet containing the data, all nicely aligned. As coded, the only substantive difference is that the red values and cross-out values are shifted one column to the right, so that all the current prices are in the same column.
Note: With 800 pages of data to process, the code will take some time to complete.