Hi tinfanide,
If you have Adobe Acrobat Pro installed, you can automate Acrobat Pro for the processing:
Code:
Public Function ReadAcrobatDocument(strFileName As String, Optional iPage As Long) As String
    ' Reads text (or the page count) from a PDF by automating Adobe Acrobat.
    ' Note: A Reference to the Adobe Library must be set in Tools|References!
    '
    ' Parameters:
    '   strFileName - full path and name of the PDF to open.
    '   iPage       - 0 or omitted : return the number of pages;
    '                 99999        : return the text of every page;
    '                 1..pagecount : return the text of that (1-based) page;
    '                 > pagecount  : return an "Error!: ..." message;
    '                 < 0          : return an "Error!: ..." message.
    '
    ' Returns:
    '   The requested text, the page count as a string, an "Error!: ..."
    '   message, or an empty string when the PDF could not be opened or the
    '   highlight list could not be built.
    '
    ' Errors:
    '   Unexpected run-time errors (e.g. Acrobat cannot be created) are
    '   re-raised to the caller, but only after the document has been closed
    '   and the Acrobat instance released.
    Const ALL_PAGES As Long = 99999

    Dim AcroApp As CAcroApp, AcroAVDoc As CAcroAVDoc, AcroPDDoc As CAcroPDDoc
    Dim lPageCount As Long, lPageIndex As Long
    Dim strContent As String
    Dim bOpened As Boolean
    Dim lErrNumber As Long, strErrSource As String, strErrDescription As String

    On Error GoTo ErrHandler

    Set AcroApp = CreateObject("AcroExch.App")
    Set AcroAVDoc = CreateObject("AcroExch.AVDoc")

    ' The second argument is the temporary window title; an empty string
    ' means "no special title" (the original passed vbNull, which is the
    ' VarType constant 1, not an empty string).
    If AcroAVDoc.Open(strFileName, "") <> True Then GoTo CleanUp
    bOpened = True

    Set AcroPDDoc = AcroAVDoc.GetPDDoc
    lPageCount = AcroPDDoc.GetNumPages

    If iPage = 0 Then
        ' Page-count mode.
        strContent = CStr(lPageCount)
    ElseIf iPage = ALL_PAGES Then
        ' Whole-document mode: concatenate the text of every page.
        For lPageIndex = 0 To lPageCount - 1
            If Not ReadAcrobatPageText(AcroPDDoc, lPageIndex, strContent) Then
                ' Same result as before when the highlight list fails: nothing.
                strContent = ""
                Exit For
            End If
        Next lPageIndex
    ElseIf iPage >= 1 And iPage <= lPageCount Then
        ' Single-page mode; Acrobat page indexes are zero-based.
        If Not ReadAcrobatPageText(AcroPDDoc, iPage - 1, strContent) Then strContent = ""
    ElseIf iPage > lPageCount Then
        strContent = "Error!: The PDF file has only " & lPageCount & " Pages."
    Else
        ' Negative page numbers used to reach AcquirePage with a negative index.
        strContent = "Error!: " & iPage & " is not a valid page number."
    End If

CleanUp:
    ' Best-effort cleanup: a failure here must not hide the result or the
    ' original error, so errors are ignored for these few statements only.
    On Error Resume Next
    If bOpened Then AcroAVDoc.Close True
    If Not AcroApp Is Nothing Then AcroApp.Exit
    Set AcroPDDoc = Nothing: Set AcroAVDoc = Nothing: Set AcroApp = Nothing
    On Error GoTo 0

    If lErrNumber <> 0 Then Err.Raise lErrNumber, strErrSource, strErrDescription
    ReadAcrobatDocument = strContent
    Exit Function

ErrHandler:
    ' Remember the error (Resume clears Err), then run the shared cleanup.
    lErrNumber = Err.Number
    strErrSource = Err.Source
    strErrDescription = Err.Description
    Resume CleanUp
End Function

Private Function ReadAcrobatPageText(AcroPDDoc As CAcroPDDoc, ByVal lPageIndex As Long, ByRef strText As String) As Boolean
    ' Appends the text of one page (zero-based lPageIndex) to strText.
    ' Returns False only when the highlight list could not be built;
    ' a page whose text cannot be read still returns True and appends nothing.
    '
    ' NOTE(review): the highlight range is fixed at offset 0, length 9000, as in
    ' the original code; text beyond that range is presumably not selected -
    ' confirm against the Acrobat IAC reference before relying on long pages.
    Const HILITE_LENGTH As Long = 9000

    Dim AcroPage As Object
    Dim AcroHiliteList As CAcroHiliteList
    Dim AcroTextSelect As CAcroPDTextSelect
    Dim lTextCount As Long, lTextIndex As Long

    Set AcroPage = AcroPDDoc.AcquirePage(lPageIndex)
    Set AcroHiliteList = CreateObject("AcroExch.HiliteList")
    If AcroHiliteList.Add(0, HILITE_LENGTH) <> True Then Exit Function
    ReadAcrobatPageText = True

    ' Error suppression is needed to avoid errors with protected PDFs that
    ' can't be read. It is confined to this helper, so it ends when the
    ' helper returns instead of silencing the rest of the caller.
    On Error Resume Next
    Set AcroTextSelect = AcroPage.CreatePageHilite(AcroHiliteList)
    If Not AcroTextSelect Is Nothing Then
        ' Read the count first: an error inside a For header under
        ' Resume Next would leave the loop in an undefined state.
        lTextCount = AcroTextSelect.GetNumText
        For lTextIndex = 0 To lTextCount - 1
            strText = strText & AcroTextSelect.GetText(lTextIndex)
        Next lTextIndex
    End If
End Function
The above function takes two arguments:
• the PDF's full path & name; and
• optionally, the page number to be retrieved
If you:
• omit the second parameter or give it a 0 value, you'll get just the page count;
• specify 99999 as the second parameter, you'll get the text contents of the whole PDF (all pages);
• specify a valid page #, you'll get that page's text contents; or
• specify a page # greater than the PDF's page count, you'll get an error message as the output.
You can call the function with a sub like:
Code:
Sub Demo()
    ' Example caller for ReadAcrobatDocument: reads C:\test\1.pdf and appends
    ' the result to the active Word document. With a second argument of 0 the
    ' result is the PDF's page count; pass a page number or 99999 for text.
    Dim strPDF As String
    Dim bTask As Boolean
    Dim AdobePath As String, WshShell As Object
    Dim lSwitchPos As Long, lQuotePos As Long

    ' The start-up block below and the last line in this sub can help if
    ' you get "ActiveX component can't create object" errors even
    ' though a Reference to Acrobat is set in Tools|References.
    ' bTask records whether Acrobat was already running before this sub.
    bTask = Tasks.Exists(Name:="Adobe Acrobat Professional")
    If Not bTask Then
        Set WshShell = CreateObject("Wscript.shell")
        AdobePath = WshShell.RegRead("HKEY_CLASSES_ROOT\acrobat\shell\open\command\")
        ' Keep only the executable part of the registered command line.
        lSwitchPos = InStr(AdobePath, "/")
        If lSwitchPos > 0 Then
            ' Usual case: drop everything from the first "/" switch onwards.
            AdobePath = Trim(Left(AdobePath, lSwitchPos - 1))
        Else
            ' No switch present: Left(..., -1) would raise run-time error 5,
            ' so fall back to the leading quoted path if there is one.
            AdobePath = Trim(AdobePath)
            If Left(AdobePath, 1) = """" Then
                lQuotePos = InStr(2, AdobePath, """")
                If lQuotePos > 0 Then AdobePath = Left(AdobePath, lQuotePos)
            End If
        End If
        Shell AdobePath, vbHide
    End If

    strPDF = ReadAcrobatDocument("C:\test\1.pdf", 0)
    ActiveDocument.Range.InsertAfter strPDF

    ' Only close Acrobat if this sub started it, and only if its task is
    ' still present (ReadAcrobatDocument calls AcroApp.Exit before we get here).
    If bTask = False Then
        If Tasks.Exists(Name:="Adobe Acrobat Professional") Then
            Tasks.Item("Adobe Acrobat Professional").Close
        End If
    End If
End Sub
Note1: A Reference to the Adobe Library must be set in Tools|References.
Note2: The sub is written for Word (e.g. it uses ActiveDocument and Tasks), but you can change that easily enough to work with Excel or any other app.