![]() |
|
|
|
#1
|
|||
|
|||
|
Code:
Sub pdf()
Const sFile As String = "C:\test\1.pdf"
Const oFile As String = "C:\test\1.log"
Dim m, n As Integer
Dim str As String
m = FreeFile()
Open sFile For Binary As m
n = FreeFile()
Open oFile For Output As n
Do While Not EOF(m)
Line Input #m, str
Print #n, str
Loop
''' error
''' Input past end of file
Close m
Close n
End Sub
It runs as expected but fails before closing the n file (log file). |
|
#2
|
|||
|
|||
|
Code:
Sub pdf()
Const sFile As String = "C:\test\1.pdf"
Dim f As Integer
Dim byt As Byte
Dim str As String, str1 As String
f = FreeFile
Open sFile For Binary Access Read As #f
Do While Loc(f) < LOF(f)
Get f, , byt
str1 = str1 & byt & vbNewLine
Loop
Close m
''' str1
''' different from the output when I use C# StreamReader to extract the data from the pdf file
End Sub
First, I bet there is something to do with Binary. The error "Input past end of file" seems to be caused by reading binary files and I think using Loc() could have helped solve it. Second, I declare a variable for the binary data. I think it works with the binary data extracted from the pdf file. But third, the output (string str1) is still different from what I get from a C# code (which works well to count the pages of a pdf file). Anyway, I think I still need to be working hard on it since even I get the page count from the C# code. It cannot be used in Excel. |
|
#3
|
||||
|
||||
|
Hi tinfanide,
If you have Adobe Acrobat Pro installed, you can automate Acrobat Pro for the processing: Code:
Public Function ReadAcrobatDocument(strFileName As String, Optional iPage As Long) As String
'Note: A Reference to the Adobe Library must be set in Tools|References!
Dim AcroApp As CAcroApp, AcroAVDoc As CAcroAVDoc, AcroPDDoc As CAcroPDDoc
Dim AcroHiliteList As CAcroHiliteList, AcroTextSelect As CAcroPDTextSelect
Dim PageNumber, PageContent, Content, i, j
Set AcroApp = CreateObject("AcroExch.App")
Set AcroAVDoc = CreateObject("AcroExch.AVDoc")
If AcroAVDoc.Open(strFileName, vbNull) <> True Then Exit Function
' The following While-Wend loop shouldn't be necessary but timing issues may occur.
While AcroAVDoc Is Nothing
Set AcroAVDoc = AcroApp.GetActiveDoc
Wend
Set AcroPDDoc = AcroAVDoc.GetPDDoc
If iPage <= AcroPDDoc.GetNumPages And iPage <> 0 Then
Set PageNumber = AcroPDDoc.AcquirePage(iPage - 1)
Set PageContent = CreateObject("AcroExch.HiliteList")
If PageContent.Add(0, 9000) <> True Then Exit Function
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
' The next line is needed to avoid errors with protected PDFs that can't be read
On Error Resume Next
For j = 0 To AcroTextSelect.GetNumText - 1
Content = Content & AcroTextSelect.GetText(j)
Next j
End If
If iPage = 99999 Then
For i = 0 To AcroPDDoc.GetNumPages - 1
Set PageNumber = AcroPDDoc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
If PageContent.Add(0, 9000) <> True Then Exit Function
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
' The next line is needed to avoid errors with protected PDFs that can't be read
On Error Resume Next
For j = 0 To AcroTextSelect.GetNumText - 1
Content = Content & AcroTextSelect.GetText(j)
Next j
Next i
ElseIf iPage > AcroPDDoc.GetNumPages Then
Content = "Error!: The PDF file has only " & AcroPDDoc.GetNumPages & " Pages."
End If
If iPage = 0 Then Content = AcroPDDoc.GetNumPages
ReadAcrobatDocument = Content
AcroAVDoc.Close True
AcroApp.Exit
Set AcroAVDoc = Nothing: Set AcroApp = Nothing
End Function
• the PDF's full path & name; and • optionally, the page number to be retrieved If you: • omit the second parameter or give it a 0 value, you'll get just the page count; • specify 99999 as the second parameter, you'll get the text contents of the PDF; • specify a valid page #, you'll get that page's text contents; or • specify an invalid page #, you'll get an error output. You can call the function with a sub like: Code:
Sub Demo()
Dim strPDF As String, strTmp As String, i As Integer
' The next ten lines and the last line in this sub can help if
' you get "ActiveX component can't create object" errors even
' though a Reference to Acrobat is set in Tools|References.
Dim bTask As Boolean
bTask = True
If Tasks.Exists(Name:="Adobe Acrobat Professional") = False Then
bTask = False
Dim AdobePath As String, WshShell As Object
Set WshShell = CreateObject("Wscript.shell")
AdobePath = WshShell.RegRead("HKEY_CLASSES_ROOT\acrobat\shell\open\command\")
AdobePath = Trim(Left(AdobePath, InStr(AdobePath, "/") - 1))
Shell AdobePath, vbHide
End If
strPDF = ReadAcrobatDocument("C:\test\1.pdf", 0)
ActiveDocument.Range.InsertAfter strPDF
If bTask = False Then Tasks.Item("Adobe Acrobat Professional").Close
End Sub
Note2: The sub is written for Word (eg ActiveDocument), but you can change that easily enough to work with Excel or any other app.
__________________
Cheers, Paul Edstein [Fmr MS MVP - Word] |
|
|
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
error: You are attempting to save a file format that is blocked by file block setting
|
zatlas1 | Office | 8 | 04-28-2012 02:37 PM |
Error File in Word
|
gleckie | Word | 2 | 02-02-2012 07:51 AM |
File Permission Error
|
Pianoman 74 | Word | 3 | 01-26-2012 03:56 PM |
| File Conversion Error | azii | Word | 0 | 01-04-2012 04:34 AM |
| Runtime error 5487 - Word cannot complete the save to to file permission error | franferns | Word | 0 | 11-25-2009 05:35 AM |