Hi flds,
Try the following:
Code:
Sub ParseDocs()
Application.ScreenUpdating = False
Dim strInFold As String, strOutFold As String, strFile As String, strOutFile As String
Dim TOC As TableOfContents, Para As Paragraph, Tbl As Table, Sctn As Section, Rng As Range
Dim DocSrc As Document, DocTxt As Document, DocTbl As Document, DocApp As Document
strInFold = GetFolder
If strInFold = "" Then Exit Sub
strFile = Dir(strInFold & "\*.doc", vbNormal)
If strFile <> "" Then strOutFold = strInFold & "\Output"
If Dir(strOutFold, vbDirectory) = "" Then MkDir strOutFold
strFile = Dir(strInFold & "\*.doc", vbNormal)
While strFile <> ""
Set DocSrc = Documents.Open(FileName:=strInFold & "\" & strFile, AddTorecentFiles:=False, Visible:=False)
With DocSrc
For Each TOC In .TablesOfContents
TOC.Delete
Next
.Fields.Unlink
If .Tables.Count > 0 Then
.Range.Copy
Set DocTbl = Documents.Add(Visible:=False)
With DocTbl
.Range.Paste
For Each Para In .Paragraphs
With Para.Range
On Error Resume Next
If .Information(wdWithInTable) = False Then
If .Next.Paragraphs.First.Range.Information(wdWithInTable) = False Then
.Delete
Else
.Text = vbCr
End If
End If
End With
Next
Call Cleanup(.Range)
End With
For Each Tbl In .Tables
Tbl.Delete
Next
For Each Sctn In .Sections
If UCase(Sctn.Range.Words.First) = "APPENDIX" Then
Set Rng = Sctn.Range
Rng.End = .Range.End
Rng.Cut
Set DocApp = Documents.Add(Visible:=False)
With DocApp
.Range.Paste
Call Cleanup(.Range)
End With
Exit For
End If
Next
Call Cleanup(.Range)
End If
strOutFile = strOutFold & "\" & Split(.Name, ".")(0)
.Range.Copy
Set DocTxt = Documents.Add(Visible:=False)
With DocTxt
.Range.Paste
.SaveAs FileName:=strOutFile & "-Text", AddTorecentFiles:=False
.Close
End With
Set DocTxt = Nothing
If Not DocTbl Is Nothing Then
DocTbl.SaveAs FileName:=strOutFile & "-Tables", AddTorecentFiles:=False
DocTbl.Close
Set DocTbl = Nothing
End If
If Not DocApp Is Nothing Then
DocApp.SaveAs FileName:=strOutFile & "-Appendices", AddTorecentFiles:=False
DocApp.Close
Set DocApp = Nothing
End If
.Close SaveChanges:=False
End With
strFile = Dir()
Wend
Set DocSrc = Nothing
Application.ScreenUpdating = True
End Sub
Function GetFolder(Optional Title As String, Optional RootFolder As Variant) As String
On Error Resume Next
GetFolder = CreateObject("Shell.Application").BrowseForFolder(0, Title, 0, RootFolder).Items.Item.Path
End Function
Sub Cleanup(Rng As Range)
With Rng.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "^b"
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^~"
.Replacement.Text = "-"
.Execute Replace:=wdReplaceAll
End With
End Sub
For Appendix processing, the code relies on there being a Section break immediately before the first paragraph in that Section, whose first word is 'Appendix'. If that's not how your documents are structured (you haven't said anything in reply to my question about that), a different approach will be needed.
The code includes folder navigation and code to produce an 'Output' folder below that for holding the output files. All documents in the selected folder will be processed automatically.