View Single Post
 
Old 06-19-2011, 07:21 PM
macropod's Avatar
macropod macropod is offline Windows 7 64bit Office 2007
Administrator
 
Join Date: Dec 2010
Location: Canberra, Australia
Posts: 21,956
macropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond repute
Default

Hi flds,

Try the following:
Code:
Sub ParseDocs()
Application.ScreenUpdating = False
Dim strInFold As String, strOutFold As String, strFile As String, strOutFile As String
Dim TOC As TableOfContents, Para As Paragraph, Tbl As Table, Sctn As Section, Rng As Range
Dim DocSrc As Document, DocTxt As Document, DocTbl As Document, DocApp As Document
strInFold = GetFolder
If strInFold = "" Then Exit Sub
strFile = Dir(strInFold & "\*.doc", vbNormal)
If strFile <> "" Then strOutFold = strInFold & "\Output"
If Dir(strOutFold, vbDirectory) = "" Then MkDir strOutFold
strFile = Dir(strInFold & "\*.doc", vbNormal)
While strFile <> ""
  Set DocSrc = Documents.Open(FileName:=strInFold & "\" & strFile, AddTorecentFiles:=False, Visible:=False)
  With DocSrc
    For Each TOC In .TablesOfContents
      TOC.Delete
    Next
    .Fields.Unlink
    If .Tables.Count > 0 Then
      .Range.Copy
      Set DocTbl = Documents.Add(Visible:=False)
      With DocTbl
        .Range.Paste
        For Each Para In .Paragraphs
          With Para.Range
            On Error Resume Next
            If .Information(wdWithInTable) = False Then
              If .Next.Paragraphs.First.Range.Information(wdWithInTable) = False Then
                .Delete
              Else
                .Text = vbCr
              End If
            End If
          End With
        Next
        Call Cleanup(.Range)
      End With
      For Each Tbl In .Tables
        Tbl.Delete
      Next
      For Each Sctn In .Sections
        If UCase(Sctn.Range.Words.First) = "APPENDIX" Then
          Set Rng = Sctn.Range
          Rng.End = .Range.End
          Rng.Cut
          Set DocApp = Documents.Add(Visible:=False)
          With DocApp
            .Range.Paste
            Call Cleanup(.Range)
          End With
          Exit For
        End If
      Next
      Call Cleanup(.Range)
    End If
    strOutFile = strOutFold & "\" & Split(.Name, ".")(0)
    .Range.Copy
    Set DocTxt = Documents.Add(Visible:=False)
    With DocTxt
      .Range.Paste
      .SaveAs FileName:=strOutFile & "-Text", AddTorecentFiles:=False
      .Close
    End With
    Set DocTxt = Nothing
    If Not DocTbl Is Nothing Then
      DocTbl.SaveAs FileName:=strOutFile & "-Tables", AddTorecentFiles:=False
      DocTbl.Close
      Set DocTbl = Nothing
     End If
    If Not DocApp Is Nothing Then
      DocApp.SaveAs FileName:=strOutFile & "-Appendices", AddTorecentFiles:=False
      DocApp.Close
      Set DocApp = Nothing
    End If
    .Close SaveChanges:=False
  End With
  strFile = Dir()
Wend
Set DocSrc = Nothing
Application.ScreenUpdating = True
End Sub
 
Function GetFolder(Optional Title As String, Optional RootFolder As Variant) As String
On Error Resume Next
GetFolder = CreateObject("Shell.Application").BrowseForFolder(0, Title, 0, RootFolder).Items.Item.Path
End Function
 
Sub Cleanup(Rng As Range)
  With Rng.Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Text = "^b"
    .Replacement.Text = ""
    .Execute Replace:=wdReplaceAll
    .Text = "^~"
    .Replacement.Text = "-"
    .Execute Replace:=wdReplaceAll
  End With
End Sub
For Appendix processing, the code relies on there being a Section break immediately before the first paragraph in that Section, whose first word is 'Appendix'. If that's not how your documents are structured (you haven't said anything in reply to my question about that), a different approach will be needed.

The code includes folder navigation and code to produce an 'Output' folder below that for holding the output files. All documents in the selected folder will be processed automatically.
__________________
Cheers,
Paul Edstein
[Fmr MS MVP - Word]
Reply With Quote