View Single Post
 
Old 06-26-2011, 05:16 PM
macropod's Avatar
macropod macropod is offline Windows 7 64bit Office 2010 32bit
Administrator
 
Join Date: Dec 2010
Location: Canberra, Australia
Posts: 21,963
macropod has a reputation beyond repute
Default

Hi flds,

Try the following.
Code:
Sub ParseDocs()
'Batch-processes every document in a user-selected folder. For each source document:
' - everything up to & including the first Table Of Contents, plus any other TOCs, is deleted;
' - all fields are unlinked and non-breaking hyphens are replaced by ordinary hyphens;
' - if the document contains tables, a '-Tables' document is built via MakeTableDoc;
' - content from the first Section starting with 'Appendix' to the end of the document is
'   moved to an '-Appendices' document;
' - content from the first Section starting with 'References' to the end of what remains is
'   moved to a '-References' document;
' - whatever is left is saved as a '-Text' document.
'All output goes to an 'Output' sub-folder of the chosen folder. The source documents
'are closed without saving, so they're left unchanged on disk.
'Note: The clipboard is used to transfer content between documents.
Dim strInFold As String, strOutFold As String, strFile As String, strOutFile As String
Dim Sctn As Section, Rng As Range, i As Long, lngDot As Long
Dim DocSrc As Document, DocTxt As Document, DocTbl As Document, DocApp As Document, DocRef As Document
'Call the GetFolder Function to determine the folder to process
strInFold = GetFolder
If strInFold = "" Then Exit Sub
'Check for documents in the folder - exit if none found
strFile = Dir(strInFold & "\*.doc", vbNormal)
If strFile = "" Then Exit Sub
strOutFold = strInFold & "\Output"
'Test for an existing output folder & create one if it doesn't already exist
If Dir(strOutFold, vbDirectory) = "" Then MkDir strOutFold
'The folder test above reset Dir's file enumeration, so start it again
strFile = Dir(strInFold & "\*.doc", vbNormal)
'Only turn off screen updating once we know there's work to do, so that
'none of the early exits above can leave it switched off
Application.ScreenUpdating = False
'Process all documents in the chosen folder
While strFile <> ""
  Set DocSrc = Documents.Open(FileName:=strInFold & "\" & strFile, AddTorecentFiles:=False, Visible:=False)
  With DocSrc
    'Delete everything before the first Table Of Contents in the source document
    If .TablesOfContents.Count <> 0 Then
      Set Rng = .TablesOfContents(1).Range
      Rng.Start = .Range.Start
      Rng.Delete
    End If
    'Delete any other Tables Of Contents in the source document. Loop backwards by
    'index so that deleting a member can't cause another member to be skipped
    For i = .TablesOfContents.Count To 1 Step -1
      .TablesOfContents(i).Delete
    Next
    'Convert all fields in the source document to plain text
    .Fields.Unlink
    'Convert all non-breaking hyphens in the source document to ordinary hyphens
    'Note: the leading dot ties the Find to the source document
    With .Content.Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .Text = "^~"
      .Replacement.Text = "-"
      .Execute Replace:=wdReplaceAll
    End With
    'Check for tables in the source document
    If .Tables.Count > 0 Then
      'If there are any tables in the source document, make a copy of the document
      .Range.Copy
      ' Create a new document for the tables
      Set DocTbl = Documents.Add(Visible:=False)
      'Process the new document
      Call MakeTableDoc(DocTbl)
      'Delete all tables in the source document, looping backwards so none are skipped
      For i = .Tables.Count To 1 Step -1
        .Tables(i).Delete
      Next
    End If
    'Check for appendices in the source document
    For Each Sctn In .Sections
      'Word's 'words' include any trailing spaces, so trim before comparing
      If UCase(Trim(Sctn.Range.Words.First.Text)) = "APPENDIX" Then
        Set Rng = Sctn.Range
        Rng.End = .Range.End
        'Cut from the start of the first appendices Section to the end of the
        'source document and paste it into a new appendices document
        Rng.Cut
        Set DocApp = Documents.Add(Visible:=False)
        'Process the new document
        Call NewDoc(DocApp)
        Exit For
      End If
    Next
    'Check for references in the source document
    For Each Sctn In .Sections
      If UCase(Trim(Sctn.Range.Words.First.Text)) = "REFERENCES" Then
        Set Rng = Sctn.Range
        Rng.End = .Range.End
        'Cut from the start of the first references Section to the end of the
        'source document and paste it into a new references document
        Rng.Cut
        Set DocRef = Documents.Add(Visible:=False)
        'Process the new document
        Call NewDoc(DocRef)
        Exit For
      End If
    Next
    Call Cleanup(.Range)
    'String variable for the output filenames: the source name minus its extension.
    'Only the last dot is treated as the extension separator, so names such as
    ''Report v1.2.doc' keep their full base name
    lngDot = InStrRev(.Name, ".")
    If lngDot > 1 Then
      strOutFile = strOutFold & "\" & Left(.Name, lngDot - 1)
    Else
      strOutFile = strOutFold & "\" & .Name
    End If
    'Copy whatever's left in the source document and paste it into a new text document
    .Range.Copy
    Set DocTxt = Documents.Add(Visible:=False)
    With DocTxt
      .Range.Paste
      'Save and close the text document
      .SaveAs FileName:=strOutFile & "-Text", AddTorecentFiles:=False
      .Close
    End With
    Set DocTxt = Nothing
    'Save and close the references document
    If Not DocRef Is Nothing Then
      DocRef.SaveAs FileName:=strOutFile & "-References", AddTorecentFiles:=False
      DocRef.Close
      Set DocRef = Nothing
    End If
    'Save and close the tables document
    If Not DocTbl Is Nothing Then
      DocTbl.SaveAs FileName:=strOutFile & "-Tables", AddTorecentFiles:=False
      DocTbl.Close
      Set DocTbl = Nothing
    End If
    'Save and close the appendices document
    If Not DocApp Is Nothing Then
      DocApp.SaveAs FileName:=strOutFile & "-Appendices", AddTorecentFiles:=False
      DocApp.Close
      Set DocApp = Nothing
    End If
    'Close the source document without saving the changes we've made to it
    .Close SaveChanges:=False
  End With
  strFile = Dir()
Wend
Set Rng = Nothing: Set DocSrc = Nothing
Application.ScreenUpdating = True
End Sub
 
Function GetFolder(Optional Title As String, Optional RootFolder As Variant) As String
'Displays the Windows Shell 'Browse For Folder' dialog and returns the path of the
'folder the user picks.
' Title:      optional caption text passed to the dialog.
' RootFolder: optional root folder passed to the dialog.
'Returns an empty string if any step fails (e.g. the dialog returns no folder), since
'errors are suppressed and the function's return value is then never assigned.
Dim objShell As Object, objFolder As Object
On Error Resume Next
Set objShell = CreateObject("Shell.Application")
Set objFolder = objShell.BrowseForFolder(0, Title, 0, RootFolder)
GetFolder = objFolder.Items.Item.Path
End Function
 
Sub MakeTableDoc(DocTbl As Document)
'Builds the 'tables' document: pastes the clipboard contents into DocTbl, then strips
'out Sections & paragraphs that aren't needed, so that what remains is the tables
'(plus the paragraph preceding each table).
' DocTbl: the document to paste into and clean up. The caller is expected to have
'         copied the source content to the clipboard before calling this.
Dim Sctn As Section, Para As Paragraph, Rng As Range
With DocTbl
  .Range.Paste
  'Delete any Sections with no tables in the tables document
  For Each Sctn In .Sections
    If Sctn.Range.Tables.Count = 0 Then
      Sctn.Range.Delete
    Else
      'Errors are ignored from here to the end of the procedure (there is no
      'matching On Error GoTo 0)
      On Error Resume Next
      'Where this Section has the same page orientation as whatever precedes it, delete
      'the last character of that preceding range
      'NOTE(review): presumably .Previous is the Section break immediately before this
      'Section, so this merges Sections of like orientation - confirm
      If Sctn.PageSetup.Orientation = Sctn.Range.Previous.PageSetup.Orientation Then
        Sctn.Range.Previous.Characters.Last.Delete
      End If
    End If
  Next
  'Check all paragraphs not in tables in the tables document
  For Each Para In .Paragraphs
    With Para
      Set Rng = .Range
      On Error Resume Next
      With Rng
        If .Information(wdWithInTable) = False Then
          'Test whether the first paragraph of whatever follows this one is in a table
          'NOTE(review): if .Next fails (e.g. nothing follows), the suppressed error
          'presumably lets execution fall into the .Delete branch - confirm
          If .Next.Paragraphs.First.Range.Information(wdWithInTable) = False Then
            'Delete any paragraphs not followed by a table in the tables document
            .Delete
          Else
            'Keep table captions, if present, and ensure there are three paragraphs
            'between tables in the tables document
            If InStr(.Style, "Table Caption") = 0 Then
              'Not a caption: exclude the final character from the range, then
              'empty what's left
              .End = .End - 1
              .Text = vbNullString
            End If
            'Add two paragraph marks ahead of the range
            .InsertBefore vbCr & vbCr
          End If
        End If
      End With
    End With
  Next
End With
Set Rng = Nothing
End Sub
 
Sub NewDoc(NewDoc As Document)
'Fills the supplied document from the clipboard, then passes the document's
'whole range to Cleanup.
' NewDoc: the (new) document to receive the clipboard contents.
NewDoc.Range.Paste
Cleanup NewDoc.Range
End Sub
 
Sub Cleanup(Rng As Range)
  'Replaces every match of the Find code "^b" in the supplied range with nothing.
  ' Rng: the range to search.
  Dim fndBreaks As Find
  Set fndBreaks = Rng.Find
  'Make sure no formatting constraints apply to the search or the replacement
  fndBreaks.ClearFormatting
  fndBreaks.Replacement.ClearFormatting
  fndBreaks.Text = "^b"
  fndBreaks.Replacement.Text = ""
  fndBreaks.Execute Replace:=wdReplaceAll
End Sub
With the tables document, the modified code deletes as many Section breaks as possible without affecting the page orientation. Unless you've got mixed page sizes (e.g. A3 & A4) in the same document, that should go pretty close to meeting your needs. Any remaining Section breaks would have to be eyeballed to see whether they can safely be deleted.

As for:
Quote:
Is it ok, if I ask for 2 more simple requirements relating to the same documents. Based on the concept as the last code. I have found one code and the other needs to be created.
Well, yes, you can ask. But you'll need to say what those requirements are before I can do anything on that front.
__________________
Cheers,
Paul Edstein
[Fmr MS MVP - Word]
Reply With Quote