For your revised 'original document', try:
Code:
Sub ReformatDocument()
Application.ScreenUpdating = False
Dim i As Long, j As Long, k As Long, l As Long, x As Long, Str As String
Dim RngDoc As Range, RngTmp As Range, RngVrs As Range, RngCmt As Range
Dim bQuot As Boolean, SBar As Boolean, oPara As Paragraph
bQuot = Options.AutoFormatAsYouTypeReplaceQuotes
Options.AutoFormatAsYouTypeReplaceQuotes = False
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set RngDoc = ActiveDocument.Range
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Format = True
.Font.Italic = True
.MatchWildcards = True
.Text = "^13([!0-9]*^13)"
.Replacement.Text = " ^l\1"
.Execute Replace:=wdReplaceAll
.Format = False
.Text = "[ ]@^13"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2;}"
.Replacement.Text = " "
.Execute Replace:=wdReplaceAll
.Text = "\[[0-9]@\]"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchWildcards = True
.Execute
End With
Do While .Find.Found
With .Duplicate
If .Words.First.Previous.Font.Italic = True Then
.Paragraphs.First.Range.Font.Italic = True
End If
End With
.Collapse wdCollapseEnd
.Find.Execute
Loop
End With
DoEvents
With RngDoc
Str = .Paragraphs.First.Range.Text
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = False
.Text = Str
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
End With
For Each oPara In .Paragraphs
With oPara.Range
If InStr(.Text, "HOOFDSTUK.") > 0 Then l = l + 1
If .Words.First.Font.Italic = True Then
If IsNumeric(.Words.First) Then
Set RngVrs = oPara.Range
j = RngVrs.Words.First
StatusBar = "Sorting Verse & Comments for " & l & ":" & j
Set RngCmt = Nothing
Set RngTmp = oPara.Range
With RngTmp
.Collapse wdCollapseEnd
.End = RngDoc.End
End With
For k = 1 To RngTmp.Paragraphs.Count
With RngTmp.Paragraphs(k).Range
If .Words.First.Font.ColorIndex = wdRed Then
If IsNumeric(.Words.First) Then
If .Words.First = j Then
Set RngCmt = .Paragraphs(1).Range
With RngCmt
Do While Not IsNumeric(.Paragraphs.Last.Next.Range.Characters.First)
.MoveEnd wdParagraph, 1
If .End = RngDoc.End Then Exit Do
If .Paragraphs.Last.Next.Range.Text = UCase(.Paragraphs.Last.Next.Range.Text) Then Exit Do
Loop
End With
End If
Exit For
End If
End If
End With
Next
If Not RngCmt Is Nothing Then
RngVrs.Collapse wdCollapseEnd
RngVrs.FormattedText = RngCmt.FormattedText
With RngCmt
If .End = RngDoc.End Then .End = .End - 1
.Delete
End With
RngVrs.InsertAfter "</content>" & vbCr
End If
RngVrs.InsertAfter " </vers>" & vbCr
End If
End If
End With
Next
DoEvents
Str = "Adding tags. Please Wait •"
StatusBar = Str
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = True
.Text = " ^l"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "[^13]{2;}"
.Replacement.Text = "^p"
'Str = Str & " •"
'StatusBar = Str
.Execute Replace:=wdReplaceAll
'.Text = "([A-Z0-9 ]@)^13[A-Z. ]@([0-9]@)^13"
'.Replacement.Text = "<boek titel=""\1"">^p <hoofdstuk number=""\2"">^p"
.Text = "[A-Z. ]@([0-9]@)^13"
.Replacement.Text = " <hoofdstuk number=""\1"">^p"
Str = Str & " •"
StatusBar = Str
.Execute Replace:=wdReplaceAll
.Text = "([0-9]@)[!0-9A-Z]@[A-Z]@.^13"
.Replacement.Text = " <hoofdstuk number=""\1"">^p"
Str = Str & " •"
StatusBar = Str
.Execute Replace:=wdReplaceAll
.Format = True
.Font.Italic = True
.Text = "([0-9]@).(*)^13"
.Replacement.Text = " <vers number=""\1"">^p <title number=""\1"">\2^p</title>^p"
Str = Str & " •"
StatusBar = Str
.Execute Replace:=wdReplaceAll
.Format = False
.Text = "^13([0-9]@)(.*)^13"
.Replacement.Text = "^p <content number=""#"">\1\2^p"
Str = Str & " •"
StatusBar = Str
.Execute Replace:=wdReplaceAll
.Text = "^13 \<hoofdstuk number"
.Replacement.Text = "^p</hoofdstuks>^p</hoofdstuk>^&"
Str = Str & " •"
StatusBar = Str
.Execute Replace:=wdReplaceAll
.Wrap = wdFindStop
.Text = "hoofdstuk number=""<[0-9]@>"
.Replacement.Text = ""
.Execute
End With
DoEvents
Str = Str & " •"
StatusBar = Str
Do While .Find.Found
i = Split(.Text, Chr(34))(1)
.Collapse wdCollapseEnd
Do While InStr(.Paragraphs.Last.Next.Range.Text, "<hoofdstuk number=") = 0
.MoveEnd wdParagraph, 1
If .End = ActiveDocument.Range.End Then Exit Do
Loop
With .Duplicate.Find
.Wrap = wdFindStop
.Text = "(content number="")#"
.Replacement.Text = "\1" & i
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
End With
.Collapse wdCollapseEnd
.Find.Execute
Loop
.Collapse wdCollapseEnd
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Forward = False
.Text = "\</vers\>"
.Replacement.Text = ""
.Wrap = wdFindStop
.Format = False
.MatchWildcards = True
.Execute
End With
.End = ActiveDocument.Range.End
.Text = "</vers>" & vbCr & "</hoofdstuk>" & vbCr & "</hoofdstuks>" & vbCr & "</book>"
End With
With ActiveDocument
With .Styles(wdStyleNormal)
With .ParagraphFormat
.SpaceBefore = 0
.SpaceAfter = 0
.Space1
End With
.Font.Name = "Courier New"
End With
With .Range
.InsertBefore "<book>" & vbCr & "<book_content>" & vbCr & "<hoofdstuks>" & vbCr
.Style = wdStyleNormal
.Font.Reset
.ParagraphFormat.Reset
End With
End With
Options.AutoFormatAsYouTypeReplaceQuotes = bQuot
StatusBar = ""
MsgBox "Done!"
Application.ScreenUpdating = True
End Sub
Note that the macro now also gives a progress report on the status bar.
In most cases, your document containes chapter numbers expressed like 'HOOFDSTUK. 1' or '2de HOOFDSTUK.', but you also have one expressed as 'HOOFDSTUK XIII.' The macro does not attempt to reformat the last kind - it would take a lot of extra programming to process chapters using Roman Numerals. You should make sure they're all expressed in one of the first two forms before running the macro.
I don't know what you expect to be done with your Haggai document - it isn't formatted anything like your revised 'original document'. I don't propose to work out the programming requirements for a plethora of different formats. You should make sure all the documents are formatted the same way as your revised 'original document'.