I'm posting again in the hope I can resolve a couple of issues with the macro below.
Before Macro TEST DOC.docx
After Macro TEST DOC.docx
1. When converting pdfs to word, the definitions may or may not contain the word 'means' after the main bold definition text at the start of the sentence. The macro currently only looks for the word 'means' and deletes and inserts a tab. What I would like is if after the bold text and it is just a space and/or colon space without the word 'means' that the space and/or colon space is replaced with a tab otherwise the whole definition ends up in column 1.
2. If the word 'means' appears additionally in the sentence the macro is deleting the word and inserting a tab, e.g. in relation to any group company
means any other company (see attached documents). Is there something I could add to the code below to prevent this from happening?
Code:
With aRng.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "[:;, ^t]{1,5}means[:;, ]{1,5}" 'replace means with a tab
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
3. If the Bold Definition text is on a line on its own and sub-levels (a), (b) etc. are on the next line, the macro doesn't pick this up and puts (a) into column 1. It also happens if after the bold text the sentence starts with a sub-level (a), I really can't fathom out how to get this to work properly.
This is the whole macro:
Code:
Sub DPU_DefinitionTabulator()
Dim aRng As Range, aTbl As Table, aPara As Paragraph, aCell As Cell, oBorder As Border
Set aRng = ActiveDocument.Range
For Each aPara In aRng.Paragraphs
aPara.Range.Words.Last.Font.Reset 'remove bold from paragraph marks and autonumbers
Next aPara
With aRng.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "[:;, ^t]{1,5}means[:;, ]{1,5}" 'replace means with a tab
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,9}" 'multiple spaces reduced to one
.Replacement.Text = " "
.Execute Replace:=wdReplaceAll
Set aRng = ActiveDocument.Range
aRng.ListFormat.ConvertNumbersToText 'make lists hard coded
.Text = "^w^p" 'paragraphs ending with whitespace have spaces removed
.Replacement.Text = "^p"
.MatchWildcards = False
.Execute Replace:=wdReplaceAll
.Text = ".^p" 'paragraphs ending with . add semi-colon
.Replacement.Text = ";^p"
.Execute Replace:=wdReplaceAll
.Text = ".]^p" 'paragraphs ending with ] add semi-colon in front of ]
.Replacement.Text = ";]^p"
.Execute Replace:=wdReplaceAll
.Text = " ([;:,]{1,5})" 'space before punctuation removed
.Replacement.Text = "\1"
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
.Text = "^13([a-z])\)" 'make sure lists with unopened brackets a) get opening bracket (a)
.Replacement.Text = "^13(\1)"
.Execute Replace:=wdReplaceAll
.Text = "^13(?)" 'If paragraph starts non-bold
'.Replacement.Text = "|\1" 'Option: keep all paragraphs in a definition in one row
.Replacement.Text = "^p^t\1" 'Option: if you want separate rows per paragraph
.MatchWildcards = True
.Font.Bold = False
.Execute Replace:=wdReplaceAll
.Text = "([!^13])^t" 'find tabs preceded by non-bold character other than a carriage return
.Replacement.Text = "\1zzTabzz" 'replace with a replaceable unique string
.Font.Bold = False
.Execute Replace:=wdReplaceAll
End With
Set aRng = ActiveDocument.Range
Set aTbl = aRng.ConvertToTable(Separator:=wdSeparateByTabs, NumColumns:=2, AutoFitBehavior:=wdAutoFitFixed)
With aTbl
.Style = "Table Grid Light" 'choose a table style that matches your preferred table look
.ApplyStyleHeadingRows = False
.ApplyStyleLastRow = False
.ApplyStyleFirstColumn = True 'make sure the table style has a bold first column
.ApplyStyleLastColumn = False
.ApplyStyleRowBands = False
.Range.Style = "Definition Level 1"
For Each aCell In .Columns(1).Cells
aCell.Range.Style = "DefBold"
If aCell.Range.Characters.First = "[" Then
aCell.Range.Characters.First.InsertAfter Text:=""""
aCell.Range.Characters.Last.InsertBefore Text:=""""
ElseIf Len(aCell.Range.Text) > 2 Then
aCell.Range.Characters.First.InsertBefore Text:=""""
aCell.Range.Characters.Last.InsertBefore Text:=""""
End If
Next aCell
.Columns.PreferredWidthType = wdPreferredWidthPoints
.Columns.PreferredWidth = InchesToPoints(2.7)
.Columns(2).PreferredWidth = InchesToPoints(3.63)
For Each oBorder In .Borders
oBorder.LineStyle = wdLineStyleNone
Next oBorder
End With
'Reinstate the tabs and paragraphs
With aRng.Find
.ClearFormatting
.Replacement.ClearFormatting
.MatchWildcards = False
.Text = "|"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "zzTabzz"
.Replacement.Text = "^t" 'or a space if you prefer
.Execute Replace:=wdReplaceAll
End With
'Remove all local formatting
ActiveDocument.Range.Font.Reset
'Call DPU_ApplyHeadingStylesToTableTable
End Sub
Really appreciate some help if possible. Thanks, Shelley