The following macro generates a list of all words used in the active document, and outputs them alphabetically sorted, with frequency of occurrence in a table at the end of that document, starting on a new page. The page Nos on which the words occur may also be included in the output. The macro only lists words in the MainTextStory, not headers/footers/ footnotes/endnotes etc. It also has provision for an exclusion list, so that various words and phrases can be excluded. The exclusions list (defined by the words & phrases in the StrExcl string variable) lists the words and phrases to be omitted from the concordance. Any phrases should be listed in the exclusions list before any of the single-word exclusions (so that conflicts don’t occur).
Code:
Sub ConcordanceBuilder()
Application.ScreenUpdating = False
Dim StrIn As String, StrOut As String, StrTmp As String, StrExcl As String
Dim i As Long, j As Long, k As Long, l As Long, Rng As Range, RngTmp As Range, Pgs As Long
'Define the exlusions list
StrExcl = "-,a,am,an,and,are,as,at,b,be,but,by,c,can,cf,cm,d,did,do,does,e,eg," & _
"e.g,en,eq,etc,f,for,g,get,go,got,h,has,have,he,he's,her,her's" & _
"him,his,how,i,ie,i.e,if,i'm,in,into,is,it,its,j,k,l,m,me,mi,mm,my," & _
"n,na,nb,no,not,o,of,off,ok,on,one,or,our,out,p,q,r,re,s,she,she's,so," & _
"t,that,the,their,them,they,they're,this,those,they're,to,u,v,via," & _
"vs,w,was,we,we're,were,who,will,with,would,x,y,yd,you,your,your's,z"
With ActiveDocument
'Get the document's text
StrIn = .Content.Text
'Strip out unwanted characters. Amongst others, hyphens and formatted single quotes are retained at this stage
For i = 1 To 255
Select Case i
Case 1 To 31, 33 To 44, 46 To 64, 91 To 96, 123 To 144, 147 To 191, 247
While InStr(StrIn, Chr(i)) > 0
StrIn = Replace(StrIn, Chr(i), " ")
Wend
End Select
Next
DoEvents
'Delete any periods or commas at the end of a word.
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(44) & Chr(32), " "), Chr(44) & vbCr, " "), Chr(46) & Chr(32), " "), Chr(46) & vbCr, " ")
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Convert to lowercase
StrIn = " " & LCase(Trim(StrIn)) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
While InStr(StrIn, " " & Split(StrExcl, ",")(i) & " ") > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Wend
Next
DoEvents
'Clean up any duplicate spaces
While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Wend
StrIn = " " & Trim(StrIn) & " "
j = UBound(Split(StrIn, " "))
l = j
For i = 1 To j
'Find how many occurences of each word there are in the document
StrTmp = Split(StrIn, " ")(1)
While InStr(StrIn, " " & StrTmp & " ") > 0
StrIn = Replace(StrIn, " " & StrTmp & " ", " ")
Wend
'Calculate the number of words replaced
k = l - UBound(Split(StrIn, " "))
'Update the output string
StrOut = StrOut & StrTmp & vbTab & k & vbCr
l = UBound(Split(StrIn, " "))
If l = 1 Then Exit For
DoEvents
Next
StrIn = StrOut
StrOut = ""
Pgs = MsgBox("Get Page Refs? This may take a while.", vbYesNo)
If Pgs = vbYes Then
For i = 0 To UBound(Split(StrIn, vbCr)) - 1
StrTmp = ""
With .Range
With .Find
.ClearFormatting
.Text = Split(Split(StrIn, vbCr)(i), vbTab)(0)
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWholeWord = True
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Execute
End With
Do While .Find.Found
StrTmp = StrTmp & " " & .Information(wdActiveEndPageNumber)
Set RngTmp = .Duplicate
Set RngTmp = RngTmp.GoTo(What:=wdGoToBookmark, Name:="\page")
.Start = RngTmp.End
.Find.Execute
Loop
End With
StrTmp = Replace(Trim(StrTmp), " ", ",")
StrOut = StrOut & Split(StrIn, vbCr)(i) & vbTab & StrTmp & vbCr
DoEvents
Next
Else
StrOut = StrIn
End If
StrOut = "Word" & vbTab & "Count" & vbTab & "Page" & vbCr & StrOut
'Create the concordance table on a new last page
Set Rng = .Range.Characters.Last
With Rng
.Text = vbCr & Chr(12) & vbCr & StrOut
.Start = .Start + 3
.ConvertToTable Separator:=vbTab, Numcolumns:=3
With .Tables(1)
.Sort Excludeheader:=True, FieldNumber:=1, SortFieldType:=wdSortFieldAlphanumeric, _
SortOrder:=wdSortOrderAscending, CaseSensitive:=False
If Pgs = vbNo Then .Columns(3).Delete
.Rows(1).Range.Font.Bold = True
.Rows(1).HeadingFormat = True
End With
End With
End With
Application.ScreenUpdating = True
End Sub
The above code strips out trailing apostrophes (which are difficult to differentiate from enclosing single quotes), with the result that some possessive word forms in the table might look a bit odd or be treated as simple plurals, but that's a small price to pay.
For PC macro installation & usage instructions, see: Installing Macros
For Mac macro installation & usage instructions, see: Word:mac - Install a Macro
__________________
Cheers,
Paul Edstein
[Fmr MS MVP - Word]
|