I had a lucky break and found myself with some time.
The code below processes your test document without stopping.
There are several references that it identified as malformed. Typically this is because the - in the reference in an em dash rather than a hyphen.
I've changed the code so that you don't have to include the authorities in the code itself. Instead just create a document called Authorities.docx. Place a comma seperated list of the authorities in the document and save it in the same directory as document you want to process.
This is the list I ended up with from you post above.
Quote:
1Chronicles,1Corinthians,1John,1Kings,1Peter,1Samu el,1Thessalonians,1Timothy,2Chronicles,2Corinthian s,2John,2Kings,2Peter,2Samuel,2Thessalonians,2Timo thy,3John,Acts,John,Amos,Colossians,Daniel,Deutero nomy,Ecclesiastes,Ephesians,Esther,Exodus,Ezekiel, Ezra,Galatians,Genesis,Habakkuk,Haggai,Hebrews,Hos ea,Isaiah,James,Jeremiah,Job,Joel,John,Jonah,Joshu a,Jude,Judges,Lamentations,Leviticus,Luke,Malachi, Mark,Matthew,Micah,Nahum,Nehemiah,Numbers,Obadiah, Philemon,Philippians,Proverbs,Psalms,Revelation,Ro mans,Ruth,Titus,Zechariah,Zephaniah.
|
You'll note that it has been sorted into alphabetical order. John1, 2John etc are processed before John. If its the other way around then 1John is interpreted as only John.
If you want to change references such as 1John to John 1 the code will accept this as written but it depends on how your hyperlink targets are structured.
I had some trouble with John as an authority as it was like the code ignored the presence of the name, hence the reason for it appearing twice in my list.
Here's the updated code
Code:
Option Explicit
' Note the use of module scope constants and variable
' Constants used with the authorities and authority_ref arrays
Const NAME As Long = 0
Const CHAPTER As Long = 1
Const VERSE_1 As Long = 2
Const VERSE_2 As Long = 3
Const NULL_REFERENCE As String = ",,,"
Const DO_NOT_SAVE As Boolean = False
' Constants representing characters
Const SPACE As String = " "
Const COMMA As String = ","
Const SEMI_COLON As String = ";"
Const HYPHEN As String = "-"
Const USCORE As String = "_"
Const LBRACKET As String = "("
Const RBRACKET As String = ")"
Const COLON As String = ":"
Const POINT As String = "."
'Constants used with cset Note cset doesn't support ranges e.g."[0-9]"
Const IS_A_NUMBER As String = "0123456789"
Const END_OF_REFERENCE As String = vbLf & vbCr & "[ ),.;]"
'Constants used when forming hyperlinks
Const FULL_HYPERLINK As Boolean = True
Const VERSE_ONLY_HYPERLINK As Boolean = False
Const AUTHORITY_PLACEHOLDER As String = "<authority>"
Const PATH_TO_REFERENCES As String = "W:\Skydrive\Docs\<authority>.htm#chpt_"
Private authority_ref() As String
Private found_strange_ref As Boolean
Sub main()
Dim authorities() As String
Dim authority As Variant
found_strange_ref = False
populate_list_of_authorities authorities
authority_ref = Split(NULL_REFERENCE, COMMA)
For Each authority In authorities
'authority is a variant but the sub argument is a string hence the cast
authority_ref(NAME) = authority
insert_authority_hyperlinks
Next
If found_strange_ref Then
MsgBox "Malformed references were found. These have been highlighted in Red", vbOKOnly
End If
End Sub
Sub populate_list_of_authorities(ByRef list() As String)
' Reads in the list of Authorities from a word document called 'authorities.docx'
'
Dim list_text As String
Dim authority_doc As Word.Document
Set authority_doc = Documents.Open(ActiveDocument.Path & "\" & "Authorities.docx")
list_text = authority_doc.Paragraphs(1).range.text
authority_doc.Close DO_NOT_SAVE
' Process list text to get a standard representation
' e.g. This " John , Luke, Samuel 1, Revelations ,Genesis,."
' to this "John,Luke,Samuel 1,Revelations,Genesis"
list_text = Trim$(list_text)
list_text = replace(list_text, SPACE & SPACE, SPACE)
list_text = replace(list_text, SPACE & COMMA, COMMA)
list_text = replace(list_text, COMMA & SPACE, COMMA)
list_text = replace(list_text, COMMA & COMMA, COMMA)
list_text = replace(list_text, SEMI_COLON, COMMA)
list_text = replace(list_text, COLON, COMMA)
list_text = replace(list_text, HYPHEN, COMMA)
Do While Not Right(list_text, 1) Like "[0-9a-zA-Z]"
list_text = Left(list_text, Len(list_text) - 1)
Loop
list = Split(list_text, COMMA)
End Sub
Sub insert_authority_hyperlinks()
' The search text means the following
' Search terms have been split into a number of terms using ()
'(<authority>) find text starting with the reference authority
'(*) select all characters until the character specified in the next () field
'(:) include the colon in the selection
'(*) select all characters until the character specified in the next field is found
'([ ,-;]) select all characters until we find a space, comma, hyphen or semicolon
Const PRIMARY_SEARCH_TEXT As String = "(<authority>)( @)([0-9]{1,})(:)([0-9]{1,})"
Dim rng As Word.range
Dim end_of_rng As Long
Set rng = ActiveDocument.StoryRanges(wdMainTextStory)
Do
With rng.find
.text = replace(PRIMARY_SEARCH_TEXT, AUTHORITY_PLACEHOLDER, authority_ref(NAME))
.MatchWildcards = True
.Forward = True
.Format = True
.Font.ColorIndex = ActiveDocument.Styles(wdStyleNormal).Font.ColorIndex
.Wrap = wdFindStop
.Execute
If .Found Then
process_single_hyperlink rng, FULL_HYPERLINK
End If
End With
Loop Until Not rng.find.Found
End Sub
Function process_single_hyperlink(rng As Word.range, hyperlink_type As Boolean)
Dim last_char As String
last_char = rng.Next(unit:=wdCharacter, Count:=1)
rng.Select
Select Case last_char
Case HYPHEN
' the reference isn't complete so we need to extend rng and then
' re-enter this sub
extend_hyphen_reference_to_full_range rng
process_single_hyperlink rng, hyperlink_type
Case COMMA
' A comma at the end of a reference implies a following short form reference
populate_authority_ref rng
insert_hyperlink rng, FULL_HYPERLINK
If move_range_to_following_short_form_ref(rng) Then
' Don't provide any message if we don't find a short form ref
process_single_hyperlink rng, VERSE_ONLY_HYPERLINK
End If
Case vbLf, vbCr, SPACE, SEMI_COLON, COLON, POINT, RBRACKET
populate_authority_ref rng
insert_hyperlink rng, hyperlink_type
Case Else
rng.HighlightColorIndex = wdRed
found_strange_ref = True
End Select
End Function
Sub extend_hyphen_reference_to_full_range(rng As Word.range)
rng.MoveEndUntil cset:=IS_A_NUMBER
rng.MoveEndWhile cset:=IS_A_NUMBER
End Sub
Function move_range_to_following_short_form_ref(rng As Word.range) As Boolean
' we assume that the follow on range is quite close to avoid the error
' of there actually not being a following short form reference
If rng.MoveStartUntil(cset:=IS_A_NUMBER, Count:=4) Then
rng.MoveEndWhile cset:=IS_A_NUMBER
move_range_to_following_short_form_ref = True
Else
move_range_to_following_short_form_ref = False
End If
End Function
Sub populate_authority_ref(rng As Word.range)
' If the text of the range does not contain a colon then the reference is a short form
' and consequently we only need to populate VERSE_1
If InStr(rng.text, COLON) Then
authority_ref(NAME) = get_substr(rng.text, SPACE, 0)
authority_ref(CHAPTER) = get_substr(get_substr(rng.text, SPACE, 1), COLON, 0)
authority_ref(VERSE_1) = get_substr(get_substr(rng.text, COLON, 1), HYPHEN, 0)
If InStr(rng.text, HYPHEN) > 0 Then
authority_ref(VERSE_2) = get_substr(get_substr(rng.text, COLON, 1), HYPHEN, 1)
Else
authority_ref(VERSE_2) = vbNullString
End If
Else
authority_ref(VERSE_1) = rng.text
End If
End Sub
Function get_substr(text, seperator, index) As String
Dim list() As String
If index < 0 Then
get_substr = vbNullString
ElseIf InStr(text, seperator) = 0 Then
get_substr = text
Exit Function
End If
list = Split(text, seperator)
If UBound(list) < index Then
get_substr = vbNullString
Exit Function
End If
get_substr = list(index)
End Function
Sub insert_hyperlink(reference As Word.range, hyperlink_type As Boolean)
Dim range_len As Long
range_len = reference.Characters.Count
ActiveDocument.Hyperlinks.Add _
Anchor:=reference, _
Address:= _
replace(PATH_TO_REFERENCES, AUTHORITY_PLACEHOLDER, authority_ref(NAME)) _
& authority_ref(CHAPTER) _
& "_" _
& authority_ref(VERSE_1), _
TextToDisplay:=IIf(hyperlink_type, reference.text, authority_ref(VERSE_1))
reference.MoveStart unit:=wdCharacter, Count:=range_len
End Sub
I have checked portions of you document to make sure that everything was being captured but it is rather long so I'll leave it to yourself to do a detailed checking.
Incidentally, it would now be a trivial exercise to compile a dictionary of dictionaries in case you wanted to do any further processing with the hyperlinks. e.g. Print a list of references, list of hyperlinks, count the references for each authority etc.
Let me know how it goes for you.
|