View Single Post
 
Old 01-12-2021, 05:13 PM
Cray_Z Cray_Z is offline Windows 10 Office 2016
Novice
 
Join Date: Sep 2014
Posts: 16
Cray_Z is on a distinguished road
Default Macro to extract CCIs as separate entries when part of a text string

Previously, with your help, I was able to modify a macro that searched a document, identified entries that were formatted as "(CCI*)" and output those entries (with page numbers of where they occurred) to a table at the end of the document. For example:
(CCI: 000123)........................21,22

It works wonderfully but was wondering if there is any way to have it separate those entries that have multiple numbers i.e. (CCI: 000123, 000456, 000789). Currently the macro creates an entry like this:
(CCI: 000123, 000456, 000789)......................18-20&32

I would prefer the macro strip off the leading "(CCI: " and trailing ")" and separate the numbers i.e.
000123...........................18-20&32
000456...........................18-20&32
000789...........................18-20&32

The macro I am using:
Code:
Sub CCI_Extractor_v26()
Application.ScreenUpdating = False
 ' This macro checks the contents of a document for CCI reference numbers.
 ' These terms are then tallied and their page references output to a table at the end
 ' of the document, showing the page #s on which they occur.
 ' The number of columns for the table is determined by the lCol variable.
 ' Optional code where the output table is created allows the user to choose
 ' between an across then down or down the across table layout.
Dim Doc As Document, Rng As Range, Tbl As Table
Dim StrTerms As String, strFnd As String, StrPages As String
Dim StrOut As String, StrBreak As String, StrBkMk As String
Dim i As Long, j As Long, lCol As Long
StrPages = "": lCol = 1: StrBkMk = "_Defined_Terms": StrPages = "": StrTerms = vbCr
Set Doc = ActiveDocument
'Go through the document looking for defined terms.
With Doc.Content
  'Check whether our table exists. If so, delete it.
  If .Bookmarks.Exists(StrBkMk) Then .Bookmarks(StrBkMk).Range.Tables(1).Delete
  'Check to see if there is a page break at end of doocument.  If so delete it.
  With Doc.Characters.Last
   While .Previous.Text Like "[ " & Chr(160) & vbCr & vbTab & Chr(12) & "]"
    .Previous.Text = vbNullString
   Wend
  End With
    With .Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Format = False
    .Wrap = wdFindStop
    .MatchWholeWord = True
    .MatchWildcards = True
    .MatchCase = False
    .Execute Replace:=wdReplaceAll
    'Find CCI reference numbers.
    .Text = "\(CCI*\)" '''
    .Execute
  End With

  Do While .Find.found
    Set Rng = .Duplicate
    With Rng
      'If it's not in the StrTerms list, add it.
      If InStr(StrTerms, vbCr & .Text & vbCr) = 0 Then StrTerms = StrTerms & .Text & vbCr
    End With
    .Find.Execute
  Loop
End With
'Exit if no defined terms have been found.
If StrTerms = vbCr Then
  MsgBox "No CCI reference numbers found." & vbCr & "Aborting.", vbExclamation, "CCI extraction Error"
  GoTo ErrExit
End If

'Sort the CCI numbers
Set Rng = ActiveDocument.Range.Characters.Last
With Rng
'Start on a new blank page at end of document
  .InsertBreak Type:=wdPageBreak
  .Collapse wdCollapseEnd
  .InsertBefore vbCr
  .InsertAfter StrTerms
  .Sort ExcludeHeader:=True, FieldNumber:=1, SortFieldType:=wdSortFieldAlphanumeric, _
    SortOrder:=wdSortOrderAscending
  StrTerms = .Text
  .Text = vbNullString
    
End With
While Left(StrTerms, 1) = vbCr
  StrTerms = Mid(StrTerms, 2, Len(StrTerms) - 1)
Wend
'Build the page records for all terms in the StrTerms list.
For i = 0 To UBound(Split(StrTerms, vbCr)) - 1
  strFnd = Trim(Split(StrTerms, vbCr)(i))
  StrPages = ""
  With Doc.Content
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .Format = False
      .Text = strFnd
      .Wrap = wdFindStop
      .MatchWholeWord = True
      .MatchWildcards = False
      .MatchCase = True
      .Execute
    End With
    j = 0
    Do While .Find.found
      'If we haven't already found this CCI on this page, add it to the list.
      If j <> .Duplicate.Information(wdActiveEndAdjustedPageNumber) Then
        j = .Duplicate.Information(wdActiveEndAdjustedPageNumber)
        StrPages = StrPages & j & " "
      End If
      .Find.Execute
    Loop
    'Turn the pages list into a comma-separated string.
    StrPages = Replace(Trim(StrPages), " ", ",")
    If StrPages <> "" Then
      'Add the current record to the output list (StrOut)
      StrOut = StrOut & strFnd & vbTab & Replace(Replace(ParseNumSeq(StrPages, "&"), ",", ", "), "  ", " ") & vbCr
    End If
  End With
Next i
'Output the found terms as a table at the end of the document.
With Rng
  'Calculate the number of table lines for the data.
  j = -Int((UBound(Split(StrOut, vbCr))) / -lCol)
  Set Tbl = ActiveDocument.Tables.Add(Range:=Rng, NumRows:=j + 2, NumColumns:=lCol)
  With Tbl
         'Define the overall table layout.
    With .Range.ParagraphFormat
      .RightIndent = CentimetersToPoints(5 / lCol)
      With .TabStops
        .ClearAll
        .Add Position:=CentimetersToPoints(15 / lCol), Alignment:=wdAlignTabRight, Leader:=wdTabLeaderDots
      End With
    End With
    'Define Width of first column
    Tbl.Cell(1, 1).SetWidth _
 ColumnWidth:=InchesToPoints(8), _
 RulerStyle:=wdAdjustNone
 
    'Populate & format the header row.
    For i = 1 To lCol
       With .Cell(1, i).Range
         .ParagraphFormat.Alignment = wdAlignParagraphCenter
         .Text = "Control Correlation Identifiers for Security Controls" & vbCrLf & "CCI Number" & vbTab & "Page(s)"
        .Font.Name = "Arial"
        .Font.Size = 12
        .Font.Bold = True
      '  .Font.Underline = wdUnderlineSingle
        .ParagraphFormat.KeepWithNext = False
       End With
    Next
    With .Rows.First
      'Apply the heading row attribute so that the table header repeats after a page break.
      .HeadingFormat = True
      'Delete the header row's tab leaders.
      With .Range
        With .ParagraphFormat.TabStops
          .ClearAll
          .Add Position:=CentimetersToPoints(15 / lCol), Alignment:=wdAlignTabRight, Leader:=wdTabLeaderSpaces
        End With
        .Font.Bold = True
      End With
    End With
     For i = 0 To UBound(Split(StrOut, vbCr)) - 1
      ' Populate the data rows, down then across
      .Cell(i Mod j + 2, -Int(-(i + 1) / j)).Range.Text = Split(StrOut, vbCr)(i)
      ' Populate the data rows, across then down
      ' .Range.Cells(i + lCol + 1).Range.Text = Split(StrOut, vbCr)(i)
     Next
    'Bookmark the table.
    ActiveDocument.Bookmarks.Add Name:=StrBkMk, Range:=Tbl.Range
   
  End With
End With

'Clean up and exit.
ErrExit:
Set Rng = Nothing: Set Tbl = Nothing: Set Doc = Nothing
Application.ScreenUpdating = True
End Sub


Function ParseNumSeq(StrNums As String, Optional StrEnd As String)
'This function converts multiple sequences of 3 or more consecutive numbers in a
' list to a string consisting of the first & last numbers separated by a hyphen.
' The separator for the last sequence can be set via the StrEnd variable.
Dim ArrTmp(), i As Long, j As Long, k As Long
ReDim ArrTmp(UBound(Split(StrNums, ",")))
For i = 0 To UBound(Split(StrNums, ","))
  ArrTmp(i) = Split(StrNums, ",")(i)
Next
For i = 0 To UBound(ArrTmp) - 1
  If IsNumeric(ArrTmp(i)) Then
    k = 2
    For j = i + 2 To UBound(ArrTmp)
      If CInt(ArrTmp(i) + k) <> CInt(ArrTmp(j)) Then Exit For
      ArrTmp(j - 1) = ""
      k = k + 1
    Next
    i = j - 2
  End If
Next
StrNums = Join(ArrTmp, ",")
StrNums = Replace(Replace(Replace(StrNums, ",,", " "), ", ", " "), " ,", " ")
While InStr(StrNums, "  ")
  StrNums = Replace(StrNums, "  ", " ")
Wend
StrNums = Replace(Replace(StrNums, " ", "-"), ",", ", ")
If StrEnd <> "" Then
  i = InStrRev(StrNums, ",")
  If i > 0 Then
    StrNums = Left(StrNums, i - 1) & Replace(StrNums, ",", " " & Trim(StrEnd), i)
  End If
End If
ParseNumSeq = StrNums
End Function
Reply With Quote