View Single Post
 
Old 10-31-2014, 04:36 PM
macropod's Avatar
macropod macropod is offline Windows 7 64bit Office 2010 32bit
Administrator
 
Join Date: Dec 2010
Location: Canberra, Australia
Posts: 22,375
macropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond reputemacropod has a reputation beyond repute
Default

Try the macro below. It has a folder browser, so all you need do is to point it to the folder with your transcript files and it will process all files in that folder. The results of the processing will be output to a table in the document you run the macro from. I suggest using an empty document. I don't know what you want for your 'what' column, since a speaker could have much to say and I don't know what you want to capture. The output also includes the filename for each set of records - presently in speaker column.

In addition to the processing discussed earlier, I've added some code to do a bit of related clean-up work, such as removing tabs, double-spaces and interjections, so an accurate word count without those can be obtained.
Code:
Sub Get_Speaker_Stats()
Application.ScreenUpdating = False
Dim strFolder As String, strFile As String, strDocNm As String, wdDoc As Document
Dim i As Long, j As Long, k As Long, RngSpkr As Range
Dim StrSpkr As String, StrTmp As String, StrStats As String
strDocNm = ActiveDocument.FullName
StrStats = "Speaker" & vbTab & "Turns" & vbTab & "Words"
'Get the folder to process
strFolder = GetFolder
If strFolder = "" Then Exit Sub
strFile = Dir(strFolder & "\*.doc", vbNormal)
'Loop through all documents in the folder
While strFile <> ""
  i = 0: j = 0: k = 1
  'If it's not this document, open it
  If strFolder & "\" & strFile <> strDocNm Then
    Set wdDoc = Documents.Open(FileName:=strFolder & "\" & strFile, _
      AddToRecentFiles:=False, Visible:=False, ReadOnly:=True)
    With wdDoc
      'Store the document's name
      StrStats = StrStats & vbCr & .Name
      With .Range
        With .Find
          .ClearFormatting
          .Replacement.ClearFormatting
          .Format = False
          .Forward = True
          .Wrap = wdFindContinue
          .MatchWildcards = True
          'Delete interjections
          .Text = "[\[\(]@[!0-9]@[\)\]]{1,}"
          .Replacement.Text = ""
          .Execute Replace:=wdReplaceAll
          'Do some basic clean-up work
          .Text = "[^t^0160]"
          .Replacement.Text = " "
          .Execute Replace:=wdReplaceAll
          .Text = "[ ]{2,}"
          .Replacement.Text = " "
          .Execute Replace:=wdReplaceAll
          .Text = " ^13"
          .Replacement.Text = "^p"
          .Execute Replace:=wdReplaceAll
          'Reformat the data for tabulation
          .Text = "(\[[0-9]{2}:[0-9]{2}:[0-9]{2}\])[ ]@([A-Z]@)[. ]@([! ])"
          .Replacement.Text = "\1^t\2.^t\3"
          .Execute Replace:=wdReplaceAll
        End With
        'Convert the document to a 3-column table
        .ConvertToTable Separator:=vbTab, NumColumns:=3
        With .Tables(1)
          'Sort the table by the 2nd (speaker) column
          .Sort ExcludeHeader:=False, FieldNumber:=2, SortFieldType:=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending
          'add a temporary row so we get the last record
          .Rows.Add
          'Get the first speaker's details & stats for their first words
          Set RngSpkr = .Cell(1, 2).Range
          With RngSpkr
            .End = .End - 1
            StrSpkr = .Text
            If Len(.Cells(1).Next.Range.Text) > 2 Then j = j + UBound(Split(.Cells(1).Next.Range.Text, " ")) + 1
          End With
          'Process the rest of the table
          For i = 2 To .Rows.Count
            'Check who the speaker is
            Set RngSpkr = .Cell(i, 2).Range
            With RngSpkr
              .End = .End - 1
              'If it's the same speaker, update the word count
              If .Text = StrSpkr Then
                If Len(.Cells(1).Next.Range.Text) > 2 Then j = j + UBound(Split(.Cells(1).Next.Range.Text, " ")) + 1
              'Otherwise, store the stats & get details of the new speaker
              Else
                If StrSpkr <> "" Then StrStats = StrStats & vbCr & StrSpkr & vbTab & i - k & vbTab & j
                StrSpkr = .Text: j = 0: k = i
                If Len(.Cells(1).Next.Range.Text) > 2 Then j = j + UBound(Split(.Cells(1).Next.Range.Text, " ")) + 1
              End If
            End With
          Next
        End With
      End With
      'We're done with this file, so close it
      .Close SaveChanges:=False
    End With
  End If
  'Get the next file
  strFile = Dir()
Wend
Set wdDoc = Nothing: Set RngSpkr = Nothing
'Output the stats for all files
With ActiveDocument.Range
  .InsertAfter StrStats
  .ConvertToTable Separator:=vbTab, NumColumns:=3
  With .Tables(1)
    .Rows(1).HeadingFormat = True
    .Borders.Enable = True
  End With
End With
Application.ScreenUpdating = True
End Sub
 
Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function
__________________
Cheers,
Paul Edstein
[Fmr MS MVP - Word]
Reply With Quote