Friday, June 22, 2012

Crude way to mass convert DOC and XLS to DOCX and XLSX

I'm trying to reduce the space usage of older files on my servers, as well as weed out documents that cause Office crashes. If you're curious what kind of gains are to be had, my observation has been on the order of 50-80% less space, per newer-format file.

I tried making my own mass conversion tool before, but didn't really have a chance to make anything of it. There are pay tools out there, but I am uncertain of their performance. Lucky me, I found some macros that did an Excel conversion: I was able to expand this with other references to handle Word docs as well. Its faster than converting each doc by hand, but could be improved still.

Suggested / planned improvements: better error-handling; converting RTF files


Macro use: copy into a new macro-enabled Excel 2007 or 2010 file. Path to process is entered into cell B1

'Mass Office XML file converter
'Prepared by: Michael Adams, unquietwiki.com, 6-22-2012

'References
'http://answers.microsoft.com/en-us/office/forum/office_2010-excel/batch-convert-xls-to-xlsx/1d9b3d78-daf0-4014-8fb2-930aca6493b0
'http://vbadud.blogspot.com/2009/08/update-word-document-with-excel.html
'http://www.exceltip.com/show_tip/Applications_-_Word,_Outlook_in_VBA/Control_Word_from_Excel_using_VBA_in_Microsoft_Excel/465.html
'http://www.maxhire.net/robo/Projects/MaxHire%20Knowledge%20Base/Files_and_Documents/docx-doc.htm
'http://patorjk.com/programming/tutorials/vbarrays.htm
'http://spreadsheetpage.com/index.php/tip/getting_a_list_of_file_names_using_vba/
'http://www.vbaexpress.com/forum/showthread.php?t=10929

Sub ConvertToOpenXML()
    ' Setup
    Dim strPath As String
    Dim wbk As Workbook
    Dim wapp As Word.Application
    Dim wdoc As Word.Document
    Dim vsArray() As String
   
    ' Get filenames
    strPath = Sheet1.Cells(1, "B").Value
    ReDim vsArray(0 To 0) As String
    Call ReturnAllFilesUsingDir(strPath, vsArray)
   
    ' Process files
    For Each strFile In vsArray()
    'Is XLS
        If LCase(Right(strFile, 3)) = "xls" Then
            Set wbk = Workbooks.Open(strFile)
            If wbk.HasVBProject Then
              wbk.SaveAs FileName:=strFile & "m", _
                FileFormat:=xlOpenXMLWorkbookMacroEnabled, AddToMru:=False
            Else
               wbk.SaveAs FileName:=strFile & "x", _
                FileFormat:=xlOpenXMLWorkbook, AddToMru:=False
            End If
            wbk.Close SaveChanges:=False
        End If
    'Is DOC
        If LCase(Right(strFile, 3)) = "doc" Then
            Set wapp = New Word.Application
            Set wdoc = Word.Documents.Open(strFile)
            If wdoc.HasVBProject Then
            wdoc.SaveAs
              wdoc.SaveAs FileName:=strFile & "m", _
                FileFormat:=wdFormatXMLDocumentMacroEnabled, AddToRecentFiles:=False
            Else
               wdoc.SaveAs FileName:=strFile & "x", _
                FileFormat:=wdFormatXMLDocument, AddToRecentFiles:=False
            End If
            wdoc.Close SaveChanges:=False
            wapp.Quit
        End If
    Next
End Sub

Function ReturnAllFilesUsingDir(ByRef strPath As String, ByRef vsArray() As String) As Boolean
    Dim tempStr As String, vDirs() As String, Cnt As Long, dirCnt As Long
    
    If Len(vsArray(0)) = 0 Then
        Cnt = 0
    Else
        Cnt = UBound(vsArray) + 1
    End If
    If Right(strPath, 1) <> "\" Then strPath = strPath & "\"
    On Error GoTo BadDir
    tempStr = Dir(strPath, 31)
    
    Do Until Len(tempStr) = 0
        If Asc(tempStr) <> 46 Then
            If GetAttr(strPath & tempStr) And vbDirectory Then
                ReDim Preserve vDirs(dirCnt)
                vDirs(dirCnt) = tempStr
                dirCnt = dirCnt + 1
            End If
BadDirGo:
        End If
        tempStr = Dir
SkipDir:
    Loop
    On Error GoTo BadFile
    tempStr = Dir(strPath, 15)
    Do Until Len(tempStr) = 0
        ReDim Preserve vsArray(Cnt)
        vsArray(Cnt) = strPath & tempStr
        Cnt = Cnt + 1
        tempStr = Dir
    Loop
     'Debug.Print Cnt
BadFileGo:
    On Error GoTo 0
    If dirCnt > 0 Then
        For dirCnt = 0 To UBound(vDirs)
            If Len(Dir(strPath & vDirs(dirCnt))) = 0 Then
                ReturnAllFilesUsingDir strPath & vDirs(dirCnt), vsArray
            End If
        Next
    End If
    Exit Function
BadDir:
    If tempStr = "pagefile.sys" Or tempStr = "???" Then
         '  Debug.Print "DIR: Skipping: " & strPath & tempStr
        Resume BadDirGo
    ElseIf Err.Number = 52 Then 'or err.number=5 then
         '  Debug.Print "No read rights: " & strPath & tempStr
        Resume SkipDir
    End If
    Debug.Print "Error with DIR (BadDir): " & Err.Number & " - " & Err.Description
    Debug.Print " strPath: " & strPath
    Debug.Print " tempStr: " & tempStr
    Exit Function
BadFile:
    If Err.Number = 52 Then 'or err.number=5 then
         '  Debug.Print "No read rights: " & strPath & tempStr
    Else
        Debug.Print "Error with DIR (BadFile): " & Err.Number & " - " & Err.Description
        Debug.Print " strPath: " & strPath
        Debug.Print " tempStr: " & tempStr
    End If
    Resume BadFileGo
End Function

No comments:

Post a Comment