Thursday, December 20, 2007

Indexing Office 2007 Documents Using Windows SharePoint Services 3.0

I've been very impressed with Windows SharePoint Services 3.0 ever since I had access to it as a Beta, especially with the included Blog and Wiki capability, then with the release of the Application Templates.

The only annoying thing I've found with it is the lack of search support for Office 2007 document formats, without consuming an Office 2007 license on the server, as suggested in Knowledge Base article 944433.

Well, no more! Microsoft have finally released the Microsoft Filter Pack - a set of standalone IFilters for the following file formats: .docx, .docm, .pptx, .pptm, .xlsx, .xlsm, .xlsb, .zip, .one, .vdx, .vsd, .vss, .vst, .vsx, and .vtx.

There is a slight catch, however: you have to edit the registry by hand, as per Knowledge Base article 946338. I needed to roll this out over a number of servers, so the manual process didn't suit me. I sat down and whipped up the following script:

@echo off
rem register-ifilters.cmd - prologue.
rem Locates the WSS 3.0 search application registry key (rk) and works out the
rem highest numbered value name already present in its ExtensionList (mv).
rem Later parts of the script rely on both variables.
setlocal enabledelayedexpansion

rem mv tracks the largest numeric value name seen in ExtensionList.
set mv=0

rem setlocal inherits the caller's environment; clear rk so that a failed
rem lookup cannot silently reuse a stale value.
set "rk="

rem Skip the first three lines of reg query output and keep the last remaining
rem line, which is expected to be the search application subkey.
for /f "skip=3 delims==" %%i in ('reg query "HKLM\SOFTWARE\Microsoft\Shared Tools\Web Server Extensions\12.0\Search\Applications"') do set rk=%%i

rem Without a key there is nothing to register against; stop before touching
rem the search service or the registry.
if not defined rk (
  >&2 echo ERROR: no search application key found under Web Server Extensions\12.0\Search\Applications.
  exit /b 1
)

rem Stop the search service only once the lookup has succeeded.
net stop spsearch

rem Value lines start with the value name; lines beginning with HKEY are key
rem names and are filtered out. Keep the largest name seen.
for /f %%k in ('reg query "%rk%\Gather\Search\Extensions\ExtensionList" ^| findstr /b /v HKEY') do if !mv! lss %%k set mv=%%k
rem Make sure every Filter Pack extension is present in the ExtensionList key.
rem For each extension, look for a value whose data matches it exactly; when
rem reg query reports no match (errorlevel 1 or higher), bump mv and add the
rem extension as a new REG_SZ value named with that number.
for %%e in (docm docx one pptm pptx vdx vsd vss vst vsx vtx xlsb xlsm xlsx zip) do (
  reg query "%rk%\Gather\Search\Extensions\ExtensionList" /f "%%e" /d /e >nul
  if errorlevel 1 (
    set /a mv+=1
    reg add "%rk%\Gather\Search\Extensions\ExtensionList" /v !mv! /t REG_SZ /d %%e
  )
)
rem Point each extension's key under Filters\Extension at its filter GUID.
rem Every call sets the key's default value (/ve) as REG_MULTI_SZ and
rem overwrites any existing value without prompting (/f).
set "filterRoot=HKLM\SOFTWARE\Microsoft\Shared Tools\Web Server Extensions\12.0\Search\Setup\ContentIndexCommon\Filters\Extension"

rem .docm / .docx
for %%e in (docm docx) do reg add "%filterRoot%\.%%e" /ve /t REG_MULTI_SZ /d "{5A98B233-3C59-4B31-944C-0E560D85E6C3}" /f

rem .one
reg add "%filterRoot%\.one" /ve /t REG_MULTI_SZ /d "{89BCB740-6119-101A-BCB7-00DD010655AF}" /f

rem .pptm / .pptx
for %%e in (pptm pptx) do reg add "%filterRoot%\.%%e" /ve /t REG_MULTI_SZ /d "{DDFE337F-4987-4EC8-BDE3-133FA63D5D85}" /f

rem .vdx / .vsd / .vss / .vst / .vsx / .vtx
for %%e in (vdx vsd vss vst vsx vtx) do reg add "%filterRoot%\.%%e" /ve /t REG_MULTI_SZ /d "{FAEA5B46-761B-400E-B53E-E805A97A543E}" /f

rem .xlsb
reg add "%filterRoot%\.xlsb" /ve /t REG_MULTI_SZ /d "{312AB530-ECC9-496E-AE0E-C9E6C5392499}" /f

rem .xlsm / .xlsx
for %%e in (xlsm xlsx) do reg add "%filterRoot%\.%%e" /ve /t REG_MULTI_SZ /d "{F90DFE0C-CBDF-41FF-8598-EDD8F222A2C8}" /f

rem .zip
reg add "%filterRoot%\.zip" /ve /t REG_MULTI_SZ /d "{20E823C2-62F3-4638-96BD-90F4F6784EBC}" /f
rem Bring the search service back up now that the registry changes are done.
net start spsearch

rem Kick off a full crawl so existing documents are re-indexed.
rem stsadm.exe is usually not on PATH, so prefer its standard location in the
rem WSS 3.0 BIN folder and fall back to a PATH lookup when it is not there.
set "stsadmExe=%CommonProgramFiles%\Microsoft Shared\web server extensions\12\BIN\stsadm.exe"
if not exist "%stsadmExe%" set "stsadmExe=stsadm"
"%stsadmExe%" -o spsearch -action fullcrawlstart


I put this into a file called register-ifilters.cmd and ran it after installing the Filter Pack on my WSS 3.0 boxes. It's not a fantastic script, but it does the job.



Please note that if you've got a lot of documents in your WSS 3.0 content databases, then you may want to schedule the fullcrawlstart command at a later stage, rather than run it as part of the IFilter registration.

1 comment:

Jeff said...

These steps worked for you?

I found your post while trying to get my WSS 3.0 to index 2007 files.

I've followed the guides but I still cannot get it to work. I just wanted to verify that, after you installed the filter pack and ran your script, searching in 2007 docs actually worked.