Я не знаю инструментов, которые экспортируют строки в двоичном виде. В большинстве случаев это действительно не нужно.
Если вам нужно получить двоичную информацию о строках, вы можете использовать IStorage::OpenStream, IStream::Stat с параметром STATFLAG_NONAME и IStream::Read для чтения информации из MSI напрямую. Информация о строках хранится в потоках с именами «_StringData» и «_StringPool». Имена потоков просто закодированы. Если вам интересно, я могу опубликовать код, который показывает, как декодировать имена.
ОБНОВЛЕНО: я подготовил небольшое демо на основе моей старой утилиты. Демо загружает строки из потоков «_StringData» и «_StringPool» и выводит информацию в читаемом формате. Если вы измените константы в строке
bSuccess = LoadStringPool (pStg, TRUE, 80, 10, 10);
(см. ниже), вы сможете получить более полную информацию. Таким же образом вы можете легко изменить код, чтобы сохранить соответствующие потоки в файл в двоичном виде.
Код на C приведён ниже:
#define STRICT
#define _WIN32_WINNT 0x501
#include <stdio.h>
#include <windows.h>
#include <ShLwApi.h> // for wnsprintf
#include <malloc.h> // for _alloca
#include <lmerr.h>
#include <tchar.h>
// IPropertyUI in <ShObjIdl.h>
//#include <msi.h>
#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
#define CONST_STR_LEN(s) (ARRAY_SIZE(s) - 1)
#pragma comment (lib, "ole32.lib")
#pragma comment (lib, "ShLwApi.lib")
// In-memory copy of the MSI string table, filled by LoadStringPool() from the
// "_StringData" and "_StringPool" streams.
typedef struct tagMSISTRINGTABLE {
    UINT    cStrings;        // number of string ids recorded by LoadStringPool()
    LPWSTR  pszStringData;   // all strings in one block; have '\0' between strings
    LPWSTR *ppszStringPool;  // array of pointers to the corresponding string in pszStringData data block
    WORD    cbStringIdSize;  // size of StringId in all tables in bytes. Typically if cStrings<32K, cbStringIdSize=2, then 3 or more.
                             // cbStringIdSize value will be calculated based on first bytes of _StringPool stream.
} MSISTRINGTABLE;

// The single string table instance used by this program; string ids default to 2 bytes.
MSISTRINGTABLE g_StringTable = {0, NULL, NULL, 2};
// Defines a constant named `name` of the given `type`, initialised from the GUID components
// (one 32-bit value, two 16-bit values and eight bytes).
#define MIDL_DEFINE_GUID(type,name,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) const type name = {l,w1,w2,{b1,b2,b3,b4,b5,b6,b7,b8}}
// Storage class ids that _tmain() compares against the class id read from the opened storage.
MIDL_DEFINE_GUID (CLSID, CLSID_MsiTransform, 0x000c1082, 0x0000, 0x0000, 0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46); //.mst
MIDL_DEFINE_GUID (CLSID, CLSID_MsiDatabase, 0x000c1084, 0x0000, 0x0000, 0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46); //.msi, .msm
MIDL_DEFINE_GUID (CLSID, CLSID_MsiPatch, 0x000c1086, 0x0000, 0x0000, 0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46); //.msp
// Prints a caller-supplied, printf-style formatted prefix followed by a textual description
// of dwErrorCode.
//
//   dwErrorCode - Win32 error code or HRESULT to describe.
//   pszTemplate - format string for the prefix text; the variable arguments feed it.
//
// The prefix is formatted into a fixed 1024-character buffer. Depending on the error code a
// message DLL name is selected, the DLL is loaded as a data file, and FormatMessage() is asked
// for the description. If no description is found, "Unknown error" plus the numeric code is
// printed instead (hexadecimal when either of the two top bits is set, decimal otherwise).
//
// NOTE(review): this listing looks incomplete - the function braces, the declaration of
// hModule, the `if` heads in front of some `else if` branches and several FormatMessage()
// arguments are not present. Restore them from the original source before compiling.
void DisplayErrorMessage (DWORD dwErrorCode, LPCTSTR pszTemplate, ...)
va_list pa;
TCHAR szText[1024]; // 1024 is the maximum which wsprintf and wvsprintf support
LPTSTR pErrorString;
LPCTSTR pszErrorDll = NULL;
//DWORD dwFacility = HRESULT_FACILITY(dwErrorCode);
// Build the caller's prefix text.
va_start (pa, pszTemplate);
wvnsprintf (szText, ARRAY_SIZE(szText), pszTemplate, pa);
va_end (pa);
// Choose default Error DLL
// NOTE(review): the conditions guarding the next two assignments are not visible here.
dwErrorCode = HRESULT_CODE(dwErrorCode);
pszErrorDll = TEXT("UrlMon.dll");
else if (HRESULT_FACILITY(dwErrorCode) == FACILITY_INTERNET && dwErrorCode > 0xC00CE000L && dwErrorCode < 0xC00CE5FFL)
pszErrorDll = TEXT("msxmlr.dll"); // TEXT("msxmlr4.dll");
// NOTE(review): the condition selecting MQUtil.dll is not visible here.
pszErrorDll = TEXT("MQUtil.dll");
else if (dwErrorCode >= NERR_BASE && dwErrorCode <= MAX_NERR)
pszErrorDll = TEXT("NetMsg.dll");
else if (dwErrorCode >= 0xC0040002L && dwErrorCode <= 0xC004001FL)
pszErrorDll = TEXT("IoLogMsg.dll");
else if ((LONG)dwErrorCode < 0)
pszErrorDll = TEXT("ntdll.dll");
// Load the DLL if needed
if (pszErrorDll) {
// Loaded as a data file: only its message resources are needed.
hModule = LoadLibraryEx (pszErrorDll, NULL, LOAD_LIBRARY_AS_DATAFILE);
if (!hModule) {
_tprintf (TEXT("Can not load DLL \"%s\" to display description for error 0x%08lX.\r\n"), pszErrorDll, dwErrorCode);
//StringFormatedOutput (ERROR_OUTPUT, TEXT("Can not load DLL \"%s\" to display description for error 0x%08lX.\r\n"),
// pszErrorDll, dwErrorCode);
// Query Error text.
// See Q149409 as an example.
// NOTE(review): pErrorString is later released with LocalFree(), which suggests
// FORMAT_MESSAGE_ALLOCATE_BUFFER was among the flags; the flag list is truncated here.
if (!FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM | // Always search in system message table !!!
hModule, // source of message definition
dwErrorCode, // message ID
// 0, // language ID
// GetUserDefaultLangID(), // language ID
// GetSystemDefaultLangID(),
(LPTSTR)&pErrorString, // pointer for buffer to allocate
0, // min number of chars to allocate
NULL)) {
// No description available: print the prefix and the raw code.
if (dwErrorCode & 0xC0000000) {
// NOTE(review): szText is passed as the format string; a '%' in the text would be
// interpreted by _tprintf. Prefer _tprintf (TEXT("%s"), szText).
_tprintf (szText);
_tprintf (TEXT("Unknown error. Error code 0x%08lX.\r\n"), dwErrorCode);
//StringFormatedOutput (ERROR_OUTPUT, TEXT("%sUnknown error. Error code 0x%08lX.\r\n"), szText, dwErrorCode);
else {
_tprintf (szText);
_tprintf (TEXT("Unknown error. Error code %lu.\r\n"), dwErrorCode);
//StringFormatedOutput (ERROR_OUTPUT, TEXT("%sUnknown error. Error code %lu.\r\n"), szText, dwErrorCode);
else {
// Description found: print prefix, description and a line break.
_tprintf (szText);
// NOTE(review): same non-literal format string concern for pErrorString.
_tprintf (pErrorString);
_tprintf (TEXT("\r\n"));
//StringFormatedOutput (ERROR_OUTPUT, TEXT("%s%s\r\n"), szText, pErrorString);
LocalFree (pErrorString);
// Release the message DLL if one was loaded.
if (hModule)
FreeLibrary (hModule);
// Maps a 6-bit value to one character of the alphabet used in Windows Installer stream names.
//
// This is similar to Base64 as used in MIME (see 6.8 in http://www.ietf.org/rfc/rfc2045.txt),
// but with a different alphabet: instead of
//   'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
// the codes 0..63 (0x3F) are mapped to
//   '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._'
//
//   x - value to encode; only 0..0x3F are meaningful.
//
// Returns the character for x. Every value above 0x3F (0x40..0xFF) is mapped to '_',
// the same character as 0x3F.
static BYTE MsiBase64Encode (BYTE x)
{
    // index:  0-9 digits, 10-35 upper case, 36-61 lower case, 62 '.', 63 '_'
    static const char szAlphabet[] =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._";

    if (x > 0x3F)
        x = 0x3F;   // out-of-range input shares the last character

    return (BYTE) szAlphabet[x];
}
#pragma warning (disable: 4706)
// Decodes an MSI stream name into readable text.
//
//   pszInStreamName  - encoded, '\0'-terminated stream name.
//   pszOutStreamName - receives the decoded, '\0'-terminated name. One encoded character can
//                      expand to two decoded characters, so the buffer must hold at least
//                      2 * length(pszInStreamName) + 1 characters.
//
// Returns the number of characters written to pszOutStreamName, not counting the terminator.
static UINT DecodeStreamName (LPWSTR pszInStreamName, LPWSTR pszOutStreamName)
{
    WCHAR ch;
    UINT count = 0;

    while ((ch = *pszInStreamName++)) {
        if ((ch >= 0x3800) && (ch < 0x4840)) {
            // A part of Unicode (CJK Unified Ideographs Extension A, added with Unicode 3.0) is used by
            // Windows Installer for encoding one or two ANSI characters.
            if (ch >= 0x4800) {     // 0x4800 - 0x483F
                // only one character is encoded
                ch = (WCHAR) MsiBase64Encode ((BYTE)(ch - 0x4800));
            }
            else {                  // 0x3800 - 0x47FF
                // the value contains two characters: low 6 bits first, then the next 6 bits
                ch -= 0x3800;
                *pszOutStreamName++ = (WCHAR) MsiBase64Encode ((BYTE)(ch & 0x3f));
                count++;
                ch = (WCHAR) MsiBase64Encode ((BYTE)((ch >> 6) & 0x3f));
            }
        }
        // all characters lower than 0x3800 or higher than or equal to 0x4840 are copied without any decoding
        *pszOutStreamName++ = ch;
        count++;
    }
    *pszOutStreamName = L'\0';

    return count;
}
#pragma warning (default: 4706)
// Value returned by MsiBase64Decode() for a character outside the stream-name alphabet.
#ifndef INVALID_DECODING_RESULT
#define INVALID_DECODING_RESULT 0xFF
#endif

// Maps one character of the Windows Installer stream-name alphabet to its 6-bit value.
//
// This is similar to Base64 decoding as used in MIME (see 6.8 in http://www.ietf.org/rfc/rfc2045.txt),
// but with the alphabet
//   '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._'
// instead of
//   'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'.
//
//   ch - character to decode.
//
// Returns 0..0x3F for a character of the alphabet, or INVALID_DECODING_RESULT (0xFF)
// for any other character.
static BYTE MsiBase64Decode (BYTE ch)
{
    if ((ch >= '0') && (ch <= '9'))         // '0123456789' -> 0-9 (0x0-0x9)
        return (BYTE)(ch - '0');
    if ((ch >= 'A') && (ch <= 'Z'))         // 'A'..'Z' -> 10-35 (0xA-0x23)
        return (BYTE)(ch - 'A' + 10);
    if ((ch >= 'a') && (ch <= 'z'))         // 'a'..'z' -> 36-61 (0x24-0x3D)
        return (BYTE)(ch - 'a' + 10 + 26);
    if (ch == '.')                          // '.' -> 62 (0x3E)
        return 10 + 26 + 26;
    if (ch == '_')                          // '_' -> 63 (0x3F)
        return 10 + 26 + 26 + 1;

    return INVALID_DECODING_RESULT;         // anything else -> 0xFF
}
#define MAX_STREAM_NAME 0x1f
// Encodes a readable name into the form used for MSI stream names.
//
//   bTable           - TRUE when the name is a table name; the output is then prefixed with 0x4840.
//   pszInStreamName  - readable, '\0'-terminated name.
//   pszOutStreamName - receives the encoded, '\0'-terminated name.
//   cchOutStreamName - size of pszOutStreamName in characters, including the terminator.
//
// The output is always '\0'-terminated when cchOutStreamName is at least 1; a name that does
// not fit is truncated. Nothing is written when pszOutStreamName is NULL or cchOutStreamName is 0.
static void EncodeStreamName (BOOL bTable, LPCWSTR pszInStreamName, LPWSTR pszOutStreamName, UINT cchOutStreamName)
{
    LPWSTR pszCurrentOut = pszOutStreamName;
    LPWSTR pszLast;     // last slot of the buffer, reserved for the terminator

    if (!pszOutStreamName || cchOutStreamName == 0)
        return;
    pszLast = pszOutStreamName + cchOutStreamName - 1;

    if (bTable && pszCurrentOut < pszLast)
        *pszCurrentOut++ = 0x4840;

    while (pszInStreamName && pszCurrentOut < pszLast) {
        WCHAR ch = *pszInStreamName++;

        if (!ch)
            break;

        if ((ch < 0x80) && (MsiBase64Decode((BYTE)ch) <= 0x3F)) {
            WCHAR chNext = *pszInStreamName;

            // MsiBase64Decode() converts any "standard" character
            // '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._' to 0-0x3F.
            // Two such characters are packed together into 0-0xFFF: the first one occupies the low
            // 6 bits, the next one is shifted 6 bits to the left. Adding 0x3800 gives a value
            // between 0x3800 and 0x47FF.
            if (chNext && (chNext < 0x80) && (MsiBase64Decode((BYTE)chNext) <= 0x3F)) {
                ch = (WCHAR)(MsiBase64Decode((BYTE)ch) + 0x3800 + (MsiBase64Decode((BYTE)chNext) << 6));
                pszInStreamName++;  // the second character has been consumed as well
            }
            else {
                // a single "standard" character is stored as 0x4800 - 0x483F
                ch = (WCHAR)(MsiBase64Decode((BYTE)ch) + 0x4800);
            }
        }
        // every other character is stored unchanged
        *pszCurrentOut++ = ch;
    }

    *pszCurrentOut = L'\0';
}
// Reads a whole stream of a storage into a newly allocated memory block.
//
//   pStg          - storage that contains the stream.
//   pszStreamName - name of the stream exactly as stored (already encoded).
//   ppData        - receives the block allocated with LocalAlloc(); the caller releases it with
//                   LocalFree(). Set to NULL on failure and for an empty stream.
//   pSize         - receives the size of the block in bytes; 0 on failure and for an empty stream.
//
// Returns S_OK on success or a failure HRESULT. A stream of 4 GB or more, an allocation failure
// and a short read are all reported as failures, so a successful return with a non-zero *pSize
// always comes with complete data.
static HRESULT LoadStreamInMemory (IStorage *pStg, LPCWSTR pszStreamName, PBYTE *ppData, PUINT pSize)
{
    HRESULT hr;
    IStream *pStm = NULL;
    STATSTG stat;
    PBYTE pData = NULL;
    ULONG cbStream;
    ULONG cbRead = 0;

    // set defaults
    *ppData = NULL;
    *pSize = 0;

    hr = IStorage_OpenStream (pStg, pszStreamName, NULL, STGM_READ | STGM_SHARE_EXCLUSIVE, 0, &pStm);
    if (FAILED(hr)) {
        DisplayErrorMessage (hr, TEXT("Failed IStorage::OpenStream(). "));
        goto cleanup;
    }

    // STATFLAG_NONAME: only the size is needed, so no name string is allocated
    hr = IStream_Stat (pStm, &stat, STATFLAG_NONAME);
    if (FAILED(hr)) {
        DisplayErrorMessage (hr, TEXT("Failed IStream::Stat(). "));
        goto cleanup;
    }

    if (stat.cbSize.HighPart) {
        // the size does not fit in the UINT reported through pSize
        hr = HRESULT_FROM_WIN32 (ERROR_FILE_TOO_LARGE);
        goto cleanup;
    }

    cbStream = stat.cbSize.LowPart;
    if (cbStream == 0) {
        hr = S_OK;  // empty stream: nothing to read
        goto cleanup;
    }

    pData = (PBYTE) LocalAlloc (LMEM_FIXED, cbStream);
    if (!pData) {
        hr = E_OUTOFMEMORY;
        goto cleanup;
    }

    // The vtable is called directly instead of through an IStream_Read name.
    hr = pStm->lpVtbl->Read (pStm, pData, cbStream, &cbRead);
    if (FAILED(hr) || (cbRead != cbStream)) {
        LocalFree (pData);
        if (SUCCEEDED(hr))
            hr = HRESULT_FROM_WIN32 (ERROR_HANDLE_EOF);    // short read must not look like success
        goto cleanup;
    }

    *ppData = pData;
    *pSize = cbStream;
    hr = S_OK;

cleanup:
    if (pStm)
        IStream_Release (pStm);

    return hr;
}
// Prints a wide string through a caller-supplied format, shortening it to nMaxLen characters.
//
//   pszString - string to print.
//   pszFormat - _tprintf format with exactly one wide-string conversion (for example "%ls\r\n").
//   nMaxLen   - maximum number of characters of pszString to show.
//
// A string that fits is printed unchanged. A longer string is cut so that the shown text has
// exactly nMaxLen characters: for nMaxLen > 3 the last three are "...", for nMaxLen of 3 or 2
// the last one is ".", for nMaxLen of 1 only the first character is shown, and for nMaxLen of 0
// nothing is printed.
//
// Returns the value of _tprintf(), or 0 when nothing is printed.
UINT DumpString (LPCWSTR pszString, LPCTSTR pszFormat, UINT nMaxLen)
{
    UINT cchString = (UINT) lstrlenW (pszString);
    UINT cchDots, cchKeep, i;
    LPWSTR pszText;

    if (cchString <= nMaxLen)
        return (UINT) _tprintf (pszFormat, pszString);

    if (nMaxLen == 0)
        return 0;

    // decide how many trailing dots replace the cut-off part
    if (nMaxLen > 3)
        cchDots = 3;
    else if (nMaxLen == 1)
        cchDots = 0;
    else
        cchDots = 1;
    cchKeep = nMaxLen - cchDots;

    // The temporary copy lives on the stack; nMaxLen is expected to be a small display width.
    pszText = (LPWSTR) _alloca ((nMaxLen + 1) * sizeof(WCHAR));

    // lstrcpynW counts the terminator, so cchKeep+1 copies exactly cchKeep characters
    lstrcpynW (pszText, pszString, (int)(cchKeep + 1));
    for (i = 0; i < cchDots; i++)
        pszText[cchKeep + i] = L'.';
    pszText[nMaxLen] = L'\0';

    return (UINT) _tprintf (pszFormat, pszText);
}
// Loads the stream that stores an MSI table into memory.
//
//   pStg         - storage of the MSI file.
//   pszTableName - readable table name (for example "_StringPool"); it is encoded with
//                  EncodeStreamName() before the stream is opened.
//   ppData       - receives the data block returned by LoadStreamInMemory().
//   pSize        - receives the size of the data block in bytes.
//
// Returns the HRESULT of LoadStreamInMemory(); a failure is also reported on the console.
static HRESULT LoadTableFromStream (IStorage *pStg, LPCWSTR pszTableName, PBYTE *ppData, PUINT pSize)
{
    HRESULT hr;
    WCHAR szEncodedStreamName[MAX_STREAM_NAME + 1];    // longest stream name plus terminator

    EncodeStreamName (TRUE, pszTableName, szEncodedStreamName, ARRAY_SIZE(szEncodedStreamName));

    hr = LoadStreamInMemory (pStg, szEncodedStreamName, ppData, pSize);
    if (FAILED(hr))
        DisplayErrorMessage (hr, TEXT("Failed LoadStreamInMemory() for the table %ls. "), pszTableName);

    return hr;
}
// Loads the MSI string table from the "_StringPool" and "_StringData" tables into g_StringTable
// and optionally prints it.
//
//   pStg             - storage of the MSI file.
//   bDumpStringPool  - TRUE to print the string id size, the code page and the string entries.
//   cMaxStrOutLen    - maximum number of characters printed per string (passed to DumpString()).
//   cMaxFirstRowsOut - number of entries printed from the beginning of the pool.
//   cMaxLastRowsOut  - number of entries printed from the end of the pool.
//
// Returns FALSE when one of the two tables cannot be loaded, TRUE otherwise.
//
// NOTE(review): this listing looks incomplete - the function braces, the declaration of hr, the
// head of the conversion call whose arguments are at the pszStringData+nOffsetSrc line, and
// the loop control statements after several `if` lines are not present. Restore them from the
// original source before compiling.
// NOTE(review): no code that releases pStringPool / pszStringData is visible, and the results
// of the two LocalAlloc() calls are not checked in the visible code.
BOOL LoadStringPool (IStorage *pStg, BOOL bDumpStringPool, UINT cMaxStrOutLen, UINT cMaxFirstRowsOut, UINT cMaxLastRowsOut)
UINT nOffsetSrc = 0, nOffsetDest = 0, nStringPoolLength, nStringDataLength;
UINT iStringId, iSrc, uBufferSize;
PSTR pszStringData = NULL;
// One entry of the _StringPool stream: two 16-bit words per entry.
struct _StringPool {
WORD wLength;
WORD wRefcnt;
} *pStringPool = NULL;
DWORD dwCodePage;
BOOL bAllPrinted = TRUE;
UINT cStringIdsPrinted = 0;
hr = LoadTableFromStream (pStg, OLESTR("_StringPool"), (PBYTE *)&pStringPool, &nStringPoolLength);
if (FAILED(hr))
return FALSE;
// The first pool entry is a header: its wLength is used as the code page and its wRefcnt
// selects the string id size.
// NOTE(review): pStringPool may be NULL here if the stream is empty - confirm and guard.
dwCodePage = pStringPool[0].wLength;
if (pStringPool[0].wRefcnt == 0)
g_StringTable.cbStringIdSize = 2;
else if (pStringPool[0].wRefcnt == 0x8000)
g_StringTable.cbStringIdSize = 3;
if (bDumpStringPool)
_tprintf (TEXT("\r\nString ID size: %d\r\n"), g_StringTable.cbStringIdSize);
// convert bytes to indexes
nStringPoolLength /= sizeof (struct _StringPool);
hr = LoadTableFromStream (pStg, OLESTR("_StringData"), (PBYTE *)&pszStringData, &nStringDataLength);
if (FAILED(hr))
return FALSE;
// Allocate buffer large enough to hold all strings from _StringData steam together with '\0' at the end of each string.
// We allocate all memory in one block and not per string, to speed up allocation and to reduce overhead in heap menagement.
uBufferSize = nStringDataLength + nStringPoolLength;
g_StringTable.pszStringData = (PWSTR) LocalAlloc (LPTR, uBufferSize*sizeof(WCHAR));
// allocate and initialize to NULL all pointers
// NOTE(review): sizeof (PWSTR *) is used for elements of type PWSTR; both are pointer-sized.
g_StringTable.ppszStringPool = (PWSTR *) LocalAlloc (LPTR, nStringPoolLength * sizeof (PWSTR *));
if (bDumpStringPool) {
_tprintf (TEXT("\r\nCode page of the string pool: %d\r\n"), dwCodePage);
_tprintf (TEXT("+++String Pool Entries+++\r\n"));
// Walk the pool entries after the header; iSrc indexes pool entries, iStringId is the string id.
for (iSrc=1, iStringId=1; iSrc<nStringPoolLength; iSrc++) {
DWORD dwLen = pStringPool[iSrc].wLength;
if (pStringPool[iSrc].wLength == 0) {
// A string is lagrer as 64K. In the case one create one dummy entry with pStringPool[iStringId].wLength
// and high word of string length saved in the next entry will be saved in pStringPool[iStringId].wRefcnt
if (pStringPool[iSrc].wRefcnt == 0) // empty entry
if (iSrc != 1 && pStringPool[iSrc-1].wLength == 0 && pStringPool[iSrc-1].wRefcnt != 0)
// current string have length over 64K
dwLen += pStringPool[iSrc-1].wRefcnt << 16; //* 0x10000;
// Only store the string when it still fits into the remaining destination buffer.
if (dwLen < uBufferSize) {
pszStringData+nOffsetSrc, (int)dwLen,
g_StringTable.pszStringData+nOffsetDest, uBufferSize);
g_StringTable.pszStringData[nOffsetDest+dwLen] = L'\0';
uBufferSize -= dwLen+1;
g_StringTable.ppszStringPool[iStringId] = g_StringTable.pszStringData+nOffsetDest;
if (bDumpStringPool) {
//_tprintf (TEXT("\tId:%5d Refcnt:%5d String: %ls\r\n"), iStringId, pStringPool[iStringId].wRefcnt, g_StringTable.pszStringData+nOffsetDest);
// Print only the first cMaxFirstRowsOut and the last cMaxLastRowsOut entries.
if (cStringIdsPrinted<cMaxFirstRowsOut || iStringId+cMaxLastRowsOut>=nStringPoolLength) {
// NOTE(review): the refcount is read at index iStringId while the length is read at
// index iSrc - confirm the two indexes cannot diverge.
_tprintf (TEXT("\tId:%5d Refcnt:%5d String: "), iStringId, pStringPool[iStringId].wRefcnt);
DumpString (g_StringTable.pszStringData+nOffsetDest, TEXT("%ls\r\n"), cMaxStrOutLen);
else {
// Print a single "..." line in place of all skipped entries.
if (bAllPrinted)
_tprintf (TEXT("...\r\n"));
bAllPrinted = FALSE;
// Advance past the stored string (plus its terminator in the destination).
nOffsetDest += dwLen+1;
nOffsetSrc += dwLen;
if (nOffsetSrc >= nStringDataLength)
if (iStringId < nStringPoolLength)
g_StringTable.cStrings = iStringId;
g_StringTable.cStrings = iStringId-1;
return TRUE;
// Program entry point: opens the file named by the first command line argument as a
// structured storage, prints the class id of the storage together with the kind of Windows
// Installer file it identifies, and dumps the string pool with LoadStringPool().
//
// Returns 1 when no file name is given; the visible code returns 0 otherwise.
//
// NOTE(review): this listing looks incomplete - the function braces, the declaration of hr,
// the `#else` / `#endif` that belong to `#ifdef _UNICODE`, several StgOpenStorageEx()
// arguments and the statements after some `if` lines are not present. Restore them from the
// original source before compiling.
int _tmain (int argc, LPTSTR argv[])
IStorage *pStg = NULL;
LPTSTR pszFileName;
LPWSTR pszwFileName;
BOOL bSuccess = FALSE;
if (argc < 2) {
_tprintf (TEXT("Usage: GetMsiStringTable <filename>\r\n"));
return 1;
pszFileName = argv[1];
// The storage API needs a wide-character file name: use it directly in a Unicode build,
// otherwise convert from the ANSI code page into a stack buffer.
#ifdef _UNICODE
pszwFileName = pszFileName;
DWORD cchLen = lstrlenA (pszFileName) + 1;
pszwFileName = _alloca (cchLen*sizeof(WCHAR));
MultiByteToWideChar (CP_ACP, MB_ERR_INVALID_CHARS | MB_PRECOMPOSED, pszFileName, -1, pszwFileName, cchLen);
__try {
CLSID clsidStg;
STGOPTIONS stgOption = {0};
stgOption.usVersion = STGOPTIONS_VERSION;
// Open the root storage.
hr = StgOpenStorageEx (pszwFileName,
&stgOption, // NULL,
&IID_IStorage, // instaed of IID_IStorage it is possible to use IID_IPropertySetStorage
(PVOID *)&pStg);
if (FAILED(hr)) {
// NOTE(review): "%ls" expects a wide string but pszFileName is LPTSTR; in a non-Unicode
// build pszwFileName looks like the matching argument - confirm.
DisplayErrorMessage (hr, TEXT("Error: couldn't open storage \"%ls\". "), pszFileName);
// Identify the file type from the class id stored in the root storage.
hr = ReadClassStg (pStg, &clsidStg);
if (SUCCEEDED(hr)) {
// MsiInfo.exe
// Transform: Class Id for the MSI storage is {000C1082-0000-0000-C000-000000000046} CLSID_MsiTransform
// MSI: Class Id for the MSI storage is {000C1084-0000-0000-C000-000000000046} CLSID_MsiDatabase
// Patch: Class Id for the MSI storage is {000C1086-0000-0000-C000-000000000046} CLSID_MsiPatch
// 39 characters: room for the braced GUID text plus the terminator.
OLECHAR szClsidStg[39];
StringFromGUID2 (&clsidStg, szClsidStg, ARRAY_SIZE(szClsidStg));
_tprintf (TEXT("Class Id for the storage is %ls:\r\n"), szClsidStg);
if (IsEqualCLSID (&clsidStg, &CLSID_MsiDatabase))
_tprintf (TEXT("\tStorage has MSI database/Merge module class id.\r\n"));
else if (IsEqualCLSID (&clsidStg, &CLSID_MsiPatch))
_tprintf (TEXT("\tStorage has MSI patch class id.\r\n"));
else if (IsEqualCLSID (&clsidStg, &CLSID_MsiTransform))
_tprintf (TEXT("\tStorage has MSI transform class id.\r\n"));
else {
_tprintf (TEXT("\tStorage is not a Windows Installer file.\r\n"));
// Dump the string pool: at most 80 characters per string, first 10 and last 10 entries.
bSuccess = LoadStringPool (pStg, TRUE, 80, 10, 10);
if (!bSuccess)
// Release the storage on every path out of the guarded block.
__finally {
if (pStg)
IStorage_Release (pStg);
return 0;
Пример вывода для MSI-файла Visual Studio 2010 Ultimate:
Class Id for the storage is {000C1084-0000-0000-C000-000000000046}:
Storage has MSI database/Merge module class id.
String ID size: 3
Code page of the string pool: 1252
+++String Pool Entries+++
Id: 1 Refcnt: 542 String: Name
Id: 2 Refcnt: 7 String: Table
Id: 4 Refcnt: 7 String: Type
Id: 5 Refcnt: 5 String: _sqlAssembly
Id: 6 Refcnt: 8 String: File_
Id: 7 Refcnt: 18 String: MS.VS.vspGridControl.dll.27F9E354_F6F7_44D7_9637_42C9575D0C37
Id: 8 Refcnt: 7 String: _sqlFollowComponents
Id: 9 Refcnt: 2 String: FollowComponent
Id: 10 Refcnt: 30 String: Component_
Id: 11 Refcnt: 2 String: ParentComponent_
Id:94617 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3205
Id:94618 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3206
Id:94619 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3207
Id:94620 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3208
Id:94621 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3209
Id:94622 Refcnt: 3 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3210
Id:94623 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3211
Id:94624 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3212
Id:94625 Refcnt: 1 String: Microsoft.VisualStudio.Diagnostics.ServiceModelSink.Behavior, Microsoft.Visua...
Id:94626 Refcnt: 1 String: VS_Debugging_ServiceModelSink.MachineConfigV4.3213
В своё время я потратил много времени на то, чтобы выяснить, как декодировать идентификатор строки размером 3 байта, а не 2 байта, что типично для небольших установочных пакетов с не очень длинной таблицей строк.