Мне нужно было сделать это в PowerShell на сервере, где не установлены приложения MS Office. Хитрость, как и предлагалось выше, состоит в том, чтобы заглянуть внутрь файла Office и прочитать вложенные в него XML-файлы.
Вот функция, оформленная как командлет. Это означает, что сценарий можно просто сохранить в каталоге сценариев PowerShell и вызывать функцию из любого другого сценария PowerShell.
# DocumentOfficePropertiesGet
# Example usage
# From a PowerShell script:
# $props = & "c:\PowerShellScriptFolder\DocumentOfficePropertiesGet.ps1" -DocumentFullPathName "d:\documents\my excel doc.xlsx" -OfficeProperties "dcterms:created;dcterms:modified"
# Parameters
# DocumentFullPathName -- full path and name of MS Office document
# OfficeProperties -- semi-colon delimited string of property names as they
# appear in the core.xml file. To see these names, rename any
# MS Office document file to have the extension .zip, then look inside
# the zip file. In the docProps folder open the core.xml file. The
# core document properties are nodes under the cp:coreProperties node.
# Example: dcterms:created;dcterms:modified;cp:lastModifiedBy
# Return value
# The function returns a hashtable object -- in the above example, $props would contain
# the name-value pairs for the requested MS Office document properties. In the calling script,
# to get at the values:
# $fooProperty = $props.'dcterms:created'
# $barProperty = $props.'dcterms:modified'
# Script-level parameter block. [CmdletBinding()] makes the script behave like an
# advanced function; the declared output type is a hashtable.
[CmdletBinding()]
[OutputType([System.Collections.Hashtable])]
Param
(
# Path of the MS Office document to inspect. Optional (Mandatory=$false); an
# explicitly passed null or empty value is rejected by ValidateNotNullOrEmpty.
# NOTE(review): the default below looks like a developer test path -- confirm
# whether this parameter should be mandatory instead of silently defaulting.
[Parameter(Position=0,
Mandatory=$false,
HelpMessage="Enter the full path name of the document")]
[ValidateNotNullOrEmpty()]
[String] $DocumentFullPathName='e:\temp\supplier_List.xlsx',
# Semi-colon delimited list of core property names (prefix:localName). Spaces
# around the names are tolerated: the script body strips them before splitting.
[Parameter(Position=1,
Mandatory=$false,
HelpMessage="Enter the Office properties semi-colon delimited")]
[ValidateNotNullOrEmpty()]
[String] $OfficeProperties='dcterms:created; dcterms:modified ;cp:lastModifiedBy;dc:creator'
)
# Load the assembly that provides [System.IO.Compression.ZipFile], which the
# Unzip function below uses to extract the document's embedded files.
Add-Type -AssemblyName System.IO.Compression.FileSystem
# Unzip -- extracts every entry of a zip archive into a folder.
# Works on MS Office files directly: no need to rename foo.xlsx to foo.zip.
# Parameters
#   zipfile -- path of the archive to extract (absolute, or relative to the
#              current PowerShell location)
#   outpath -- path of the folder that receives the extracted files
# Errors
#   Any failure raised by ExtractToDirectory (missing file, not a zip archive,
#   target file already exists, ...) is reported to the caller. The previous
#   blanket "*>$null" redirection is gone: the method returns nothing, so the
#   redirection could only hide error output.
function Unzip
{
    param([string]$zipfile, [string]$outpath)

    # .NET resolves relative paths against the process working directory, which
    # is not necessarily the current PowerShell location. Convert both paths to
    # full provider paths first; this does not require the paths to exist.
    $pathHelper  = $ExecutionContext.SessionState.Path
    $archivePath = $pathHelper.GetUnresolvedProviderPathFromPSPath($zipfile)
    $targetPath  = $pathHelper.GetUnresolvedProviderPathFromPSPath($outpath)

    [System.IO.Compression.ZipFile]::ExtractToDirectory($archivePath, $targetPath)
}
# Main script body: extract the document into a scratch folder, read
# docProps\core.xml and return the requested core properties as a hashtable
# keyed by the requested property name (value is $null when a property is absent).

# Normalise the requested property list: remove all spaces, split on ';' and drop
# empty entries (e.g. produced by a trailing or doubled semi-colon).
$OfficeProperties = $OfficeProperties.Replace(' ', '')
$requiredProperties = @($OfficeProperties -split ';' | Where-Object { $_ })

# .NET resolves relative paths against the process working directory rather than
# the current PowerShell location, so turn the document path into a full path.
$documentPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($DocumentFullPathName)
if (-not (Test-Path -LiteralPath $documentPath -PathType Leaf))
{
    throw "Document not found: $DocumentFullPathName"
}

# Use a unique scratch folder per invocation so that concurrent runs cannot
# delete or overwrite each other's extracted files.
$zipDirectoryName = Join-Path ([System.IO.Path]::GetTempPath()) ('TempZip_' + [System.Guid]::NewGuid().ToString('N'))

# create a hashtable to return the values
$docProperties = @{}

try
{
    # create the scratch folder
    New-Item -ItemType Directory -Path $zipDirectoryName -ErrorAction Stop | Out-Null

    # Unzip the files -- i.e. extract the xml files embedded within the MS Office document
    Unzip $documentPath $zipDirectoryName

    # Locate docProps\core.xml and fail with a clear message when it is missing
    # (extraction failed, or the file is not an Office Open XML document).
    $coreXmlName = Join-Path (Join-Path $zipDirectoryName 'docProps') 'core.xml'
    if (-not (Test-Path -LiteralPath $coreXmlName -PathType Leaf))
    {
        throw "docProps\core.xml was not found in '$DocumentFullPathName'."
    }

    # Let the XML parser read the file itself so the encoding declared in the XML
    # prolog is honoured; Get-Content would decode with the console default and
    # can corrupt non-ASCII values such as author names.
    $coreXml = New-Object System.Xml.XmlDocument
    $coreXml.Load($coreXmlName)
    $coreProperties = $coreXml.DocumentElement

    # Now look for each requested property
    foreach ($requiredProperty in $requiredProperties)
    {
        # We ignore namespaces and match on the local name only: the part after
        # the prefix, or the whole name when no prefix was given.
        $localName = ($requiredProperty -split ':')[-1]

        # Use XPath to fetch the node for this property
        $thisNode = $coreProperties.SelectSingleNode("*[local-name(.) = '$localName']")

        # Indexer assignment (rather than Add) tolerates a property that was
        # requested more than once.
        if ($null -eq $thisNode)
        {
            $docProperties[$requiredProperty] = $null
        }
        else
        {
            $docProperties[$requiredProperty] = $thisNode.InnerText
        }
    }
}
finally
{
    # clean up, even when extraction or parsing failed
    Remove-Item -LiteralPath $zipDirectoryName -Force -Recurse -ErrorAction SilentlyContinue
}

# return the properties hashtable. To do this, just write the object to the output stream
$docProperties