Следующий код включает в себя все идеи Дэйва и Р. Уббена, приведенные выше, плюс он возвращает полный список всех изображений, а также имеет дело с несколькими битами глубины. Мне пришлось преобразовать его в VB для проекта, над которым я работаю, хотя, извините за это ...
Private Sub getAllImages(ByVal dict As pdf.PdfDictionary, ByVal images As List(Of Byte()), ByVal doc As pdf.PdfReader)
Dim res As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(dict.Get(pdf.PdfName.RESOURCES)), pdf.PdfDictionary)
Dim xobj As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(res.Get(pdf.PdfName.XOBJECT)), pdf.PdfDictionary)
If xobj IsNot Nothing Then
For Each name As pdf.PdfName In xobj.Keys
Dim obj As pdf.PdfObject = xobj.Get(name)
If (obj.IsIndirect) Then
Dim tg As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(obj), pdf.PdfDictionary)
Dim subtype As pdf.PdfName = CType(pdf.PdfReader.GetPdfObject(tg.Get(pdf.PdfName.SUBTYPE)), pdf.PdfName)
If pdf.PdfName.IMAGE.Equals(subtype) Then
Dim xrefIdx As Integer = CType(obj, pdf.PRIndirectReference).Number
Dim pdfObj As pdf.PdfObject = doc.GetPdfObject(xrefIdx)
Dim str As pdf.PdfStream = CType(pdfObj, pdf.PdfStream)
Dim bytes As Byte() = pdf.PdfReader.GetStreamBytesRaw(CType(str, pdf.PRStream))
Dim filter As String = tg.Get(pdf.PdfName.FILTER).ToString
Dim width As String = tg.Get(pdf.PdfName.WIDTH).ToString
Dim height As String = tg.Get(pdf.PdfName.HEIGHT).ToString
Dim bpp As String = tg.Get(pdf.PdfName.BITSPERCOMPONENT).ToString
If filter = "/FlateDecode" Then
bytes = pdf.PdfReader.FlateDecode(bytes, True)
Dim pixelFormat As System.Drawing.Imaging.PixelFormat
Select Case Integer.Parse(bpp)
Case 1
pixelFormat = Drawing.Imaging.PixelFormat.Format1bppIndexed
Case 24
pixelFormat = Drawing.Imaging.PixelFormat.Format24bppRgb
Case Else
Throw New Exception("Unknown pixel format " + bpp)
End Select
Dim bmp As New System.Drawing.Bitmap(Int32.Parse(width), Int32.Parse(height), pixelFormat)
Dim bmd As System.Drawing.Imaging.BitmapData = bmp.LockBits(New System.Drawing.Rectangle(0, 0, Int32.Parse(width), Int32.Parse(height)), System.Drawing.Imaging.ImageLockMode.WriteOnly, pixelFormat)
Marshal.Copy(bytes, 0, bmd.Scan0, bytes.Length)
bmp.UnlockBits(bmd)
Using ms As New MemoryStream
bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Png)
bytes = ms.GetBuffer
End Using
End If
images.Add(bytes)
ElseIf pdf.PdfName.FORM.Equals(subtype) Or pdf.PdfName.GROUP.Equals(subtype) Then
getAllImages(tg, images, doc)
End If
End If
Next
End If
End Sub