详解如何利用PowerShell提取wps表格中嵌入的图片
针对Excel下打开表格图片显示 #NAME?编辑栏显示为 =@_xlfn.DISPIMG( 样公式的问题,一般需要在 wps 程序下,Ctrl+F 查找范围选值,输入 =DISPIMG 全选,然后再右键转换为浮动图片。如果是Excel中,则是查找公式 DISPIMG。
查阅网上得资料得知,可以通过解压表格文件,然后 根据公式中的第一参数(通常以 ID 开头)看 xl_rels\cellimages.xml.rels 目录下的 name (其值为 dispimg 函数的第一参数)和 r:embed (其值以 rId 开头)的对应关系,然后再看 Id ( rId 开头 ) 和 Target(图片路径) 的对应关系,进而得到图片的路径。
在 LLM 的帮助下进而有以下 PS 脚本。
function Get-ExcelDispImages { param ( [Parameter(Mandatory=$true)] [string]$ExcelPath, [Parameter(Mandatory=$false)] [string]$OutputFolder = ".\ExcelImages" ) # 辅助函数:安全地读取文件内容 function Read-FileContent { param ( [string]$Path ) try { # 使用.NET方法直接读取文件,避免PowerShell路径解析问题 if ([System.IO.File]::Exists($Path)) { return [System.IO.File]::ReadAllText($Path) } Write-Host "File not found: $Path" -ForegroundColor Yellow return $null } catch { Write-Host "Error reading file $Path : $_" -ForegroundColor Yellow return $null } } try { # 验证Excel文件是否存在 if (-not (Test-Path -LiteralPath $ExcelPath)) { throw "Excel file not found: $ExcelPath" } # 确保ExcelPath是绝对路径 $ExcelPath = (Get-Item $ExcelPath).FullName # 创建输出文件夹(使用绝对路径) $OutputFolder = [System.IO.Path]::GetFullPath($OutputFolder) if (-not (Test-Path -Path $OutputFolder)) { New-Item -ItemType Directory -Path $OutputFolder -Force | Out-Null } # 创建Excel COM对象 $excel = New-Object -ComObject Excel.Application $excel.Visible = $false $excel.DisplayAlerts = $false $workbook = $excel.Workbooks.Open($ExcelPath) # 用于存储找到的DISPIMG ID和信息 $dispImgIds = @() $imageMapping = @{} # 遍历所有工作表 foreach ($worksheet in $workbook.Worksheets) { $usedRange = $worksheet.UsedRange $rowCount = $usedRange.Rows.Count $colCount = $usedRange.Columns.Count for ($row = 1; $row -le $rowCount; $row++) { for ($col = 1; $col -le $colCount; $col++) { $cell = $usedRange.Cells($row, $col) $formula = $cell.Formula # 检查是否包含DISPIMG函数并提取所有参数 if ($formula -match 'DISPIMG\("([^"]+)"') { $imageId = $matches[1] Write-Host "Found DISPIMG: $formula" -ForegroundColor Gray # 创建参数列表 $params = @{ 'ID' = $imageId 'Cell' = $cell.Address() 'Formula' = $formula 'Worksheet' = $worksheet.Name } # 提取所有参数,包括图片ID $paramValues = @() $formula -match 'DISPIMG\((.*?)\)' | Out-Null $paramString = $matches[1] $paramValues = $paramString -split ',' | ForEach-Object { $_ -replace '"', '' -replace '^\s+|\s+$', '' } # 存储所有参数 for ($i = 0; $i -lt $paramValues.Count; $i++) { $params["Param$i"] = $paramValues[$i] } $imageMapping[$imageId] = $params $dispImgIds += $imageId } } } } # 创建临时目录 $tempPath = Join-Path ([System.IO.Path]::GetTempPath()) "ExcelTemp" if (Test-Path $tempPath) { Remove-Item $tempPath -Recurse -Force } New-Item -ItemType Directory -Path $tempPath -Force | Out-Null # 复制Excel文件到临时目录并解压 $tempExcel = Join-Path $tempPath "temp.xlsx" $tempZip = Join-Path $tempPath "temp.zip" Copy-Item -Path $ExcelPath -Destination $tempExcel -Force if (Test-Path $tempZip) { Remove-Item $tempZip -Force } Rename-Item -Path $tempExcel -NewName "temp.zip" -Force Expand-Archive -Path $tempZip -DestinationPath $tempPath -Force # 检查media文件夹并处理图片 $mediaPath = Join-Path $tempPath "xl\media" if (Test-Path $mediaPath) { # 显示DISPIMG参数和图片对应关系 Write-Host "`nFound $($imageMapping.Count) DISPIMG functions" -ForegroundColor Cyan if ($imageMapping.Count -gt 0) { Write-Host "`n=== DISPIMG Functions Found ===" -ForegroundColor Cyan Write-Host "Found $($imageMapping.Count) DISPIMG functions" -ForegroundColor Cyan # 显示所有找到的DISPIMG函数 Write-Host "`n=== DISPIMG Functions Details ===" -ForegroundColor Yellow foreach ($id in $imageMapping.Keys) { $params = $imageMapping[$id] Write-Host "Cell: [$($params.Worksheet)]$($params.Cell)" -ForegroundColor Gray Write-Host "Formula: $($params.Formula)" -ForegroundColor Gray } # 首先从cellimages.xml获取DISPIMG ID到rId的映射 $dispImgToRid = @{} $cellImagesPath = Join-Path $tempPath "xl\cellimages.xml" Write-Host "`n=== Reading cellimages.xml ===" -ForegroundColor Yellow Write-Host "Path: $cellImagesPath" -ForegroundColor Gray if (Test-Path $cellImagesPath) { try { $xmlContent = Get-Content $cellImagesPath -Raw -EnCoding UTF8 Write-Host "`nRaw XML Content:" -ForegroundColor Gray Write-Host $xmlContent # 使用正则表达式提取所有cellImage元素 $matches = [regex]::Matches($xmlContent, '<etc:cellImage>.*?</etc:cellImage>', [System.Text.RegularExpressions.RegexOptions]::Singleline) Write-Host "`nFound $($matches.Count) cellImage elements" -ForegroundColor Gray foreach ($match in $matches) { $cellImageXml = $match.Value Write-Host "`nProcessing cellImage element:" -ForegroundColor Gray # 提取name属性(包含DISPIMG ID) if ($cellImageXml -match 'name="([^"]+)"') { $dispImgId = $matches[1] Write-Host "Found DISPIMG ID: $dispImgId" -ForegroundColor Gray # 提取r:embed属性(包含rId) if ($cellImageXml -match 'r:embed="(rId\d+)"') { $rId = $matches[1] $dispImgToRid[$dispImgId] = $rId Write-Host " Mapping: DISPIMG ID $dispImgId -> rId $rId" -ForegroundColor Green } } } } catch { Write-Host "Error reading cellimages.xml: $($_.Exception.Message)" -ForegroundColor Red Write-Host $_.Exception.StackTrace -ForegroundColor Red } } else { Write-Host "cellimages.xml not found!" -ForegroundColor Red } # 从cellimages.xml.rels获取rId到实际图片的映射 $ridToImage = @{} $cellImagesRelsPath = Join-Path $tempPath "xl\_rels\cellimages.xml.rels" Write-Host "`n=== Reading cellimages.xml.rels ===" -ForegroundColor Yellow Write-Host "Path: $cellImagesRelsPath" -ForegroundColor Gray if (Test-Path $cellImagesRelsPath) { try { [xml]$relsXml = Get-Content $cellImagesRelsPath -Raw Write-Host "`nXML Content:" -ForegroundColor Gray Write-Host $relsXml.OuterXml $relsXml.Relationships.Relationship | ForEach-Object { if ($_.Target -match "media/") { $ridToImage[$_.Id] = $_.Target Write-Host " rId $($_.Id) -> $($_.Target)" -ForegroundColor Green } } } catch { Write-Host "Error reading cellimages.xml.rels: $($_.Exception.Message)" -ForegroundColor Red Write-Host $_.Exception.StackTrace -ForegroundColor Red } } else { Write-Host "cellimages.xml.rels not found!" -ForegroundColor Red } Write-Host "`n=== Summary of Mappings ===" -ForegroundColor Yellow Write-Host "DISPIMG ID -> rId mappings:" -ForegroundColor Gray $dispImgToRid.GetEnumerator() | ForEach-Object { Write-Host " $($_.Key) -> $($_.Value)" -ForegroundColor Gray } Write-Host "`nrId -> Image mappings:" -ForegroundColor Gray $ridToImage.GetEnumerator() | ForEach-Object { Write-Host " $($_.Key) -> $($_.Value)" -ForegroundColor Gray } # 处理每个DISPIMG函数 Write-Host "`n=== Processing DISPIMG Functions ===" -ForegroundColor Yellow foreach ($id in $imageMapping.Keys) { $params = $imageMapping[$id] Write-Host "`nProcessing: [$($params.Worksheet)]$($params.Cell)" -ForegroundColor Green Write-Host "Formula: $($params.Formula)" -ForegroundColor Gray Write-Host "DISPIMG ID: $id" -ForegroundColor Gray # 从DISPIMG ID查找对应的rId $rId = $dispImgToRid[$id] if ($rId) { Write-Host "Found rId: $rId" -ForegroundColor Green # 从rId查找对应的图片路径 $imagePath = $ridToImage[$rId] if ($imagePath) { Write-Host "Found image path: $imagePath" -ForegroundColor Green $imageName = Split-Path $imagePath -Leaf # 查找对应的图片文件 $mediaFile = Get-ChildItem -LiteralPath $mediaPath | Where-Object { $_.Name -eq $imageName } if ($mediaFile) { $outputPath = Join-Path $OutputFolder "$id$($mediaFile.Extension)" Copy-Item -LiteralPath $mediaFile.FullName -Destination $outputPath -Force Write-Host "Successfully copied: $imageName -> $outputPath" -ForegroundColor Cyan # 以 media 文件夹下的文件名复制 # Copy-Item $mediaFile.FullName $(Join-Path $OutputFolder $(Split-Path $imagePath -Leaf)) } else { Write-Host "Image file not found: $imageName" -ForegroundColor Red } } else { Write-Host "No image path found for rId: $rId" -ForegroundColor Red } } else { Write-Host "No rId found for DISPIMG ID: $id" -ForegroundColor Red } } } else { Write-Host "No DISPIMG functions found in the workbook." -ForegroundColor Yellow } } } catch { Write-Host "错误: $($_.Exception.Message)" -ForegroundColor Red } finally { # 清理 if ($workbook) { $workbook.Close($false) } if ($excel) { $excel.Quit() [System.Runtime.Interopservices.Marshal]::ReleaseComObject($excel) | Out-Null } if (Test-Path $tempPath) { Remove-Item $tempPath -Recurse -Force } [System.GC]::Collect() [System.GC]::WaitForPendingFinalizers() } }
一个可能的用法如下
Get-ExcelDispImages -ExcelPath "C:\Users\demo\Documents\dispimg_wps.xlsx" -OutputFolder $pwd\output
生成的图片在当前目录下的 output 文件夹下。
以上就是详解如何利用PowerShell提取wps表格中嵌入的图片的详细内容,更多关于PowerShell提取表格中图片的资料请关注脚本之家其它相关文章!
相关文章
PowerShell管理Win Server 2008 R2
在Windows Serve 2008 R2中,一个重要改进就是PowerShell版本升级为2.0。Win 2008 R2包括一系列新的服务器管理界面,这些均建立在PowerShell 2.0之上。它新增了240个cmdlets命令集,新的PowerShell图形用户界面也增添了开发功能,从而用户能更简单创建自己的命令行。2015-09-09
最新评论