C#调用OpenXml读取excel行数据
问题及现象
在OpenXML中文件不包含空白单元格的条目,这就是跳过空白单元格的原因。
所以如果当我们打开一个excel,读取一个表格数据,发现如果有空单元格,openXML会跳过导致读取的数据发生错位。
比如这个是原始的excel表格数据。
当有空格读取后,第一行和最后一行的数据就会错位了,如下:
解决的办法就是:
假设:
DocumentFormat.OpenXml.Spreadsheet.Row row,
Cell cell = row.Elements<Cell>().FirstOrDefault(c => c.CellReference == $"列行");
//例如:c.CellReference =="A12"
当cell是空的时候,表示该单元格是空值。
因此对于从行中提取单元格不能使用“foreach (Cell cell in row)”,这样取出来的cell是非空的单元格,也就是如果你有10列,有两列是空单元格,那么就只能取出来8列,这就导致了取出数据的错位。
而应该使用循环,也就是知道了表格的列数了,然后使用for去循环,例如:
for (int columnIndex = 0; columnIndex < dt.Columns.Count; columnIndex++) { //查找指定的行列单元格是否存在。 Cell cell = row.Elements<Cell>().FirstOrDefault(c => c.CellReference == $"{columnReference[columnIndex]}{row.RowIndex}");//例如:c.CellReference =="A12" string cellVal = null; //定义获取的单元格的值,默认为空 if (cell != null) {//不为空使用定义的GetCellValue()函数获取cell中的值 cellVal = GetCellValue(cell, workbookPart); } .... }
以下为封装的OpenXML处理的完整代码
调用ReadSheetWithHeader()函数,ReadSheetWithHeader会调用封装的OpenXml类OutExcel对象,从而把指定的excel文件的sheet表读取到DataTable的数据集合中。
/// <summary> /// 将指定的excel文件中的指定索引的sheet读取到表对象中 /// </summary> /// <param name="fileNm">excel文件路径</param> /// <param name="sheetIndex">sheet索引</param> /// <returns>返回DataTable对象</returns> public DataTable ReadSheetWithHeader(string fileNm, int sheetIndex) { FileStream fs = new FileStream(fileNm, FileMode.Open, FileAccess.Read, FileShare.Read); DataTable dt = new OutExcel().ReadExcel(sheetIndex, fs); return dt; }
封装OpenXml类OutExcel
using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Spreadsheet; using System; using System.Collections.Generic; using System.Data; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; namespace OpenExcelMng { public class OutExcel { /// <summary> /// 按照给定的Excel流组织成Datatable /// </summary> /// <param name="sheetName">须要读取的Sheet的名称</param> /// <param name="stream">Excel文件流</param> /// <returns>组织好的DataTable</returns> public DataTable ReadExcel(string sheetName, Stream stream) { using (SpreadsheetDocument document = SpreadsheetDocument.Open(stream, false)) { //打开Stream WorkbookPart workbookPart = document.WorkbookPart; IEnumerable<Sheet> sheets = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName); if (sheets.Count() == 0) {//找出合适前提的sheet,没有则返回 return null; } WorksheetPart worksheetPart = (WorksheetPart)document.WorkbookPart.GetPartById(sheets.First().Id); //获取Excel中共享数据 SharedStringTable stringTable = document.WorkbookPart.SharedStringTablePart.SharedStringTable; IEnumerable<Row> rows = worksheetPart.Worksheet.Descendants<Row>();//获得Excel中得数据行 DataTable dt = new DataTable("Excel"); //因为须要将数据导入到DataTable中,所以我们假定Excel的第一行是列名,从第二行开端是行数据 foreach (Row row in rows) { if (row.RowIndex == 1) {//Excel第一行动列名 GetDataColumn(row, stringTable, ref dt); } else { GetDataRow(row, stringTable, workbookPart, ref dt);//Excel第二行同时为DataTable的第一行数据 } } return dt; } } /// <summary> /// 按照给定的Excel流组织成Datatable /// </summary> /// <param name="sheetIndex">须要读取的Sheet的索引</param> /// <param name="sheetIndex">Excel文件流</param> /// <returns>组织好的DataTable</returns> public DataTable ReadExcel(int sheetIndex, Stream stream) { using (SpreadsheetDocument document = SpreadsheetDocument.Open(stream, false)) {//打开Stream WorkbookPart workbookPart = document.WorkbookPart; IList<Sheet> sheets = workbookPart.Workbook.Descendants<Sheet>().ToList(); if (sheets.Count() == 0) {//找出合适前提的sheet,没有则返回 return null; } WorksheetPart worksheetPart = (WorksheetPart)document.WorkbookPart.GetPartById(sheets[sheetIndex].Id); //获取Excel中共享数据 SharedStringTable stringTable = document.WorkbookPart.SharedStringTablePart.SharedStringTable; IEnumerable<Row> rows = worksheetPart.Worksheet.Descendants<Row>();//获得Excel中得数据行 DataTable dt = new DataTable("Excel"); //因为须要将数据导入到DataTable中,所以我们假定Excel的第一行是列名,从第二行开端是行数据 foreach (Row row in rows) { if (row.RowIndex == 1) {//Excel第一行动列名 GetDataColumn(row, stringTable, ref dt); } else { GetDataRow(row, stringTable, workbookPart, ref dt);//Excel第二行同时为DataTable的第一行数据 } } return dt; } } /// <summary> /// 构建DataTable的列 /// </summary> /// <param name="row">OpenXML定义的Row对象</param> /// <param name="stringTablePart"></param> /// <param name="dt">须要返回的DataTable对象</param> /// <returns></returns> public void GetDataColumn(Row row, SharedStringTable stringTable, ref DataTable dt) { DataColumn col = new DataColumn(); Dictionary<string, int> columnCount = new Dictionary<string, int>(); foreach (Cell cell in row) { string cellVal = GetValue(cell, stringTable); col = new DataColumn(cellVal); if (IsContainsColumn(dt, col.ColumnName)) { if (!columnCount.ContainsKey(col.ColumnName)) columnCount.Add(col.ColumnName, 0); col.ColumnName = col.ColumnName + (columnCount[col.ColumnName]++); } dt.Columns.Add(col); } } /// <summary> /// 构建DataTable的每一行数据,并返回该Datatable /// </summary> /// <param name="row"></param> /// <param name="stringTable"></param> /// <param name="workbookPart">用于处理获取Cell中的信息,如果Cell存在,不是空单元格</param> /// <param name="dt">把行数据写入到datatabl中</param> private void GetDataRow(DocumentFormat.OpenXml.Spreadsheet.Row row, DocumentFormat.OpenXml.Spreadsheet.SharedStringTable stringTable, //不再使用 DocumentFormat.OpenXml.Packaging.WorkbookPart workbookPart, //用于处理获取Cell中的信息,如果Cell存在,不是空单元格 ref System.Data.DataTable dt) //把行数据写入到datatabl中。 { // 读取算法:按行一一读取单位格,若是整行均是空数据 // 则忽视改行(因为本人的工作内容不须要空行)-_- DataRow dr = dt.NewRow(); int i = 0; int nullRowCount = i; Dictionary<int, string> columnReference = new Dictionary<int, string>(); columnReference.Add(0, "A"); columnReference.Add(1, "B"); columnReference.Add(2, "C"); columnReference.Add(3, "D"); columnReference.Add(4, "E"); columnReference.Add(5, "F"); columnReference.Add(6, "G"); columnReference.Add(7, "H"); for (int columnIndex = 0; columnIndex < dt.Columns.Count; columnIndex++) { Cell cell = row.Elements<Cell>().FirstOrDefault(c => c.CellReference == $"{columnReference[columnIndex]}{row.RowIndex}");//例如:c.CellReference =="A12" string cellVal = null; if (cell != null) { cellVal = GetCellValue(cell, workbookPart); } if (string.IsNullOrEmpty(cellVal)) { nullRowCount++; } dr[i] = cellVal; i++; } if (nullRowCount != i) { dt.Rows.Add(dr); } } /// <summary> /// 获取单位格的值 /// </summary> /// <param name="cell">单元格</param> /// <param name="workbookPart"></param> /// <param name="type">1 不去空格 2 前后空格 3 所有空格 </param> /// <returns></returns> public static string GetCellValue(Cell cell, WorkbookPart workbookPart, int type = 2) { //合并单元格不做处理 if (cell.CellValue == null) return string.Empty; string cellInnerText = cell.CellValue.InnerXml; //纯字符串 if (cell.DataType != null && (cell.DataType.Value == CellValues.SharedString || cell.DataType.Value == CellValues.String || cell.DataType.Value == CellValues.Number)) { //获取spreadsheetDocument中共享的数据 SharedStringTable stringTable = workbookPart.SharedStringTablePart.SharedStringTable; //如果共享字符串表丢失,则说明出了问题。 if (!stringTable.Any()) return string.Empty; string text = stringTable.ElementAt(int.Parse(cellInnerText)).InnerText; if (type == 2) return text.Trim(); else if (type == 3) return text.Replace(" ", ""); else return text; } //bool类型 else if (cell.DataType != null && cell.DataType.Value == CellValues.Boolean) { return (cellInnerText != "0").ToString().ToUpper(); } //数字格式代码(numFmtId)小于164是内置的:https://www.it1352.com/736329.html else { //为空为数值 if (cell.StyleIndex == null) return cellInnerText; Stylesheet styleSheet = workbookPart.WorkbookStylesPart.Stylesheet; CellFormat cellFormat = (CellFormat)styleSheet.CellFormats.ChildElements[(int)cell.StyleIndex.Value]; uint formatId = cellFormat.NumberFormatId.Value; double doubleTime;//OLE 自动化日期值 DateTime dateTime;//yyyy/MM/dd HH:mm:ss switch (formatId) { case 0://常规 return cellInnerText; case 9://百分比【0%】 case 10://百分比【0.00%】 case 11://科学计数【1.00E+02】 case 12://分数【1/2】 return cellInnerText; case 14: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("yyyy/MM/dd"); //case 15: //case 16: case 17: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("yyyy/MM"); //case 18: //case 19: case 20: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("H:mm"); case 21: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("HH:mm:ss"); case 22: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("yyyy/MM/dd HH:mm"); //case 45: //case 46: case 47: doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("yyyy/MM/dd"); case 58://【中国】11月11日 doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("MM/dd"); case 176://【中国】2020年11月11日 doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("yyyy/MM/dd"); case 177://【中国】11:22:00 doubleTime = double.Parse(cellInnerText); dateTime = DateTime.FromOADate(doubleTime); return dateTime.ToString("HH:mm:ss"); default: return cellInnerText; } } } /// <summary> /// 获取单位格的值 /// </summary> /// <param name="cell"></param> /// <param name="stringTablePart"></param> /// <returns></returns> private string GetValue(Cell cell, SharedStringTable stringTable) { //因为Excel的数据存储在SharedStringTable中,须要获取数据在SharedStringTable 中的索引 string value = string.Empty; try { if (cell.ChildElements.Count == 0) return value; value = double.Parse(cell.CellValue.InnerText).ToString(); if (cell.DataType != null) { switch (cell.DataType.Value) { case CellValues.SharedString: value = stringTable.ChildElements[Int32.Parse(value)].InnerText; break; } } } catch (Exception ex) { value = "N/A"; } return value; } /// <summary> /// 判断网格是否存在列 /// </summary> /// <param name="dt">网格</param> /// <param name="columnName">列名</param> /// <returns></returns> public bool IsContainsColumn(DataTable dt, string columnName) { if (dt == null || columnName == null) { return false; } return dt.Columns.Contains(columnName); } public static void ConvertToDateTime(ref DataTable dt, string columnNm, string dtFormat) { int findLoca_Old = dt.Columns.IndexOf(columnNm); DataColumn newColumn = new DataColumn(System.Guid.NewGuid().ToString(), typeof(String)); string newColumnNm = newColumn.ColumnName; dt.Columns.Add(newColumn); newColumn.SetOrdinal(findLoca_Old + 1); foreach (DataRow row in dt.Rows) { try { double val = Convert.ToDouble(row[columnNm]); row[newColumnNm] = DateTime.FromOADate(val).ToString(dtFormat); } catch (Exception ex) { ; } } dt.Columns.RemoveAt(findLoca_Old); newColumn.ColumnName = columnNm; } } }
到此这篇关于C#调用OpenXml读取excel行数据的文章就介绍到这了,更多相关C# OpenXml读取excel行数据内容请搜索脚本之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持脚本之家!
最新评论