NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析

我们先来回忆回忆上篇文章讲到的通过xml配置文件实现excel批量模板解析的整体思路:

1.对每个excel模板制定xml配置规则集,实现xml配置文件的解析服务

2.为每个excel模板制定DTO,继承于一个BaseDTO

3.实现两个工厂方法,一是获取某excel模板xml配置文件路径;一是获取某excel模板DTO对象

4.EXCEL工具类对外暴露两个接口,一是数据验证接口,返回整个excel所有错误单元格信息;一是数据解析接口,读取单元格数据到DTO。通过构造方法传入配置文件

5.整体流程:用户上传excel文件,首先存储到临时文件夹,其次获取xml配置文件路径,获取DTO对象,然后初始化excel工具类,再验证excel单元格数据,解析数据返回DTO,最后调用入库服务实现DTO数据入库

接下来我们就来看看excel工具类的具体实现代码

     public class ExcelImportService : ExcelAnalyzeService, IExcelImportService
{
private string _filePath;
private string _xmlPath;
private Dictionary<int, int> _rowCount = new Dictionary<int, int>();
private List<Regular> _list;// 规则集 /// <summary>
/// 构造方法
/// </summary>
/// <param name="filePath">excel文件路径</param>
/// <param name="xmlPath">配置文件路径</param>
public ExcelImportService(string filePath, string xmlPath)
{
_filePath = filePath;
_xmlPath = xmlPath;
_list = this.GetXMLInfo(_xmlPath);
}

// excel所有单元格数据验证
public UploadExcelFileResult ValidateExcel()
{
var result = new UploadExcelFileResult();
result.Success = true; _rowCount = new Dictionary<int, int>(); Stream fileStream = new FileStream(_filePath, FileMode.Open);
int edition = this.GetExcelEdition(_filePath);
if (edition != )
{
IWorkbook workbook = this.CreateWorkBook(edition, fileStream);
int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; for (int i = ; i < sheetCount; i++)
{
ISheet sheet = workbook.GetSheetAt(i);
Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref result, _list);
if (result.Success)
{
_rowCount.Add(i, sheet.LastRowNum);
result = this.CheckExcelDatasEnableNull(sheet, _list, dict, _rowCount[i]);
}
else
{
break;
}
}
}
else
{
result.Success = false;
result.Message = "文件类型错误!";
} fileStream.Close();
return result;
}

// 解析excel数据到DTO
public List<TableDTO> Import<TableDTO>()
{
var uploadExcelFileResult = new UploadExcelFileResult();
var resultList = new List<TableDTO>(); Stream fileStream = new FileStream(_filePath, FileMode.Open);
int edition = this.GetExcelEdition(_filePath);
IWorkbook workbook = this.CreateWorkBook(edition, fileStream);
int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; for (int i = ; i < sheetCount; i++)
{
ISheet sheet = workbook.GetSheetAt(i);
string sheetName = sheet.SheetName;
Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref uploadExcelFileResult, _list);
var sheetLists = this.GetExcelDatas<TableDTO>(sheet, sheetName, _list, dict, _rowCount[i]);
resultList.AddRange(sheetLists);
} fileStream.Close();
return resultList;
}
}

1.我们看到17行用到了GetXMLInfo()方法,就是第二篇文章中说到的XML文件解析方法,返回该excel的规则集

2.第28行GetExcelEdition()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel版本

         public int GetExcelEdition(string fileName)
{
var edition = ;
string[] items = fileName.Split(new char[] { '.' });
int count = items.Length;
switch (items[count - ])
{
case "xls":
edition = ;
break;
case "xlsx":
edition = ;
break;
default:
break;
} return edition;
}

3.第31行CreateWorkBook()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel工作簿对象

         public IWorkbook CreateWorkBook(int edition, Stream  excelFileStream)
{
switch (edition)
{
case :
return new XSSFWorkbook(excelFileStream);
case :
return new HSSFWorkbook(excelFileStream);
default:
return null;
}
}

4.第32行是读取配置文件中excel中sheet个数(例如员工模板:我们支持一个excel文件多个sheet表单,可以是每个表单代表一个地区等等)

5.第37行中GetExcelHeaders()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel表头数据

         public Dictionary<int, string> GetExcelHeaders(ISheet sheet, ref UploadExcelFileResult uploadExcelFileResult,
List<Regular> list)
{
int firstHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["firstHeaderRow"];
int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; var dict = new Dictionary<int, string>(); try
{
// 循环获得表头
for (int i = firstHeaderRowIndex - ; i < lastHeaderRowIndex; i++)
{
IRow headerRow = sheet.GetRow(i);
int cellCount = headerRow.LastCellNum; for (int j = headerRow.FirstCellNum; j < cellCount; j++)
{
if (!string.IsNullOrEmpty(headerRow.GetCell(j).StringCellValue.Trim()))
{
// 根据 键-值 是否已存在做不同处理
//TODO 代码待重构!!!
try
{
string oldValue = dict[j];
dict.Remove(j);
dict.Add(j, oldValue + headerRow.GetCell(j).StringCellValue.Trim());
}
catch (Exception)
{
dict.Add(j, headerRow.GetCell(j).StringCellValue.Trim());
}
}
}
}
// 遍历表头字典,消除空格
for (int i = ; i < dict.Count; i++)
{
var value = dict[i];
this.ReplaceSpace(ref value);
dict[i] = value;
}
// 检查表头模板是否被修改
for (int count = ; count < dict.Count; count++)
{
Regular header = list.Find(h => h.HeaderText == dict[count]); if (header == null)
{
uploadExcelFileResult.Success = false;
uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!";
}
}
}
catch (Exception e)
{
uploadExcelFileResult.Success = false;
uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!";
} return dict;
}

其中39行ReplaceSpace()是消除字符串中空格方法(所有半角、全角)。一直想通过正则表达式来做,但是没学到家,还没写好能够做到的正则表达式,所以写的有点复杂,若果谁有这样的正则表达式,请指点一二,感激不尽!!!

         // 去除空值
public void ReplaceSpace(ref string cellValue)
{
cellValue = TruncateString(cellValue, new char[] { ' ' }, new char[] { ' ' });
} // 对字符串做空格剔除处理
private string TruncateString(string originalWord, char[] spiltWord1, char[] spiltWord2)
{
var result = "";
var valueReplaceDbcCase = originalWord.Split(spiltWord1); if (valueReplaceDbcCase.Count() > )
{
for (int i = ; i < valueReplaceDbcCase.Count(); i++)
{
if (valueReplaceDbcCase[i] != "" && valueReplaceDbcCase[i] != " " &&
valueReplaceDbcCase[i] != " ")
{
result += TruncateString(valueReplaceDbcCase[i], spiltWord2, new char[]);
}
}
}
else
{
if (spiltWord2.Any())
{
result = TruncateString(originalWord, spiltWord2, new char[]);
}
else
{
result = originalWord;
}
} return result;
}

6.第41行CheckExcelDatasEnableNull()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据验证结果

         public UploadExcelFileResult CheckExcelDatasEnableNull(ISheet sheet, List<Regular> list, Dictionary<int, string> dict, int rowCount)
{
var result = new UploadExcelFileResult();
result.Success = true; // 记录单个sheet所有错误信息
var sheetErrors = new List<ExcelFileErrorPosition>();
// 表头结束行
int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; // 循环行数据
for (int i = lastHeaderRowIndex; i <= rowCount; i++)
{
// 标注该行是否出错
bool isrowfalse = false;
// 记录该行数据临时对象
var rowDatas = new List<string>();
// 记录该行错误列
var rowErrorCell = new List<int>();
// 记录该行错误列具体错误信息
var rowErrorMessages = new List<string>();
// 记录该行空值数
int nullcount = ; IRow dataRow = sheet.GetRow(i);
int cellCount = dict.Count; // 循环列数据
for (int j = dataRow.FirstCellNum; j < cellCount; j++)
{
string value = "";
Regular header = list.Find(h => h.HeaderText == dict[j]);
//value = dataRow.GetCell(j).ToString();
switch (dataRow.GetCell(j).CellType)
{
case CellType.Formula:
value = dataRow.GetCell(j).StringCellValue.ToString();
break;
default:
value = dataRow.GetCell(j).ToString();
break;
} // 记录可能出错数据
rowDatas.Add(value); // 检查空值
if (!this.CheckNull(value, ref nullcount))
{
// 检查类型
if (!this.CheckDataType(header.DataType, value))
{
isrowfalse = true;
result.Success = false;
// 记录该行错误信息
rowErrorCell.Add(j + );
rowErrorMessages.Add("读取EXCEL数据时发生数据格式错误,请检查该行该列数据格式!");
}
else
{
if (header.DataType == "System.string" || header.DataType == "System.String")
{
this.ReplaceSpace(ref value);
}
}
}
}
// 报错处理(空行不报错)
if (isrowfalse && nullcount < cellCount)
{
sheetErrors.Add(new ExcelFileErrorPosition
{
RowContent = rowDatas,
RowIndex = i + ,
CellIndex = rowErrorCell,
ErrorMessage = rowErrorMessages
});
}
}
result.ExcelFileErrorPositions = sheetErrors;
return result;
}

CheckNull()检查空值,是空值则nullcount++;

7.第74行GetExcelDatas()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据解析结果

         public List<TableDTO> GetExcelDatas<TableDTO>(ISheet sheet, string sheetName, List<Regular> list,
Dictionary<int, string> dict, int rowCount)
{
// 返回数据对象集合
var resultList = new List<TableDTO>();
// 表头结束行
int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; // 循环行数据
for (int i = lastHeaderRowIndex; i <= rowCount; i++)
{
// 产生一个新的泛型对象
var model = Activator.CreateInstance<TableDTO>();
// 记录该行空值数
int nullcount = ; IRow dataRow = sheet.GetRow(i);
int cellCount = dict.Count; if (dataRow != null)
{
// 循环列数据
for (int j = dataRow.FirstCellNum; j < cellCount; j++)
{
string value = "";
Regular header = list.Find(h => h.HeaderText == dict[j]);
PropertyInfo prop = model.GetType().GetProperty(header.PropertyName);
//value = dataRow.GetCell(j).ToString();
switch (dataRow.GetCell(j).CellType)
{
case CellType.Formula:
value = dataRow.GetCell(j).StringCellValue.ToString();
break;
default:
value = dataRow.GetCell(j).ToString();
break;
} // 去除空值
this.ReplaceSpace(ref value); if (value == "")
{
nullcount++;
} // 赋值
switch (header.DataType)
{
case "System.double":
double valueDecimal;
if (double.TryParse(value, out valueDecimal))
{
prop.SetValue(model, valueDecimal, null);
}
break;
case "System.Int16":
short valueInt16;
if (Int16.TryParse(value, out valueInt16))
{
prop.SetValue(model, valueInt16, null);
}
break;
case "System.Int32":
int valueInt32;
if (Int32.TryParse(value, out valueInt32))
{
prop.SetValue(model, valueInt32, null);
}
break;
case "System.Boolean":
bool valueBoolean;
if (Boolean.TryParse(value, out valueBoolean))
{
prop.SetValue(model, valueBoolean, null);
}
break;
case "System.DateTime":
DateTime valueDateTime;
if (DateTime.TryParse(value, out valueDateTime))
{
prop.SetValue(model, valueDateTime, null);
}
break;
default:
prop.SetValue(model, value, null);
break;
}
}
} // 添加非空行数据到DTO
if (nullcount < cellCount)
{
resultList.Add(model);
}
} return resultList;
}

OK,整体流程中所有代码都贴出来了,写得比较匆忙,如有不当的地方,请大家不吝赐教~~~

附:

博主的需求中还有非单行的复杂表头,涉及到合并单元格表头,甚至左表头等等复杂excel模板,如:

NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析

NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析

NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析

......

这部分excel的解析简直就是非人类的需求,如有需要,会在后续博文继续贴出相关代码,请多多支持....

原创文章,代码都是从自己项目里贴出来的。转载请注明出处哦,亲~~~

上一篇:效率最高的Excel数据导入---(c#调用SSIS Package将数据库数据导入到Excel文件中【附源代码下载】) 转


下一篇:temp-重庆银行