我正在用 JavaScript 读取用户上传的 .csv / .xlsx 文件,并希望得到一个由每一行数据组成的数组。我已经可以使用 FileReader 和 SheetJS 读取文件,并通过下面的代码获取数据:
// code for the new excel reader
/**
 * Reads the first selected file as a workbook with SheetJS and stores its rows
 * on $scope.fileContent.
 *
 * Each non-empty sheet is converted with sheet_to_json({header: 1}). The rows of
 * "Sheet1" are used when that sheet is present and non-empty; otherwise the rows
 * of "contacts" are used after dropping empty plain objects. If neither sheet is
 * available, $scope.fileContent is left as an empty array instead of throwing.
 *
 * The read is asynchronous: $scope.fileContent is reset to [] immediately and
 * only filled in once the FileReader "load" event fires.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    $scope.fileContent = [];
    var file = files[0];
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var workbook = XLSX.read(e.target.result, {type: 'array'});

        // Collect every non-empty sheet, keyed by sheet name.
        var rowsBySheet = {};
        workbook.SheetNames.forEach(function(sheetName) {
            var rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
            if (rows.length) rowsBySheet[sheetName] = rows;
        });

        if (rowsBySheet["Sheet1"])
        {
            $scope.fileContent = rowsBySheet["Sheet1"];
            return;
        }

        // Fallback sheet. Guarded so that a workbook without "contacts" yields
        // an empty result rather than a TypeError on undefined.filter(...).
        var contactRows = rowsBySheet["contacts"] || [];
        $scope.fileContent = contactRows.filter(function(el) {
            return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };
    reader.readAsArrayBuffer(file);
};
这段代码对大多数文件都能正常读取,但当文件中包含 Windows-1255 编码的希伯来语文本时,得到的却是乱码。
为了寻找其他办法,我尝试用 reader.readAsText 把文件作为文本读取,并在需要时更换编码,请看下面的代码:
/**
 * Tells whether a string contains at least one character from the Hebrew
 * Unicode block (U+0590 to U+05FF).
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} true if a Hebrew character is present, false otherwise.
 */
function is_Hebrew(data)
{
    // String#search yields -1 when the pattern does not occur anywhere.
    return data.search(/[\u0590-\u05FF]/) !== -1;
}
/**
 * Reads the first selected file as text and splits it into rows of cells.
 *
 * The file is first decoded with the FileReader default encoding. If that text
 * contains no Hebrew characters, the file is read once more as 'ISO-8859-8'.
 * The retry happens at most once, so a file that simply has no Hebrew in it is
 * not re-read forever. When the retry does not produce Hebrew either, the text
 * from the first pass is used.
 *
 * Parsing runs in the "load" handler, so it is skipped when the read fails.
 *
 * NOTE(review): cells are split on every comma, so a quoted value such as
 * "25,28,29" is broken into several cells; a real CSV parser is needed for that.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    var fullResult = [];
    var file = files[0];
    var reader = new FileReader();
    // Text from the default-encoding pass; null until a retry has been started.
    var firstPassText = null;

    reader.onload = function(e) {
        var text = String(e.target.result);

        if (firstPassText === null && !is_Hebrew(text))
        {
            // No Hebrew found: retry exactly once with the Hebrew encoding.
            firstPassText = text;
            reader.readAsText(file, 'ISO-8859-8');
            return;
        }
        if (firstPassText !== null && !is_Hebrew(text))
        {
            // The retry did not help, so fall back to the default decoding.
            text = firstPassText;
        }

        // Accept both Windows (\r\n) and Unix (\n) line endings.
        var lines = text.split(/\r?\n/);
        console.log(lines);
        lines.forEach(line => {
            fullResult.push(line.split(','));
        });
        // fullResult is not exposed anywhere else, so surface it here.
        console.log(fullResult);
    };
    reader.readAsText(file);
};
但是上面的代码并不合适,因为它不能在每一行中正确地识别出各个单元格。如果某个单元格本身包含用逗号分隔的字符串(例如单元格的值是 "25,28,29"),输出的数组会把其中每个值都当作一个单独的单元格,从而得到错误的数据。
所以我决定继续使用第一种方法,但在那种方法里我无法更改编码。在第一段代码中,我用 readAsArrayBuffer 读取文件数据,有没有办法在这种情况下更改编码?
解决方法:
在尝试了许多可能的方案之后,我发现解决上述问题的办法是把这两种方法结合起来:用第一种方法读取 xlsx 文件,用第二种方法读取 csv 文件。此外,在第二种方法中我还使用了另一个名为 Papa Parse(papaparse)的 JavaScript 库,以解决按单元格正确读取数据的问题:
/**
 * Tells whether a string contains at least one character from the Hebrew
 * Unicode block (U+0590 to U+05FF).
 *
 * @param {string} $data - Text to inspect.
 * @returns {boolean} true if a Hebrew character is present, false otherwise.
 */
$scope.is_Hebrew = function($data){
    // String#search yields -1 when the pattern does not occur anywhere.
    return $data.search(/[\u0590-\u05FF]/) !== -1;
};
// code for the new excel reader
/**
 * Reads the first selected file and stores its rows on $scope.fileContent.
 *
 * - ".xlsx" files are read as an ArrayBuffer and converted with SheetJS. The
 *   rows of "Sheet1" are used when present and non-empty, otherwise the rows of
 *   "contacts" (minus empty plain objects), otherwise an empty array.
 * - ".csv" files are read as text and parsed with Papa Parse. If the default
 *   decoding contains no Hebrew characters, the file is re-read once as
 *   'ISO-8859-8'; if that yields no Hebrew either, the default decoding is used.
 * - Any other extension triggers an alert.
 *
 * The extension comparison ignores case. Reading is asynchronous:
 * $scope.fileContent is reset synchronously and replaced once the FileReader
 * "load" event fires, so nothing is overwritten when a read fails.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Papa Parse options. Empty strings ask the library to auto-detect.
    var config = {
        delimiter: "", // auto-detect
        newline: "", // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };

    // Row filter shared by both readers: drops only empty plain objects.
    // Arrays (including empty ones) and non-object values are kept.
    function isKeptRow(el) {
        return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
    }

    // Loads a workbook with SheetJS and publishes the rows of the chosen sheet.
    function readWorkbook(file) {
        var reader = new FileReader();
        reader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});

            // Collect every non-empty sheet, keyed by sheet name.
            var rowsBySheet = {};
            workbook.SheetNames.forEach(function(sheetName) {
                var rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
                if (rows.length) rowsBySheet[sheetName] = rows;
            });

            if (rowsBySheet["Sheet1"])
            {
                $scope.fileContent = rowsBySheet["Sheet1"];
                return;
            }
            // Guarded so that a workbook without "contacts" yields an empty
            // result rather than a TypeError on undefined.filter(...).
            $scope.fileContent = (rowsBySheet["contacts"] || []).filter(isKeptRow);
        };
        reader.readAsArrayBuffer(file);
    }

    // Loads a CSV as text (retrying once with the Hebrew encoding) and parses it.
    function readCsv(file) {
        var reader = new FileReader();
        // Text from the default-encoding pass; null until a retry has been started.
        var firstPassText = null;

        reader.onload = function(e)
        {
            var text = String(e.target.result);

            if (firstPassText === null && !$scope.is_Hebrew(text))
            {
                // Retry exactly once; re-reading on every Hebrew-less result
                // would loop forever on files that contain no Hebrew at all.
                firstPassText = text;
                reader.readAsText(file, 'ISO-8859-8');
                return;
            }
            if (firstPassText !== null && !$scope.is_Hebrew(text))
            {
                // The retry did not help, so fall back to the default decoding.
                text = firstPassText;
            }

            // Pass the options object itself; wrapping it in an array meant
            // Papa Parse never saw any of the settings.
            var parsed = Papa.parse(text, config);
            $scope.fileContent = parsed["data"].filter(isKeptRow);
        };
        reader.readAsText(file);
    }

    $scope.fileContent = [];
    var f = files[0];
    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    if (fileExtension === 'xlsx')
    {
        readWorkbook(f);
    }
    else if (fileExtension === 'csv')
    {
        readCsv(f);
    }
    else
    {
        alert("File Not supported!");
    }
    // Runs before any asynchronous read completes, so this row is appended to
    // the placeholder array and is replaced once a reader publishes its result.
    $scope.fileContent.push([]);
};