使用 pandas 读取 CSV 文件:
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)
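但是读取时报错了。这个 CSV 数据集比较大,我一开始以为是文件太大导致的;不过从下面的 Traceback 可以看到,异常是在构建索引列的阶段(_make_index → _agg_index → _infer_types)抛出的 TypeError,而不是内存不足之类的错误,所以问题更可能与 index_col=0 有关: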
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-58-93042d10a770> in <module>
2
----> 3 BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
684 )
685
--> 686 return _read(filepath_or_buffer, kwds)
687
688
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
456
457 try:
--> 458 data = parser.read(nrows)
459 finally:
460 parser.close()
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1194 def read(self, nrows=None):
1195 nrows = _validate_integer("nrows", nrows)
-> 1196 ret = self._engine.read(nrows)
1197
1198 # May alter columns / col_dict
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
2229
2230 names, data = self._do_date_conversions(names, data)
-> 2231 index, names = self._make_index(data, alldata, names)
2232
2233 # maybe create a mi on the columns
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _make_index(self, data, alldata, columns, indexnamerow)
1675 elif not self._has_complex_date_col:
1676 index = self._get_simple_index(alldata, columns)
-> 1677 index = self._agg_index(index)
1678 elif self._has_complex_date_col:
1679 if not self._name_processed:
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _agg_index(self, index, try_parse_dates)
1768 )
1769
-> 1770 arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
1771 arrays.append(arr)
1772
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _infer_types(self, values, na_values, try_num_bool)
1869 na_count = 0
1870 if issubclass(values.dtype.type, (np.number, np.bool_)):
-> 1871 mask = algorithms.isin(values, list(na_values))
1872 na_count = mask.sum()
1873 if na_count > 0:
E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\core\algorithms.py in isin(comps, values)
441 # If the the values include nan we need to check for nan explicitly
442 # since np.nan it not equal to np.nan
--> 443 if np.isnan(values).any():
444 f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
445 else:
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
具体错误为:
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
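我的解决方法是把
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)
中的 ", index_col=0" 这个参数去掉,即改为:
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv')
这样就读取成功了。如果仍然需要把第一列作为索引,可以在读取完成之后再设置,例如:
BPIC_2017_df = BPIC_2017_df.set_index(BPIC_2017_df.columns[0])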