数据文件 bank-full.csv 可从以下 Google Drive 地址获取:https://drive.google.com/drive/folders/1cNtP4iDyGhF620ZbmJdmJWYQrRgJTCum
我的代码如下:
# Question's original script: it reproduces the conversion error shown in the
# traceback that follows.
# Load the semicolon-delimited CSV into a DataFrame.
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')
# Names of the feature columns to keep.
features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
'loan', 'contact', 'campaign', 'pdays', 'poutcome']
# Name of the label column.
labels = ['y']
# Drop every column that is neither a selected feature nor the label.
bank_dataframe = bank_dataframe.filter(features + labels)
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()
# NOTE: 'marital' and 'y' are not listed here, so they stay as raw strings
# (visible in the DataFrame dump inside the traceback).
categorical_features = ['default', 'housing', 'job', 'loan', 'education', 'contact', 'poutcome']
# Re-fit the binarizer on each listed column and overwrite that column with the
# encoded rows. Each cell ends up holding a Python tuple such as (0, 0, 1, 0),
# as the DataFrame dump in the traceback shows.
for feature in categorical_features:
bank_dataframe[feature] = tuple(encoder.fit_transform(bank_dataframe[feature]))
# This is the call that raises: the traceback ends with
# "Failed to convert a NumPy array to a Tensor (Unsupported object type int)".
bank_dataset = Dataset.from_tensor_slices(bank_dataframe)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
92 try:
---> 93 spec = type_spec_from_value(t, use_fallback=False)
94 except TypeError:
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in type_spec_from_value(element, use_fallback)
465 raise TypeError("Could not build a TypeSpec for %r with type %s" %
--> 466 (element, type(element).__name__))
467
TypeError: Could not build a TypeSpec for age job marital education \
0 26 (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0) single (0, 0, 1, 0)
1 37 (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0) single (0, 0, 1, 0)
2 31 (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) single (0, 1, 0, 0)
3 47 (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) married (0, 1, 0, 0)
4 36 (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0) married (0, 1, 0, 0)
... ... ... ... ...
45206 51 (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) married (0, 1, 0, 0)
45207 59 (0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0) married (0, 0, 1, 0)
45208 29 (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0) married (0, 1, 0, 0)
45209 43 (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0) married (0, 0, 1, 0)
45210 51 (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0) single (0, 1, 0, 0)
default balance housing loan contact campaign pdays \
0 (0,) 2786 (0,) (0,) (1, 0, 0) 2 72
1 (0,) 331 (1,) (0,) (1, 0, 0) 3 -1
2 (0,) 92 (1,) (0,) (1, 0, 0) 2 -1
3 (0,) 1568 (1,) (0,) (1, 0, 0) 1 262
4 (0,) 24 (1,) (0,) (1, 0, 0) 1 154
... ... ... ... ... ... ... ...
45206 (0,) 423 (1,) (1,) (1, 0, 0) 1 90
45207 (0,) 3800 (0,) (1,) (1, 0, 0) 1 -1
45208 (0,) 65 (1,) (0,) (1, 0, 0) 14 -1
45209 (0,) 241 (0,) (0,) (1, 0, 0) 10 -1
45210 (0,) 516 (1,) (0,) (1, 0, 0) 1 363
poutcome y
0 (0, 0, 1, 0) yes
1 (0, 0, 0, 1) no
2 (0, 0, 0, 1) no
3 (0, 0, 1, 0) yes
4 (1, 0, 0, 0) no
... ... ...
45206 (1, 0, 0, 0) no
45207 (0, 0, 0, 1) no
45208 (0, 0, 0, 1) no
45209 (0, 0, 0, 1) no
45210 (1, 0, 0, 0) no
[45211 rows x 13 columns] with type DataFrame
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-83-d5b55dc9ba50> in <module>
1 # Convert the DataFrame to a Dataset
2
----> 3 bank_dataset = Dataset.from_tensor_slices(bank_dataframe)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in from_tensor_slices(tensors)
680 Dataset: A `Dataset`.
681 """
--> 682 return TensorSliceDataset(tensors)
683
684 class _GeneratorState(object):
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in __init__(self, element)
2999 def __init__(self, element):
3000 """See `Dataset.from_tensor_slices()` for details."""
-> 3001 element = structure.normalize_element(element)
3002 batched_spec = structure.type_spec_from_value(element)
3003 self._tensors = structure.to_batched_tensor_list(batched_spec, element)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
96 # the value. As a fallback try converting the value to a tensor.
97 normalized_components.append(
---> 98 ops.convert_to_tensor(t, name="component_%d" % i))
99 else:
100 if isinstance(spec, sparse_tensor.SparseTensorSpec):
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1497
1498 if ret is None:
-> 1499 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1500
1501 if ret is NotImplemented:
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
336 as_ref=False):
337 _ = as_ref
--> 338 return constant(v, dtype=dtype, name=name)
339
340
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name)
262 """
263 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 264 allow_broadcast=True)
265
266
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
273 with trace.Trace("tf.constant"):
274 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 275 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
276
277 g = ops.get_default_graph()
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
298 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
299 """Implementation of eager constant."""
--> 300 t = convert_to_eager_tensor(value, ctx, dtype)
301 if shape is None:
302 return t
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).
如果我对错误信息的理解正确,TensorFlow 无法识别其中某些数据类型,因而不能把它们转换为张量。这个问题是什么原因造成的?应该如何解决?
试试这个代码:
import pandas as pd
import tensorflow as tf

# Load the semicolon-delimited bank marketing data (the file named in the
# question).
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')

# Keep only the selected feature columns plus the label column.
features = ['age', 'job', 'marital', 'education', 'default', 'balance',
            'housing', 'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']
bank_dataframe = bank_dataframe.filter(features + labels)

# Every string-valued column has to be encoded, including 'marital' and the
# label 'y'; any column left as strings (or as tuples) gives the frame object
# dtype, which TensorFlow cannot convert to a tensor.
categorical_features = ['default', 'housing', 'job',
                        'loan', 'education', 'contact', 'poutcome',
                        'marital', 'y']
# Remove 'y' if you need to.
# But don't forget to use get_dummies on it some other time
# otherwise you will need another way to turn it into a tf.data.Dataset

# One-hot encode all categorical columns in a single call. An explicit integer
# dtype keeps the dummy columns the same dtype as the numeric columns: recent
# pandas versions default to bool dummies, and a frame mixing int64 and bool
# columns converts to an object array and fails in from_tensor_slices.
bank_dataframe = pd.get_dummies(data=bank_dataframe,
                                columns=categorical_features,
                                dtype='int64')

# The frame is now purely numeric, so it converts to one 2-D tensor that is
# sliced row by row.
bank_dataset = tf.data.Dataset.from_tensor_slices(bank_dataframe)
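不要用 for 循环逐列做 one-hot 编码:把 LabelBinarizer 的结果以元组形式写回单元格,会使这些列变成 object 类型,TensorFlow 无法将其转换为张量;此外,原代码中的 'marital' 和 'y' 仍然是字符串,同样需要编码。pandas 内置的 get_dummies 只需一行就能完成 one-hot 编码,并生成普通的数值列,因此不需要 LabelBinarizer。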
有不清楚的地方可以询问我。