无法将 pandas 数据帧转换为 Tensorflow 2 数据集

问题描述 投票:0回答:1

包含数据的 CSV 文件 (bank-full.csv) 由 Google 提供,可从以下地址获取:https://drive.google.com/drive/folders/1cNtP4iDyGhF620ZbmJdmJWYQrRgJTCum

我的代码如下:

# Load the semicolon-delimited bank marketing data.
# NOTE(review): `pd` is not imported in this snippet; presumably
# `import pandas as pd` was run earlier in the notebook.
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')
features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
            'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']

# Keep only the selected feature columns plus the target column.
bank_dataframe = bank_dataframe.filter(features + labels)
from sklearn.preprocessing import LabelBinarizer

encoder = LabelBinarizer()
# 'marital' and 'y' are not listed here, so they stay as raw strings.
categorical_features = ['default', 'housing', 'job', 'loan', 'education', 'contact', 'poutcome']

# Re-fit the encoder on each categorical column and store one encoded vector
# per row in that column. The printed frame in the traceback shows the result:
# every cell of these columns holds a tuple rather than a scalar.
for feature in categorical_features:
    bank_dataframe[feature] = tuple(encoder.fit_transform(bank_dataframe[feature]))

# This is the call that raises. The frame now mixes ints, strings and tuples,
# which appears to be why TensorFlow reports an unsupported object type.
# NOTE(review): `Dataset` is not imported in this snippet; presumably it is
# tf.data.Dataset -- confirm.
bank_dataset = Dataset.from_tensor_slices(bank_dataframe)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
     92       try:
---> 93         spec = type_spec_from_value(t, use_fallback=False)
     94       except TypeError:

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in type_spec_from_value(element, use_fallback)
    465   raise TypeError("Could not build a TypeSpec for %r with type %s" %
--> 466                   (element, type(element).__name__))
    467 

TypeError: Could not build a TypeSpec for        age                                   job  marital     education  \
0       26  (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)   single  (0, 0, 1, 0)   
1       37  (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)   single  (0, 0, 1, 0)   
2       31  (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)   single  (0, 1, 0, 0)   
3       47  (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  married  (0, 1, 0, 0)   
4       36  (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)  married  (0, 1, 0, 0)   
...    ...                                   ...      ...           ...   
45206   51  (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  married  (0, 1, 0, 0)   
45207   59  (0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0)  married  (0, 0, 1, 0)   
45208   29  (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)  married  (0, 1, 0, 0)   
45209   43  (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)  married  (0, 0, 1, 0)   
45210   51  (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)   single  (0, 1, 0, 0)   

      default  balance housing  loan    contact  campaign  pdays  \
0        (0,)     2786    (0,)  (0,)  (1, 0, 0)         2     72   
1        (0,)      331    (1,)  (0,)  (1, 0, 0)         3     -1   
2        (0,)       92    (1,)  (0,)  (1, 0, 0)         2     -1   
3        (0,)     1568    (1,)  (0,)  (1, 0, 0)         1    262   
4        (0,)       24    (1,)  (0,)  (1, 0, 0)         1    154   
...       ...      ...     ...   ...        ...       ...    ...   
45206    (0,)      423    (1,)  (1,)  (1, 0, 0)         1     90   
45207    (0,)     3800    (0,)  (1,)  (1, 0, 0)         1     -1   
45208    (0,)       65    (1,)  (0,)  (1, 0, 0)        14     -1   
45209    (0,)      241    (0,)  (0,)  (1, 0, 0)        10     -1   
45210    (0,)      516    (1,)  (0,)  (1, 0, 0)         1    363   

           poutcome    y  
0      (0, 0, 1, 0)  yes  
1      (0, 0, 0, 1)   no  
2      (0, 0, 0, 1)   no  
3      (0, 0, 1, 0)  yes  
4      (1, 0, 0, 0)   no  
...             ...  ...  
45206  (1, 0, 0, 0)   no  
45207  (0, 0, 0, 1)   no  
45208  (0, 0, 0, 1)   no  
45209  (0, 0, 0, 1)   no  
45210  (1, 0, 0, 0)   no  

[45211 rows x 13 columns] with type DataFrame

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-83-d5b55dc9ba50> in <module>
      1 # Convert the DataFrame to a Dataset
      2 
----> 3 bank_dataset = Dataset.from_tensor_slices(bank_dataframe)

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in from_tensor_slices(tensors)
    680       Dataset: A `Dataset`.
    681     """
--> 682     return TensorSliceDataset(tensors)
    683 
    684   class _GeneratorState(object):

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in __init__(self, element)
   2999   def __init__(self, element):
   3000     """See `Dataset.from_tensor_slices()` for details."""
-> 3001     element = structure.normalize_element(element)
   3002     batched_spec = structure.type_spec_from_value(element)
   3003     self._tensors = structure.to_batched_tensor_list(batched_spec, element)

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
     96         # the value. As a fallback try converting the value to a tensor.
     97         normalized_components.append(
---> 98             ops.convert_to_tensor(t, name="component_%d" % i))
     99       else:
    100         if isinstance(spec, sparse_tensor.SparseTensorSpec):

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
   1497 
   1498     if ret is None:
-> 1499       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1500 
   1501     if ret is NotImplemented:

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    336                                          as_ref=False):
    337   _ = as_ref
--> 338   return constant(v, dtype=dtype, name=name)
    339 
    340 

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in constant(value, dtype, shape, name)
    262   """
    263   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 264                         allow_broadcast=True)
    265 
    266 

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    273       with trace.Trace("tf.constant"):
    274         return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 275     return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
    276 
    277   g = ops.get_default_graph()

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
    298 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
    299   """Implementation of eager constant."""
--> 300   t = convert_to_eager_tensor(value, ctx, dtype)
    301   if shape is None:
    302     return t

~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
     96       dtype = dtypes.as_dtype(dtype).as_datatype_enum
     97   ctx.ensure_initialized()
---> 98   return ops.EagerTensor(value, ctx.device_name, dtype)
     99 
    100 

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).

如果我对错误消息的理解没错,TensorFlow 无法识别某些数据类型,因而不能将其转换为张量。这个问题是什么原因造成的,应该如何解决?

python-3.x tensorflow2.0 tensorflow-datasets
1个回答
0
投票

试试这个代码:

import pandas as pd
import tensorflow as tf

# Load the semicolon-delimited bank marketing data (same file as in the question).
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')
features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
            'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']

# Keep only the selected feature columns plus the target column.
bank_dataframe = bank_dataframe.filter(features + labels)

# Every string-valued column must be encoded, including 'marital' and the
# target 'y'; a single leftover string column keeps the frame non-numeric.
categorical_features = ['default', 'housing', 'job',
                        'loan', 'education', 'contact', 'poutcome',
                        'marital', 'y']
# Remove 'y' if you need to.
# But don't forget to use get_dummies on it some other time
# otherwise you will need another way to turn it into a tf.data.Dataset

# One-hot encode in a single call: each category becomes its own 0/1 column.
# dtype is set explicitly because pandas >= 2.0 defaults to bool dummies, and
# a frame mixing bool and int64 columns converts to an object array, which
# TensorFlow cannot turn into a tensor.
bank_dataframe = pd.get_dummies(data=bank_dataframe,
                                columns=categorical_features,
                                dtype='int64')

# The frame is now purely int64, so it converts to a single 2-D tensor and is
# sliced row by row.
bank_dataset = tf.data.Dataset.from_tensor_slices(bank_dataframe)

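不要用 for 循环逐列做 one-hot 编码:原代码让每个类别列的单元格里都存放一个元组,再加上 'marital' 和 'y' 仍是字符串,整个数据帧只能作为 object 类型的数组传给 TensorFlow,而 TensorFlow 无法将这种数组转换为张量,这正是上面 ValueError 的来源。使用 pandas 内置的 get_dummies 只需一行即可完成编码,它会为每个类别生成一个独立的数值列,因此不需要 LabelBinarizer。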

有不清楚的地方可以询问我。

© www.soinside.com 2019 - 2024. All rights reserved.