TypeError: Binding inputs to tf.function failed, cannot cast input_tensor TensorSpec to TensorSpec

Problem description

Error message:

Traceback (most recent call last):
  File "/home/prakrisht/-------------/detect_from_webcam.py", line 173, in <module>
    run_inference(detection_model, category_index, cap)
  File "/home/prakrisht/----------------/detect_from_webcam.py", line 60, in run_inference
    output_dict = run_inference_for_single_image(model, image_np)
  File "/home/prakrisht/-----------------/detect_from_webcam.py", line 31, in run_inference_for_single_image
    output_dict = model(input_tensor)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/saved_model/load.py", line 816, in _call_attribute
    return instance.__call__(*args, **kwargs)
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/prakrisht/anaconda3/envs/me_test/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/function_type_utils.py", line 446, in bind_function_inputs
    raise TypeError(
TypeError: Binding inputs to tf.function failed due to `Can not cast TensorSpec(shape=(1,), dtype=tf.float32, name='input_tensor') to TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')`. Received args: (<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>,) and kwargs: {} for signature: (input_tensor: TensorSpec(shape=(1, None, None, 3), dtype=tf.uint8, name='input_tensor')).

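The received argument in the error, a shape-(1,) float32 tensor holding nan, is exactly what the preprocessing in run_inference_for_single_image (shown below) produces when cap.read() returns None instead of a frame. A minimal sketch reproducing that tensor:

import numpy as np
import tensorflow as tf

frame = None  # what cap.read() yields when the capture is closed or has failed
image = np.asarray(frame).astype(np.float32)  # None casts to a 0-d nan array
input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]
print(input_tensor)  # tf.Tensor([nan], shape=(1,), dtype=float32), matching the traceback
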
Detection code. Summary:

  1. We run inference on a webcam feed,
  2. using a resnet101 model trained on a custom dataset.
  3. The same code works perfectly with still images.
  4. It also used to work with the webcam a while ago, but it no longer does.

import numpy as np
import argparse
import tensorflow as tf
import cv2

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1

# Patch the location of gfile
tf.gfile = tf.io.gfile


def load_model(model_path):
    model = tf.saved_model.load(model_path)

    return model


def run_inference_for_single_image(model, image):
    image = np.asarray(image).astype(np.float32)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis,...]
    
    # Run inference
    output_dict = model(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
   
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                                    output_dict['detection_masks'], output_dict['detection_boxes'],
                                    image.shape[0], image.shape[1])      
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    
    return output_dict


def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        # Actual detection.
        output_dict = run_inference_for_single_image(model, image_np)
        # Visualization of the results of a detection.
        #print(output_dict)
        draw_boxes_on_image(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            #instance_masks=output_dict.get('detection_masks_reframed', None),
            #use_normalized_coordinates=True,
            line_thickness=8,
            font_scale=1)
        cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cap.release()
            cv2.destroyAllWindows()
            break


def draw_boxes_on_image(image_np, boxes, classes, scores, category_index, 
                        threshold=0.5, line_thickness=8, font=cv2.FONT_HERSHEY_SIMPLEX, 
                        font_scale=0.7, font_thickness=2):
    """
    Draw bounding boxes on the image with class-specific colors, showing labels with the highest score on top.
    Alternate label placement between the left and right sides of the bounding box.

    Args:
    - image_np: Numpy array of the image.
    - boxes: Array of bounding boxes, with shape [N, 4].
    - classes: Array of class indices corresponding to the boxes.
    - scores: Array of confidence scores corresponding to the boxes.
    - category_index: Dictionary mapping class indices to class names.
    - threshold: Confidence threshold for displaying the bounding box.
    - line_thickness: Thickness of the bounding box lines.
    - font: Font type for the label.
    - font_scale: Scale (size) of the font for the label.
    - font_thickness: Thickness of the font for the label.
    """
    # Assign a unique color for each class
    colors = {}
    np.random.seed(42)  # For reproducibility
    for class_id in np.unique(classes):
        colors[class_id] = tuple(np.random.randint(0, 256, 3).tolist())

    # Sort detections by score in descending order
    sorted_indices = np.argsort(-scores)

    for i in sorted_indices:
        if scores[i] > threshold:
            box = boxes[i]
            class_id = int(classes[i])
            score = scores[i]

            ymin, xmin, ymax, xmax = box
            im_height, im_width, _ = image_np.shape
            (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 
                                          int(ymin * im_height), int(ymax * im_height))

            # Draw bounding box
            color = colors[class_id]
            cv2.rectangle(image_np, (left, top), (right, bottom), color, line_thickness)

            # Prepare label text
            #strip "rotation" from the label
            label_text = category_index[class_id]["name"]
            if label_text.find("rotation") != -1:
                label_text = label_text[:label_text.find("rotation")]

            label = f'{label_text}: {int(score * 100)}%'
            label_size, base_line = cv2.getTextSize(label, font, font_scale, font_thickness)
            top = max(top, label_size[1])

            # Determine label position (alternate between left and right)
            if i % 2 == 0:
                # Left side of the box
                label_left = left
            else:
                # Right side of the box
                label_left = right - label_size[0]

            # Draw label background and text
            cv2.rectangle(image_np, (label_left, top - label_size[1]), 
                          (label_left + label_size[0], top + base_line), color, cv2.FILLED)
            cv2.putText(image_np, label, (label_left, top), font, font_scale, (0, 0, 0), font_thickness)

            # Adjust the top position to avoid overlapping with the next label
            top -= (label_size[1] + base_line + 5)

    return image_np


if __name__ == '__main__':
    model_path = './inference_graph_resnet101/saved_model'
    label_map_path = './-------------/1_label_map.pbtxt'
    detection_model = load_model(model_path)
    category_index = label_map_util.create_category_index_from_labelmap(label_map_path, use_display_name=True)

    cap = cv2.VideoCapture(-1)
    #check camera and display the feed

    #print all available cameras
    for i in range(0, 10):
        cap = cv2.VideoCapture(i)
        if not cap.isOpened():
            print(f"Camera {i} is not available")
        else:
            print(f"Camera {i} is available")
            cap.release()
    run_inference(detection_model, category_index, cap)

Camera test output:

[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 0 is not available
Camera 1 is available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 2 is not available
Camera 3 is available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video4): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 4 is not available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video5): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 5 is not available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video6): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 6 is not available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video7): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 7 is not available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video8): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 8 is not available
[ WARN:…] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video9): can't open camera by index
[ERROR:…] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
Camera 9 is not available

Troubleshooting done so far:

  1. Tried different camera indices.
  2. Tested with a set of images; that works perfectly.
  3. Checked the model path, etc.

Not sure where to go from here...
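
For reference, a minimal capture check that is independent of the model; index 1 is assumed here only because the camera test above reports it as available:

import cv2

cap = cv2.VideoCapture(1)  # index 1 is reported available in the test output above
ret, frame = cap.read()
print("opened:", cap.isOpened(), "ret:", ret,
      "frame:", None if frame is None else frame.shape)
cap.release()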

python tensorflow machine-learning tensor
1 Answer

Looking at the code, there is a logic problem:

for i in range(0, 10):  # loops over camera indices 0..9
    cap = cv2.VideoCapture(i)  # open camera number i
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        cap.release()  # the capture is released here!!!
run_inference(detection_model, category_index, cap)  # cap now holds the last capture, which is released or was never opened!!!

Instead:

for i in range(0, 10):
    cap = cv2.VideoCapture(i)
    if not cap.isOpened():
        print(f"Camera {i} is not available")
    else:
        print(f"Camera {i} is available")
        run_inference(detection_model, category_index, cap)  # first run
        cap.release()  # then release.
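
One further hardening worth considering, not part of the original answer: even with the loop fixed, a single dropped frame would feed the same nan tensor into the model. A sketch of a guard on ret inside run_inference:

def run_inference(model, category_index, cap):
    while True:
        ret, image_np = cap.read()
        if not ret or image_np is None:  # dropped frame or closed capture
            break  # avoids building the nan scalar that caused the TypeError above
        output_dict = run_inference_for_single_image(model, image_np)
        # ... visualization and the 'q'-to-quit handling continue as in the question ...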