我还不太理解所有 Python 方法(事实上……还很遥远)。
我正在使用网络摄像头来跟踪一个人。效果很好。
但是,它会随机选择视图中的某个人并跟踪他们,有时会跳到另一个人,我正在努力解决这个问题。
我不确定它如何决定跟踪谁,但我的想法是识别单个目标(带有边界框是理想的),然后可能要求它跟踪该边界框内的骨架。
我这里的代码基本上来自于谷歌搜索的代码和修补。难免会有错误!但它确实可以运行并工作。
寻找了很长时间的工作示例来尝试复制,但没有成功。
`import cv2
import mediapipe as mp # Controls the image/video processing
import serial # Serial for comms to the Arduino
import time # For delays etc
import platform # Details of the system we are running
import atexit # Clean method of exiting
import serial.tools.list_ports # Serial port information
import sys
import numpy as np # Processes numbers, strings and arrays
import keyboard # Allows use of the keyboard
#import pickle # Allows you to store data
# Report host OS and interpreter version at startup.
print(platform.system(), platform.release())
print("Python version " + platform.python_version())
print("")

# --- MediaPipe pose-estimation setup ---
mp_drawing = mp.solutions.drawing_utils  # Skeleton/landmark drawing helpers
mp_pose = mp.solutions.pose              # Pose landmark model
#mp_drawing_styles = mp.solutions.drawing_styles
#mp_holistic = mp.solutions.holistic      # Combines hands, pose and face meshes
#mp_face_mesh = mp.solutions.face_mesh    # Much more detailed face tracking
pose = mp_pose.Pose(
    min_detection_confidence=0.7,  # Detection must reach this confidence before tracking starts
    min_tracking_confidence=0.7,   # Below this, fall back from tracking to detection
)
#static_image_mode = False  # Treat input as static images rather than a video stream - Default False
#model_complexity = 1       # Pose model complexity: 0, 1 or 2. Default = 1
#smooth_landmarks = True    # Reduces jitter in the prediction. Default is True

# --- State flags and tuning constants shared by the main loop ---
nvt_flag = False      # True while "no valid target" is being reported
bowflag = False
curtseyflag = False
showvideo = True      # Live video vs masked (black background) display mode
lux = 0
heartbeat = 0         # Frame counter driving the periodic serial heartbeat
pip_x_offset = 50     # Picture-in-picture overlay position
pip_y_offset = 50
font = cv2.FONT_HERSHEY_SIMPLEX
#--------------------- Arduino communications -----------------------
def findArduinoUnoPort():  # Check all the comm ports for an Arduino
    """Scan the serial ports for an Arduino Uno and return its device name.

    Matches on the Uno's USB vendor/product ID string. Returns the device
    name (e.g. 'COM3' or '/dev/ttyACM0'), or None when no Uno is attached
    (callers test the result for truthiness before opening it).
    """
    UNO_VID_PID = "VID:PID=2341:0043"  # Arduino Uno USB vendor:product ID
    for port in serial.tools.list_ports.comports():
        # port indexes as (device, description, hwid); the VID:PID normally
        # appears only in hwid, but keep the original permissive check
        # across all three fields.
        if any(UNO_VID_PID in field for field in (port[0], port[1], port[2])):
            print(port)
            print(port[0])
            print(port[1])
            print(port[2])
            return port[0]
    return None  # Explicit: no matching port found
def doAtExit():
    """atexit hook: close the Arduino serial port if it is still open.

    Bug fix: the original referenced an undefined name ``serialUno`` — the
    serial handle opened below is called ``serialArduino`` — so this hook
    raised NameError at interpreter exit instead of closing the port.
    """
    if serialArduino.isOpen():
        serialArduino.close()
        print("Close serial")
        print("serialArduino.isOpen() = " + str(serialArduino.isOpen()))
atexit.register(doAtExit)  # Make sure the serial port is closed on exit

# Locate the Arduino's serial port; abort the whole program if none is found.
unoPort = findArduinoUnoPort()
if not unoPort:
    print("No Arduino found")
    sys.exit("No Arduino found - Exiting system")
print("Arduino found: " + unoPort)
print()
serialArduino = serial.Serial(unoPort, 9600, timeout=0)  # timeout=0 -> non-blocking reads
# Fixed label: message previously said "serialUno" but the object is serialArduino.
print("serialArduino.isOpen() = " + str(serialArduino.isOpen()))
#------------------ Camera input selection -----------
# Render the source-selection menu onto the header image.
header = cv2.imread("header.png")  # Background image for the menu page
menu_lines = [
    ('SELECT VIDEO SOURCE', (30, 310), (0, 255, 255)),
    ('1: Internal webcam', (30, 360), (0, 0, 255)),
    ('2: External webcam', (30, 410), (0, 0, 255)),
    ('3: Local video file', (30, 460), (0, 0, 255)),
    ('4: External IP camera (no password)', (30, 510), (0, 0, 255)),
    ('5: External IP camera (Password protected)', (30, 560), (0, 0, 255)),
]
for text, position, colour in menu_lines:
    cv2.putText(header, text, position, font, 1, colour, 2, cv2.LINE_4)
cv2.imshow('Output', header)
cv2.waitKey(300)

# Map each menu key to (source id, capture factory). Factories are lambdas
# so a capture device is only opened for the option actually chosen.
source_table = {
    '1': (1, lambda: cv2.VideoCapture(0)),                    # Laptop built-in webcam
    '2': (2, lambda: cv2.VideoCapture(1, cv2.CAP_DSHOW)),     # External USB camera
    '3': (3, lambda: cv2.VideoCapture('dance2.mp4')),         # Read local video file
    '4': (4, lambda: cv2.VideoCapture('rtsp://192.168.1.64/1')),  # IP camera
    '5': (5, lambda: cv2.VideoCapture('rtsp://username:[email protected]/1')),  # IP camera with username + password
}
vsource = None
while vsource is None:  # Poll the keyboard until a source key is pressed
    for key, (source_id, open_capture) in source_table.items():
        if keyboard.is_pressed(key):
            cap = open_capture()
            vsource = source_id
            break
    else:
        cv2.waitKey(10)  # No key yet: brief pause, keep the GUI responsive

cv2.putText(header, 'Source ' + str(vsource) + ' selected', (30, 610), font, 1, (255, 0, 255), 2, cv2.LINE_4)
cv2.imshow('Output', header)
cv2.waitKey(300)
serialArduino.write(str.encode('4'))  # Tell the Arduino we DO have a valid track (might get cancelled immediately in the loop)
#------------------ Start the main camera routine -----------------------------------------------------------------------------------------------------------------------
while cap.isOpened():
    # Grab the next frame. Bug fix: the success flag was previously ignored
    # (`_, frame = cap.read()`), so a failed read (camera unplugged, end of
    # video file) produced frame=None and a per-frame crash that the broad
    # except silently reported as "No valid target". End the loop instead.
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed - ending video loop")
        break
    width = 1280  # Resize the feed to a fixed working resolution
    height = 720
    dim = (width, height)
    frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)

    #--------- Heartbeat Serial data to Arduino -----------------
    heartbeat += 1
    if heartbeat > 50 and nvt_flag == False:
        heartbeat = 0
        serialArduino.write(str.encode('4'))  # Tell the Arduino we DO have a valid track. (Pulse is checked on the Arduino)

    #-------- Check the camera feed. Is there a target? ---------
    try:
        RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB, OpenCV delivers BGR
        results = pose.process(RGB)                   # Run pose estimation on this frame
        black = cv2.imread("black.png")               # Blank page for the 'video off' feed
        #print(results.pose_landmarks)                # Co-ords of ALL the skeleton landmarks
        if showvideo == True:  # Draw the detected skeleton over the live frame
            mp_drawing.draw_landmarks(
                frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=5, circle_radius=1),      # Landmark circles (B G R)
                mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2, circle_radius=1))  # Joining lines (B G R)
        else:  # Draw the skeleton on the black background instead
            mp_drawing.draw_landmarks(
                black, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=5, circle_radius=1),      # Landmark circles (B G R)
                mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2, circle_radius=1))  # Joining lines (B G R)

        # Walk every landmark and pull out the joints we use. When nobody is
        # detected, results.pose_landmarks is None and the AttributeError
        # lands in the except below -> "No valid target".
        for id, lm in enumerate(results.pose_landmarks.landmark):
            h, w, c = frame.shape
            #print(id, lm)  # Print ALL the skeleton joint position values
            if (id == 0):  # Nose
                cx0, cy0 = int(lm.x * w), int(lm.y * h)  # Landmark is normalised; convert to pixels
                nose = cy0
                cv2.circle(frame, (cx0, cy0), 10, (255, 0, 0), 3)  # Round target on the nose
                if showvideo == False:
                    cv2.circle(black, (cx0, cy0), 20, (255, 0, 0), 3)  # Larger target on the nose
                    cv2.circle(black, (cx0, cy0), 40, (255, 0, 0), 3)  # Larger target on the head
            if (id == 11):  # Left shoulder
                cx11, cy11 = int(lm.x * w), int(lm.y * h)
                lshoulder = cy11
                cv2.circle(frame, (cx11, cy11), 10, (0, 255, 0), 3)
                cv2.circle(black, (cx11, cy11), 10, (0, 255, 0), 1)
            if (id == 12):  # Right shoulder
                cx12, cy12 = int(lm.x * w), int(lm.y * h)
                rshoulder = cy12
                cv2.circle(frame, (cx12, cy12), 10, (0, 255, 0), 3)
                cv2.circle(black, (cx12, cy12), 10, (0, 255, 0), 3)
            if (id == 25):  # Left knee
                cx25, cy25 = int(lm.x * w), int(lm.y * h)
                lknee = cy25
                cv2.circle(frame, (cx25, cy25), 10, (0, 255, 255), 3)
                cv2.circle(black, (cx25, cy25), 10, (0, 255, 255), 3)
            if (id == 26):  # Right knee
                cx26, cy26 = int(lm.x * w), int(lm.y * h)
                rknee = cy26
                cv2.circle(frame, (cx26, cy26), 10, (0, 255, 255), 3)
                cv2.circle(black, (cx26, cy26), 10, (0, 255, 255), 3)
            if (id == 23):  # Left hip
                cx23, cy23 = int(lm.x * w), int(lm.y * h)
                lhip = cy23
                cv2.circle(frame, (cx23, cy23), 10, (0, 255, 255), 3)
                cv2.circle(black, (cx23, cy23), 10, (0, 255, 255), 3)
            if (id == 24):  # Right hip
                cx24, cy24 = int(lm.x * w), int(lm.y * h)
                rhip = cy24
                cv2.circle(frame, (cx24, cy24), 10, (0, 255, 255), 3)
                cv2.circle(black, (cx24, cy24), 10, (0, 255, 255), 3)
            if (id == 31):  # Left foot
                cx31, cy31 = int(lm.x * w), int(lm.y * h)
                lfoot = cy31
                cv2.circle(frame, (cx31, cy31), 10, (255, 0, 255), 3)
                cv2.circle(black, (cx31, cy31), 10, (255, 0, 255), 3)
            if (id == 32):  # Right foot
                cx32, cy32 = int(lm.x * w), int(lm.y * h)
                rfoot = cy32
                cv2.circle(frame, (cx32, cy32), 10, (255, 0, 255), 3)
                cv2.circle(black, (cx32, cy32), 10, (255, 0, 255), 3)

        #---------------- Target distance & height --------------------
        if (lfoot - nose) > (rfoot - nose):  # Height in pixels (foot to nose)
            height = lfoot - nose
        else:
            height = rfoot - nose  # CURRENTLY NOT USING THIS CALC
        width = cx11 - cx12  # Live shoulder width in pixels
        # Pinhole-camera calibration: 320 px between the shoulders at the
        # 1000 mm setup distance, 280 mm measured real shoulder width.
        focallength = (320 * 1000) / 280
        livedistance = (280 * focallength) / width  # Live range to target (div-by-zero is caught below)
        if showvideo == True:
            cv2.putText(frame, 'Distance to target: ' + str(int(livedistance)) + ' mm', (20, 70), font, 1, (0, 255, 0), 2, cv2.LINE_4)
        else:
            cv2.putText(black, 'Distance to target: ' + str(int(livedistance)) + ' mm', (20, 70), font, 1, (0, 0, 255), 2, cv2.LINE_4)
        if showvideo == True:
            # NOTE(review): resize_img is never defined anywhere in this file,
            # so this line raises NameError on every frame, which the except
            # below mislabels as "No valid target". Load/resize the PIP image
            # before the loop (or confirm it is defined in code not shown).
            frame[pip_y_offset:pip_y_offset + resize_img.shape[0], pip_x_offset:pip_x_offset + resize_img.shape[1]] = resize_img  # Respect image PIP
            cv2.putText(frame, 'Live video mode', (20, 30), font, 1, (0, 255, 255), 2, cv2.LINE_4)
            cv2.putText(frame, 'TEST MODE', (300, 30), font, 1, (0, 0, 255), 2, cv2.LINE_4)
            cv2.imshow('Output', frame)
        if showvideo == False:
            black[pip_y_offset:pip_y_offset + resize_img.shape[0], pip_x_offset:pip_x_offset + resize_img.shape[1]] = resize_img  # Respect image PIP
            cv2.putText(black, 'Masked video mode', (20, 30), font, 1, (0, 0, 255), 2, cv2.LINE_4)
            cv2.putText(black, 'TEST MODE', (360, 30), font, 1, (0, 255, 255), 2, cv2.LINE_4)
            cv2.imshow('Output', black)

        #------------ Toggle video mode ---------------------------------
        # elif (was two independent ifs) so one press can never toggle twice.
        if keyboard.is_pressed('v') and showvideo == True:
            showvideo = False
            while keyboard.is_pressed('v'):  # Debounce: wait for key release
                cv2.waitKey(1)
        elif keyboard.is_pressed('v') and showvideo == False:
            showvideo = True
            while keyboard.is_pressed('v'):  # Debounce: wait for key release
                cv2.waitKey(1)
        if showvideo == True:
            cv2.putText(frame, 'Live video mode', (20, 30), font, 1, (0, 255, 0), 2, cv2.LINE_4)
            cv2.imshow('Output', frame)  # Display the live video feed
        else:
            cv2.putText(black, 'Masked video mode', (20, 30), font, 1, (0, 0, 255), 2, cv2.LINE_4)
            cv2.imshow('Output', black)  # Display the blank background image

        #------------ tracking online Y/N --------------
        if nvt_flag == True:  # We got this far, so tracking has recovered
            #serialArduino.write(str.encode('4'))  # Tell the Arduino we DO have a valid track
            print("Tracking online")
            nvt_flag = False
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still exit the
        # program. Reached whenever no skeleton was found (pose_landmarks is
        # None) or any processing/drawing step above failed.
        serialArduino.write(str.encode('5'))  # Tell the Arduino we do NOT have a valid track
        if nvt_flag == False:
            print("No valid target")
            nvt_image = cv2.imread("nvt.png")  # "No valid target" splash image
            cv2.imshow('Output', nvt_image)
            nvt_flag = True  # Report the loss only once until tracking recovers
        #break  # Disable the frame draw error break

    if cv2.waitKey(1) == ord('q'):  # 'q' quits the program
        print("Exit routine")
        break
cap.release()  # Release the capture device / file
cv2.destroyAllWindows()
使用新的 MediaPipe Pose Landmarker API:将 `num_poses` 设为大于 1 即可同时检测多人,并为每个人返回独立的一组关键点(可据此计算边界框、锁定单一目标)。请参阅 https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/python