我编写了一个简单的 Python 脚本,用于从图像中提取文本。
import cv2
import pytesseract
import numpy
from PIL import Image
def getText(img: "Image.Image") -> str:
    """Extract text from a PIL image with Tesseract OCR.

    Args:
        img: Source picture as a ``PIL.Image`` (any mode).

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        greyscale version of the picture.
    """
    # Normalise to 3-channel RGB so palette, RGBA and greyscale inputs all
    # reach cvtColor with the channel layout it is told to expect.
    rgb = numpy.array(img.convert("RGB"))
    # The array is in R, G, B order (not OpenCV's B, G, R), hence RGB2GRAY.
    greyscale = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    return pytesseract.image_to_string(greyscale)
我已经在此图像上测试了我的代码,但输出是一个空字符串。
为什么 tesseract 无法正确处理某些图像以及如何解决此问题?
图像质量和尺寸很重要。另请了解页面分割模式 (psm) 的作用:
import cv2
import pytesseract
def getText(img: str, *, threshold: int = 170, crop=(59, 96, 314, 560),
            scale_percent: int = 100, show: bool = True) -> str:
    """Read an image file, binarise and crop it, then OCR the result.

    Args:
        img: Path of the image file to load with ``cv2.imread``.
        threshold: Grey level passed to ``cv2.threshold``; pixels above it
            become 255, the rest 0.
        crop: ``(top, bottom, left, right)`` pixel bounds of the text area,
            applied as ``image[top:bottom, left:right]``. Pass ``None`` to
            OCR the whole picture.
        scale_percent: Size of the OCR input as a percentage of the cropped
            region (100 keeps the original size).
        show: When true, display the preprocessed image in a window for
            1200 ms before returning.

    Returns:
        The text returned by ``pytesseract.image_to_string``.

    Raises:
        OSError: If ``cv2.imread`` could not load ``img``.
        ValueError: If the crop bounds select no pixels.
    """
    # First step: prepare the image.
    image = cv2.imread(img, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {img!r}")

    # IMREAD_UNCHANGED returns single-channel files as 2-D arrays, which
    # the BGR2GRAY conversion would reject; those are already greyscale.
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # A pure black-and-white image is the input the OCR step works on.
    _, region = cv2.threshold(gray_image, threshold, 255, cv2.THRESH_BINARY)

    # Focus on the text area.
    if crop is not None:
        top, bottom, left, right = crop
        region = region[top:bottom, left:right]
    if region.size == 0:
        raise ValueError(f"crop {crop!r} selects no pixels from {img!r}")

    # Resize the characters if necessary.
    width = int(region.shape[1] * scale_percent / 100)
    height = int(region.shape[0] * scale_percent / 100)
    resized = cv2.resize(region, (width, height), interpolation=cv2.INTER_AREA)

    # Second step: OCR the text. --psm picks the page segmentation mode,
    # --oem the OCR engine mode and -l the language data.
    custom_config = r'--psm 3 --oem 3 -l eng'
    text_from_img = pytesseract.image_to_string(resized, config=custom_config)

    # Show the result of the image transformation.
    if show:
        cv2.imshow("Black & White", resized)
        cv2.waitKey(1200)
        cv2.destroyAllWindows()

    return text_from_img
if __name__ == "__main__":
    # Script entry point: OCR the sample image and print the recognised text.
    print(getText("sign.png"))
输出:
SPIKE PLANTED