当我使用 PyTesseract 识别该图像中的文本时,它返回的是“FORREST C. BLopGetTrT”,而不是“FORREST C. BLODGETT”。这是我的代码得到的结果。
我怀疑这些字母彼此太接近。我尝试增加图像的大小,但效果不佳。大小写(大写或小写)在这里并不重要。有什么办法可以解决这个问题并获得正确的名称吗?
我的代码
from pytesseract import image_to_string
import pytesseract
from PIL import Image
import cv2
# Tell pytesseract where the Tesseract executable lives.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the input image from disk.
img = cv2.imread("textimg.png")

# Upscale by a factor of 20 in both directions.
# shape[:2] is (rows, cols); cv2.resize takes its target size as (width, height).
height, width = img.shape[:2]
img = cv2.resize(img, (width * 20, height * 20))

# Grayscale, then an inverted binary threshold; with THRESH_OTSU the
# threshold value is chosen automatically and the 0 passed here is ignored.
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thr = cv2.threshold(gry, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Run OCR on the binarized image with the English model and show the result.
txt = image_to_string(thr, lang="eng")
print(txt)
我尝试增加图像的尺寸,但效果不佳。
使用膨胀和腐蚀等形态变换来细化文本边缘,这有助于分离太靠近的字母。
from pytesseract import image_to_string
import pytesseract
from PIL import Image
import cv2
import numpy as np
# Specify the path to the Tesseract executable.
# The raw string must stay on a single line: a line break inside the literal
# is a SyntaxError.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the image and convert it to grayscale.
img = cv2.imread("textimg.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply a sharpening filter: 3x3 kernel with centre weight 5 and the four
# direct neighbours at -1 (weights sum to 1). ddepth=-1 keeps the source depth.
sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
gry = cv2.filter2D(gry, -1, sharpen_kernel)

# Inverted binary threshold; Otsu picks the threshold value automatically.
thr = cv2.threshold(gry, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morphological step with a 2x2 rectangular structuring element.
# NOTE(review): after THRESH_BINARY_INV, pixels that were darker than the
# threshold become white. If the text is dark on a light background, the
# glyphs are the white foreground here and dilation thickens the strokes
# rather than separating them -- cv2.erode may be what is intended. Confirm
# against the actual image before changing.
morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
thr = cv2.dilate(thr, morph_kernel, iterations=1)

# Tesseract configuration: --oem 3 selects the default OCR engine mode,
# --psm 7 treats the image as a single line of text.
custom_config = r'--oem 3 --psm 7'
txt = image_to_string(thr, config=custom_config, lang="eng")
print(txt)