I am trying to extract the text from an image of a 4x4 matrix. The cells contain numbers. Although I was expecting numbers, all I got was: BE, 8, EEE, BE. The image is attached here: image
Does anyone have an idea how to solve this?
import cv2
import numpy as np
from PIL import Image
import pytesseract
def preprocess_image(image_path, threshold=128):
    """Load an image and return an inverted binary version of it.

    Args:
        image_path: Path of the image file to read.
        threshold: Grey level (0-255) used as the cut-off. Because
            ``cv2.THRESH_BINARY_INV`` is used, pixels brighter than this
            value become 0 and all others become 255.

    Returns:
        The single-channel thresholded image produced by ``cv2.threshold``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path`` (the file is
            missing or not a decodable image).
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an obscure error inside cvtColor.
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable): {image_path!r}"
        )
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)
    return binary
def find_cells(binary_image):
    """Locate the outer contours of a binary image and return their boxes.

    Args:
        binary_image: Single-channel binary image, e.g. the output of
            ``cv2.threshold``.

    Returns:
        A list of ``(x, y, width, height)`` bounding boxes ordered row by row
        (top to bottom), and left to right within each row.
    """
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    bounding_boxes = [cv2.boundingRect(contour) for contour in contours]
    return _sort_row_major(bounding_boxes)


def _sort_row_major(boxes):
    """Order (x, y, w, h) boxes top-to-bottom, then left-to-right per row."""
    rows = []
    for box in sorted(boxes, key=lambda b: b[1]):
        # A plain (y, x) sort misorders cells of the same row whose top edges
        # differ by a pixel or two. Treat a box as part of the current row
        # when its top lies within half the height of the row's first box.
        if rows and box[1] - rows[-1][0][1] <= rows[-1][0][3] / 2:
            rows[-1].append(box)
        else:
            rows.append([box])
    return [box for row in rows for box in sorted(row, key=lambda b: b[0])]
def extract_text_from_cells(image_path, bounding_boxes, config='--psm 6'):
    """Run Tesseract OCR on each bounding box of an image.

    Args:
        image_path: Path of the image file to crop the cells from.
        bounding_boxes: Iterable of ``(left, top, width, height)`` tuples.
        config: Extra Tesseract options forwarded to
            ``pytesseract.image_to_string``. Defaults to ``'--psm 6'``.
            NOTE(review): for cells that hold a single number, a
            single-word mode plus a digit whitelist (for example
            ``'--psm 8 -c tessedit_char_whitelist=0123456789'``) may give
            cleaner results; confirm against the actual images.

    Returns:
        A list with the whitespace-stripped OCR text of every box, in the
        order the boxes were given.
    """
    bounding_boxes = list(bounding_boxes)
    # Nothing to recognise: skip opening the image altogether.
    if not bounding_boxes:
        return []

    cell_texts = []
    # The context manager closes the underlying file handle, which a bare
    # Image.open() would leave open.
    with Image.open(image_path) as image:
        for left, top, width, height in bounding_boxes:
            cropped_image = image.crop((left, top, left + width, top + height))
            text = pytesseract.image_to_string(cropped_image, config=config)
            cell_texts.append(text.strip())
    return cell_texts
# Screenshot to run the OCR pipeline on.
image_path = r'C:\Users\Sandro\Desktop\BINGO\screenshot.png'


def main():
    """Binarise the screenshot, find its cells and print each cell's text."""
    binary_image = preprocess_image(image_path)
    cells = find_cells(binary_image)
    texts = extract_text_from_cells(image_path, cells)
    for i, text in enumerate(texts):
        print(f"Texto da célula {i+1}: {text}")


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Install `pytesseract` using the command `pip install pytesseract`. Does it produce results if you use other images? Or try changing the `--psm 6` in the `pytesseract.image_to_string` call to `--psm 8`.