Need support to straighten,crop image properly for requirement in computer vision

Question

My requirement: Need to extract license plates without duplicates and store images in a folder,then apply ocr to extract text from images.

What i have achieved: Iam able to detect license plates properly and with the help of tracking mechanism i was able to avoid dupicate plates.

Pending item: Unable to format image properly which is causing ocr unable to read text

I have created below code which will detect license plates from vehicles and store images in a folder and ocr them to get text in to a separate file,In my case image which is being stored in folder is not straightening/cropping properly after processing and even image is looking dull,Because of this ocr is not recognizing text properly.Please help me resolve this,once ocr recognizes text from image my work is done

from ultralytics import YOLO
import cv2
import torch
import pytesseract
import os
import numpy as np
from collections import defaultdict
class_counts = defaultdict(int)
processed_track_ids = set()
pytesseract.pytesseract.tesseract_cmd = 'C:\Program Files\Tesseract-OCR\tesseract.exe'
model = YOLO("license_plate_detector.pt")
video_path='video.MP4'
cap = cv2.Videocapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
delay = int(1000/fps)
output_dir = 'extracted_dir'
os.makedirs(output_dir,exist_ok=True)
def resize_frame(frame, size=(640,640)):
    height,width = frame.shape[:2]
    return cv2.resize(frame, size, interpolation=cv2.INTER_LINEAR)
license_plates = []
def straighten_crop_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.canny(gray,50,150)
    lines = cv2.Houghlines(edges,1,np.pi/180,200)
    if lines is not None:
        for rho,theta in lines[:,0]:
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            cv2.line(image,(x1,y1),(x2,y2),(0,0,255),2)
        contours,_ = cv2.findContours(edges, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)
        if len(contours) == 0:
            print("No contours found")
            return image
       license_plate_contours = contours[0]
       x,y,w,h = cv2.boundingRect(license_plate_contour)
       cropped_image = image[y:y+h-10, x:x+w]
       rect = cv2.minAreaRect(license_plate_contour)
       box = cv2.boxPoints(rect)
       box = np.int0(box)
       width = int(rect[1][0])
       height = int(rect[1][1])
       if abs(width - height) > 10:
          src_pts = box.astype("float32")
          dst_pts = np.array([[0, height-1],[0,0],[width-1,0],[width-1, height-1]],dtype="float32")
          M = cv2.getPerspectiveTransform(src_pts, dst_pts)
          straightened_image = cv2.warpPerspective(cropped_image,M,(width, height))
   else:
          straightened_image = cropped_image
   if straightened_image.size == 0:
        print("Straightened image is empty")
        return image
   kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
   straightened_image = cv2.dilate(straightened_image,kernel,iterations=1)
   straightened_image = cv2.convertScaleAbs(straightened_image, alpha=1.5,beta=50)
   return straightened_image
def enchance_image(image):
    straightened_image = straighten_crop_image(image)
    gray = cv2.cvtColor(straightened_image, cv2.COLOR_BGR2GRAY)
    return gray
while cap.isOpened():
    ret,frame = cap.read()
    if not ret:
    break
frame_resized = resize_frame(frame)
frame_tensor = torch.tensor(frame_resized).permute(2,0,1).unsqueeze(0).float() / 255.0
results = model.track(frame_sensor,persist=True,conf=0.4,iou=0.4)

for result in results:
    if result.boxes.id is not None:        
       for bbox,track_id in zip(result.boxes.xyxy,result.boxes.id):           
        bbox = bbox.int().tolist()
        license_plate = frame_resized[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        enhanced_license_plate = enhance_image(license_plate)
        if track_id not in processed_track_ids:
            ocr_text = pytesseract.image_to_string(enhanced_license_plate)
            print(f'OCR text: {ocr_text}')
            processed_track_ids.add(track_id)
            plate_filename = os.path.join(output_dir, f'track_id_{track_id}_plate.png')
            cv2.imwrite(plate_filename, enhanced_license_plate)
            ocr_filename = os.path.join(output_dir,f'track_{track_id}_plate.txt')
            with open(ocr_filename, 'w') as f:
                f.write(ocr_text)
            print(f'Text saved to :{ocr_filename}')

        cv2.rectangle(frame_resized, (bbox[0],bbox[1]),(bbox[2],bbox[3]), (0,255,0),2)
        cv2.putText(frame_resized, 'License Plate',(bbox[0],bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,0.9,(0,255,0),2)

cv2.imshow('License plate detection', frame_resized)
if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
df = pd.DataFrame(license_plates, columns=['License Plate'])
df.to_excel('extracted_license_plates.xlsx'. index=False)

score 0 · Accepted Answer · answered Jun 02 '25 at 15:57

from ultralytics import YOLO
import cv2
import torch
import paddleocr
import os
import numpy as np
import math


model = YOLO("license_plate_detector.pt")
video_path='video.MP4'
cap = cv2.Videocapture(video_path)
with open("c:\\coco1.txt","r") as f: #coco1.txt should contain numberplate
    class_names = f.read().splitlines()

output_dir = 'extracted_dir'
os.makedirs(output_dir,exist_ok=True)

def perform_ocr(image_array):
    if image_array is None:
         raise ValueError("Image is none")
    results = ocr.ocr(image_array, rec=True)
    detected_text = []
    if results[0] is not None:
        for result in results[0]:
            text = result[1][0]
            detected_text.append(text)
   return ''.join(detected_text)


license_plates = []

def rotate_image(image, angle):
    if abs(angle) < 1e-6 or angle in [90, 80, 100]:
        return image
    h, w = image.shape[:2]
    new_w = int(w *abs(np.cos(np.radians(angle))) + h * abs(np.sin(np.radians(angle))))
    new_h = int(h * abs(np.cos(np.radians(angle))) + w * abs(np.sin(np.radians(angle))))
    canvas = np.zeros((new_h,new_w,3), dtype=np.uint8)
    canvas_center = (new_w // 2, new_h // 2)
    rot_mat = cv2.getRotationMatrix2D((w// 2, h // 2), angle, 1.0)
    rot_mat[0, 2] += (new_w - w) / 2
    rot_mat[1, 2] += (new_h - h) / 2
    result = cv2.warpAffine(image, rot_mat, (new_w,new_h),flags=cv2.INTER_LINEAR)
    return result
    def compute_skew(src_img):
        if len(src_img.shape) == 3:
            h, w, _ = src_img.shape
        elif len(src_img.shape) == 2:
            h, w = src_img.shape
        else:
            print('unsupported image type')
            return 0.0
        img = cv2.medianBlur(src_img, 3)
        edges = cv2.canny(img, threshold1=20, threshold2=100, apertureSize=3, L2gradient=True)
    lines - cv2.HoughLinesP(edges, 1,math.pi/180,30,minLineLength=w / 4.0, maxLineGap=h / 4.0)
    angle = 0.0
    cnt = 0
    if lines is not None:
        for line in lines:
            for x1,y1,x2,y2 in line:
                ang = np.arctan2(y2 - y1, x2 - x1)
                if math.fabs(ang) <= math.radians(30):
                     angle += ang
                     cnt += 1
     if cnt == 0:
         return 0.0
     return (angle / cnt) * 180 / math.pi

def deskew(src_img):
    return rotate_image(src_img, compute_skew(src_img))

def RGB(event,x,y,flags,param):
    if event == cv2.EVENT_MOUSEMOVE:
        point = [x, y]
        print(point)
cv2.namedWindow('RGB')
cv2.setMouseCallback('RGB',RGB)

count = 0
area = [(540,522),(28,462),(3,507),(553,599)]
counter = []
save_folder = "sav_images"
df = pd.DataFrame(columns=["Date", "License Plate"])



while cap.isOpened():
    ret,frame = cap.read()
    if not ret:
    break

    frame = cv2.resize(frame, (640,640)
    frame_tensor = torch.tensor(frame_resized).permute(2,0,1).unsqueeze(0).float() / 255.0
    results = model.track(frame_sensor,persist=True,imgsz=240)

    for result in results:
        if results[0].boxes.id is not None and results[0].boxes.id is not None: 
            boxes = results[0].boxes.xyxy.int().cpu().tolist()
            class_ids = results[0].boxes.xyxy.int().cpu().tolist()
            track_ids = results[0].boxes.id.int().cpu().tolist()
            confidences = results[0].boxes.conf.cpu().tolist()       
           for bbox,class_id,track_id .conf in zip(boxes,class_ids,track_ids,confidences):           
            c = class_names[class_id]
            x1,y1,x2,y2 = box
            height, width = frame.shape[:2]
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(width, x2)
            y2 = min(height, y2)
            cx=int(x1+x2)//2
            cy=int(y1+y2)//2
            result = cv2.pointPolygonTest(np.array(area,np.int32),((cx, cy)), False)
            if result >= 0:
                if track_id not in counter:
                    counter.apppend(track_id)
                    crop = frame[y1:y2, x1:x2]
                    crop = cv2.resize(crop,(120,100))

                ocr_text = pytesseract.image_to_string(enhanced_license_plate)
                print(f'OCR text: {ocr_text}')
                processed_track_ids.add(track_id)
                plate_filename = os.path.join(output_dir, f'track_id_{track_id}_plate.png')
                cv2.imwrite(plate_filename,crop)
                text = perform_ocr(crop)
                print(text)
                text = text.replace('(','').replace('),'').replace(',','').replace(']','').replace('-',' ')
                df.loc[len(df)] = [datetime.now().strftime("%Y=%m-%d %H:%M:%S"), text]
    mycounter = len(counter)
    cvzone.putTextRect(frame,f'{mycounter}',(50,60),1,1)
    cv2.polylines(frames,[np.array(area, np.int32)], True, (255,0,0),2)           
    cv2.imshow('RGB', frame)

if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


df.to_excel('extracted_license_plates.xlsx'. index=False)

Need support to straighten,crop image properly for requirement in computer vision

1 Answers1