My requirement: Need to extract license plates without duplicates and store images in a folder,then apply ocr to extract text from images.
What i have achieved: Iam able to detect license plates properly and with the help of tracking mechanism i was able to avoid dupicate plates.
Pending item: Unable to format image properly which is causing ocr unable to read text
I have created below code which will detect license plates from vehicles and store images in a folder and ocr them to get text in to a separate file,In my case image which is being stored in folder is not straightening/cropping properly after processing and even image is looking dull,Because of this ocr is not recognizing text properly.Please help me resolve this,once ocr recognizes text from image my work is done
from ultralytics import YOLO
import cv2
import torch
import pytesseract
import os
import numpy as np
from collections import defaultdict
class_counts = defaultdict(int)
processed_track_ids = set()
pytesseract.pytesseract.tesseract_cmd = 'C:\Program Files\Tesseract-OCR\tesseract.exe'
model = YOLO("license_plate_detector.pt")
video_path='video.MP4'
cap = cv2.Videocapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
delay = int(1000/fps)
output_dir = 'extracted_dir'
os.makedirs(output_dir,exist_ok=True)
def resize_frame(frame, size=(640,640)):
height,width = frame.shape[:2]
return cv2.resize(frame, size, interpolation=cv2.INTER_LINEAR)
license_plates = []
def straighten_crop_image(image):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = cv2.canny(gray,50,150)
lines = cv2.Houghlines(edges,1,np.pi/180,200)
if lines is not None:
for rho,theta in lines[:,0]:
a = np.cos(theta)
b = np.sin(theta)
x0 = a * rho
y0 = b * rho
x1 = int(x0 + 1000 * (-b))
y1 = int(y0 + 1000 * (a))
x2 = int(x0 - 1000 * (-b))
y2 = int(y0 - 1000 * (a))
cv2.line(image,(x1,y1),(x2,y2),(0,0,255),2)
contours,_ = cv2.findContours(edges, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)
if len(contours) == 0:
print("No contours found")
return image
license_plate_contours = contours[0]
x,y,w,h = cv2.boundingRect(license_plate_contour)
cropped_image = image[y:y+h-10, x:x+w]
rect = cv2.minAreaRect(license_plate_contour)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = int(rect[1][0])
height = int(rect[1][1])
if abs(width - height) > 10:
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],[0,0],[width-1,0],[width-1, height-1]],dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
straightened_image = cv2.warpPerspective(cropped_image,M,(width, height))
else:
straightened_image = cropped_image
if straightened_image.size == 0:
print("Straightened image is empty")
return image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
straightened_image = cv2.dilate(straightened_image,kernel,iterations=1)
straightened_image = cv2.convertScaleAbs(straightened_image, alpha=1.5,beta=50)
return straightened_image
def enchance_image(image):
straightened_image = straighten_crop_image(image)
gray = cv2.cvtColor(straightened_image, cv2.COLOR_BGR2GRAY)
return gray
while cap.isOpened():
ret,frame = cap.read()
if not ret:
break
frame_resized = resize_frame(frame)
frame_tensor = torch.tensor(frame_resized).permute(2,0,1).unsqueeze(0).float() / 255.0
results = model.track(frame_sensor,persist=True,conf=0.4,iou=0.4)
for result in results:
if result.boxes.id is not None:
for bbox,track_id in zip(result.boxes.xyxy,result.boxes.id):
bbox = bbox.int().tolist()
license_plate = frame_resized[bbox[1]:bbox[3], bbox[0]:bbox[2]]
enhanced_license_plate = enhance_image(license_plate)
if track_id not in processed_track_ids:
ocr_text = pytesseract.image_to_string(enhanced_license_plate)
print(f'OCR text: {ocr_text}')
processed_track_ids.add(track_id)
plate_filename = os.path.join(output_dir, f'track_id_{track_id}_plate.png')
cv2.imwrite(plate_filename, enhanced_license_plate)
ocr_filename = os.path.join(output_dir,f'track_{track_id}_plate.txt')
with open(ocr_filename, 'w') as f:
f.write(ocr_text)
print(f'Text saved to :{ocr_filename}')
cv2.rectangle(frame_resized, (bbox[0],bbox[1]),(bbox[2],bbox[3]), (0,255,0),2)
cv2.putText(frame_resized, 'License Plate',(bbox[0],bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,0.9,(0,255,0),2)
cv2.imshow('License plate detection', frame_resized)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
df = pd.DataFrame(license_plates, columns=['License Plate'])
df.to_excel('extracted_license_plates.xlsx'. index=False)

