# object_detection.py
# Run YOLOv8 object detection on a video file: draw bounding boxes for every
# confident detection, overlay the measured FPS, and write the annotated
# frames to an output video.  Press "q" in the preview window to stop early.
import datetime

from ultralytics import YOLO
import cv2

from helper import create_video_writer

# define some constants
CONFIDENCE_THRESHOLD = 0.8  # detections below this confidence are discarded
GREEN = (0, 255, 0)         # BGR color used for bounding boxes

# initialize the video capture object
video_cap = cv2.VideoCapture("2.mp4")
# initialize the video writer object (helper copies the input's size and fps)
writer = create_video_writer(video_cap, "output.mp4")

# load the pre-trained YOLOv8n model
model = YOLO("yolov8n.pt")

while True:
    # start time to compute the fps
    start = datetime.datetime.now()

    ret, frame = video_cap.read()
    # if there are no more frames to process, break out of the loop
    if not ret:
        break

    # run the YOLO model on the frame
    detections = model(frame)[0]

    # loop over the detections; each row is [xmin, ymin, xmax, ymax, conf, class]
    for data in detections.boxes.data.tolist():
        # extract the confidence (i.e., probability) associated with the detection
        confidence = data[4]

        # filter out weak detections by ensuring the
        # confidence is greater than the minimum confidence
        if float(confidence) < CONFIDENCE_THRESHOLD:
            continue

        # if the confidence is greater than the minimum confidence,
        # draw the bounding box on the frame
        xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)

    # end time to compute the fps
    end = datetime.datetime.now()
    # show the time it took to process 1 frame
    total = (end - start).total_seconds()
    print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")

    # calculate the frames per second and draw it on the frame; guard against
    # a zero-length interval, which would raise ZeroDivisionError on a very
    # fast iteration
    fps = f"FPS: {1 / total:.2f}" if total > 0 else "FPS: inf"
    cv2.putText(frame, fps, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)

    # show the frame to our screen and append it to the output video
    cv2.imshow("Frame", frame)
    writer.write(frame)
    if cv2.waitKey(1) == ord("q"):
        break

video_cap.release()
writer.release()
cv2.destroyAllWindows()
# object_detection_tracking.py
# Run YOLOv8 detection plus DeepSORT multi-object tracking on a video file:
# draw a box and a persistent track id for every confirmed track, overlay the
# measured FPS, and write the annotated frames to an output video.
# Press "q" in the preview window to stop early.
import datetime

from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort

from helper import create_video_writer

CONFIDENCE_THRESHOLD = 0.8  # detections below this confidence are discarded
GREEN = (0, 255, 0)         # BGR color for boxes and label background
WHITE = (255, 255, 255)     # BGR color for the track-id text

# initialize the video capture object
video_cap = cv2.VideoCapture("2.mp4")
# initialize the video writer object (helper copies the input's size and fps)
writer = create_video_writer(video_cap, "output.mp4")

# load the pre-trained YOLOv8n model and the tracker;
# max_age is the number of missed frames before a track is dropped
model = YOLO("yolov8n.pt")
tracker = DeepSort(max_age=50)

while True:
    start = datetime.datetime.now()

    ret, frame = video_cap.read()
    if not ret:
        break

    # run the YOLO model on the frame
    detections = model(frame)[0]

    # detections handed to the tracker: ([left, top, width, height], conf, class_id)
    results = []

    ######################################
    # DETECTION
    ######################################
    # loop over the detections; each row is [xmin, ymin, xmax, ymax, conf, class]
    for data in detections.boxes.data.tolist():
        # extract the confidence (i.e., probability) associated with the prediction
        confidence = data[4]

        # filter out weak detections by ensuring the
        # confidence is greater than the minimum confidence
        if float(confidence) < CONFIDENCE_THRESHOLD:
            continue

        # if the confidence is greater than the minimum confidence,
        # get the bounding box and the class id
        xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
        class_id = int(data[5])

        # DeepSort expects boxes as (x, y, w, h), so convert from corners
        results.append([[xmin, ymin, xmax - xmin, ymax - ymin], confidence, class_id])

    ######################################
    # TRACKING
    ######################################
    # update the tracker with the new detections
    tracks = tracker.update_tracks(results, frame=frame)

    # loop over the tracks
    for track in tracks:
        # if the track is not confirmed yet, ignore it
        if not track.is_confirmed():
            continue

        # get the track id and the bounding box (left, top, right, bottom)
        track_id = track.track_id
        ltrb = track.to_ltrb()
        xmin, ymin, xmax, ymax = int(ltrb[0]), int(ltrb[1]), int(ltrb[2]), int(ltrb[3])

        # draw the bounding box, a filled label background, and the track id
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)
        cv2.rectangle(frame, (xmin, ymin - 20), (xmin + 20, ymin), GREEN, -1)
        cv2.putText(frame, str(track_id), (xmin + 5, ymin - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, WHITE, 2)

    # end time to compute the fps; compute the interval once instead of
    # re-subtracting the timestamps for the FPS string as the original did
    end = datetime.datetime.now()
    total = (end - start).total_seconds()
    print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")

    # calculate the frames per second and draw it on the frame; guard against
    # a zero-length interval, which would raise ZeroDivisionError
    fps = f"FPS: {1 / total:.2f}" if total > 0 else "FPS: inf"
    cv2.putText(frame, fps, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)

    # show the frame to our screen and append it to the output video
    cv2.imshow("Frame", frame)
    writer.write(frame)
    if cv2.waitKey(1) == ord("q"):
        break

video_cap.release()
writer.release()
cv2.destroyAllWindows()