from ultralytics import YOLO
import cv2
import os

# Load the object detection model (custom model whose class 0 is 'crew')
det_model = YOLO("yolo11m-2_uniform.onnx")

# Load the pose estimation model
pose_model = YOLO("yolo11s-pose.pt")

video_path = "PF-071124-2.mp4"
cap = cv2.VideoCapture(video_path)

frame_count = 0  # Initialize a frame counter
save_dir = "/results/"  # Directory to save the crops
os.makedirs(save_dir, exist_ok=True)  # Create the directory if it doesn't exist

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1  # Increment the frame counter

    # Run object detection on the full frame
    det_results = det_model.predict(frame, conf=0.5)

    # Keep only detections of the 'crew' class and run pose estimation on each crop
    result = det_results[0]
    for i, box in enumerate(result.boxes):
        if result.names[int(box.cls)] != "crew":
            continue

        # Extract the bounding box coordinates and crop the person from the frame
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        person_image = frame[y1:y2, x1:x2]
        if person_image.size == 0:
            continue  # Skip degenerate boxes

        # The cropped BGR numpy array can be passed straight to the pose model
        pose_results = pose_model(person_image)
        annotated = pose_results[0].plot()
        cv2.imshow("frame", annotated)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
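# Note: save_dir is created above ("Directory to save the crops") but nothing is
# ever written to it. A minimal sketch for saving each crop, assuming it is
# placed inside the per-person loop right after person_image is built
# (the frame{...}_crew{...}.jpg naming pattern is only an illustration):
#
#     crop_path = os.path.join(save_dir, f"frame{frame_count:06d}_crew{i}.jpg")
#     cv2.imwrite(crop_path, person_image)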