"""Flag SOP compliance for every person detected in a video.

Pipeline, per frame:
  1. Run the ONNX person detector on a square, RGB, [0, 1]-scaled copy of
     the frame.
  2. For each reported person box, crop the original frame, run the pose
     model on the crop and hand the result to ``check_sop_compliance``.
  3. Draw a green ("Compliant") or red ("Non-compliant") box on the frame.

Press ``q`` in the preview window to stop early.
"""
import sys

import cv2
import numpy as np
import onnxruntime as ort
import torch
from torchvision import transforms

# Side length (pixels) of the square image fed to the detector.
DETECTOR_INPUT_SIZE = 640
# Side length (pixels) of the square person crop fed to the pose model.
POSE_INPUT_SIZE = 256

# Load YOLO model for person detection
yolo_model_path = 'yolo11m-2_uniform.onnx'
ort_session = ort.InferenceSession(yolo_model_path)
# Ask the session for its input name instead of assuming it is 'images'.
detector_input_name = ort_session.get_inputs()[0].name

# Load YOLO pose estimation model
pose_model_path = 'yolo11s-pose.pt'
# SECURITY: torch.load unpickles arbitrary Python objects; only load
# checkpoints from a trusted source.
# weights_only=False is needed to get a full module back on PyTorch >= 2.6,
# where the default became True. map_location keeps the model on the CPU,
# which is where the input tensors below live.
pose_checkpoint = torch.load(
    pose_model_path, map_location='cpu', weights_only=False
)
if isinstance(pose_checkpoint, dict):
    # NOTE(review): Ultralytics-style checkpoints appear to be dicts holding
    # the module under 'ema' and/or 'model' -- confirm against the file used.
    pose_model = pose_checkpoint.get('ema')
    if pose_model is None:
        pose_model = pose_checkpoint['model']
else:
    # The file already contains a pickled nn.Module.
    pose_model = pose_checkpoint
# Checkpoints may be stored in half precision; the inputs here are float32.
pose_model = pose_model.float()
pose_model.eval()

# NOTE(review): these are the ImageNet mean/std in RGB order. Confirm the
# pose model was trained with this normalisation; many YOLO models expect
# plain [0, 1] scaling instead.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

video_path = 'PF-071124-2.mp4'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error opening video file")
    # Exit with a non-zero status so callers can detect the failure;
    # sys.exit is used because the bare exit() helper is not always defined.
    sys.exit(1)


def get_person_bboxes(outputs):
    """Extract person bounding boxes from the raw detector outputs.

    Args:
        outputs: The list returned by ``ort_session.run`` for one frame.

    Returns:
        An iterable of ``(x1, y1, x2, y2)`` boxes. The main loop treats
        them as pixel coordinates of the
        ``DETECTOR_INPUT_SIZE`` x ``DETECTOR_INPUT_SIZE`` detector input and
        rescales them to the original frame. Currently a stub that returns
        no boxes.
    """
    # Implement YOLO output parsing here
    return []


def check_sop_compliance(pose_output):
    """Decide whether a detected person complies with the SOP.

    Args:
        pose_output: Whatever ``pose_model`` returns for one person crop.

    Returns:
        ``True`` when compliant. Currently a stub that always returns
        ``True``.
    """
    # Implement your SOP compliance logic here
    return True


def _preprocess_for_detector(frame):
    """Turn a BGR frame into the detector's (1, 3, S, S) float32 input."""
    resized = cv2.resize(frame, (DETECTOR_INPUT_SIZE, DETECTOR_INPUT_SIZE))
    # OpenCV decodes frames as BGR; convert to RGB for the detector.
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    chw = np.transpose(rgb, (2, 0, 1)).astype(np.float32) / 255.0
    # The transpose yields a strided view; hand the runtime a packed array.
    return np.ascontiguousarray(np.expand_dims(chw, 0))


def _to_frame_bbox(bbox, frame_shape):
    """Map a detector-space box onto the frame; return None if it is empty.

    The box is rescaled from detector-input pixels to frame pixels and
    clamped to the frame so that slicing never wraps around via negative
    indices and never produces an empty crop.
    """
    frame_h, frame_w = frame_shape[:2]
    scale_x = frame_w / DETECTOR_INPUT_SIZE
    scale_y = frame_h / DETECTOR_INPUT_SIZE
    bx1, by1, bx2, by2 = bbox
    x1 = min(max(int(bx1 * scale_x), 0), frame_w)
    y1 = min(max(int(by1 * scale_y), 0), frame_h)
    x2 = min(max(int(bx2 * scale_x), 0), frame_w)
    y2 = min(max(int(by2 * scale_y), 0), frame_h)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        input_tensor = _preprocess_for_detector(frame)
        yolo_outputs = ort_session.run(
            None, {detector_input_name: input_tensor}
        )
        persons = get_person_bboxes(yolo_outputs)

        for bbox in persons:
            frame_bbox = _to_frame_bbox(bbox, frame.shape)
            if frame_bbox is None:
                # Degenerate or fully off-frame box: nothing to crop.
                continue
            x1, y1, x2, y2 = frame_bbox

            roi = frame[y1:y2, x1:x2]
            roi_resized = cv2.resize(roi, (POSE_INPUT_SIZE, POSE_INPUT_SIZE))
            # The normalisation constants are in RGB order, so convert the
            # BGR crop before applying them.
            roi_rgb = cv2.cvtColor(roi_resized, cv2.COLOR_BGR2RGB)
            roi_tensor = transform(roi_rgb).unsqueeze(0)

            with torch.no_grad():
                pose_output = pose_model(roi_tensor)

            compliant = check_sop_compliance(pose_output)
            color = (0, 255, 0) if compliant else (0, 0, 255)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            label = 'Compliant' if compliant else 'Non-compliant'
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

        cv2.imshow('SOP Compliance', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close windows even if inference raises.
    cap.release()
    cv2.destroyAllWindows()