# vision-ai-test/main4.py

import cv2
import numpy as np
import onnxruntime as ort
from ultralytics import YOLO

# Load YOLO model for person detection (ONNX export)
yolo_model_path = 'yolo11m-2_uniform.onnx'
ort_session = ort.InferenceSession(yolo_model_path)
input_name = ort_session.get_inputs()[0].name  # avoid hardcoding 'images'

# Load YOLO pose estimation model. Ultralytics .pt checkpoints are loaded
# through the YOLO wrapper; torch.load() returns a raw checkpoint dict,
# not a callable model. The wrapper also resizes and normalises inputs
# itself, so the manual torchvision transform is unnecessary.
pose_model_path = 'yolo11s-pose.pt'
pose_model = YOLO(pose_model_path)

video_path = 'PF-071124-2.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error opening video file")
    exit()

def get_person_bboxes(outputs):
    # TODO: parse the raw ONNX outputs into (x1, y1, x2, y2) boxes in
    # original-frame coordinates; one possible approach is sketched below
    return []
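

# A minimal sketch of what get_person_bboxes could look like, assuming the
# ONNX export uses the usual Ultralytics layout: a single output of shape
# (1, 4 + num_classes, num_anchors), with boxes as (cx, cy, w, h) in
# 640x640 input space. The class index and both thresholds are placeholders
# and should be verified against this particular model before wiring it in.
def parse_yolo_outputs_sketch(outputs, frame_shape, person_class=0,
                              conf_thres=0.5, iou_thres=0.45):
    preds = np.squeeze(outputs[0], axis=0).T   # -> (num_anchors, 4 + num_classes)
    scores = preds[:, 4 + person_class]        # confidence for the person class
    mask = scores > conf_thres
    preds, scores = preds[mask], scores[mask]
    if len(preds) == 0:
        return []
    # (cx, cy, w, h) in 640x640 space -> (x, y, w, h) scaled to frame size
    sx, sy = frame_shape[1] / 640.0, frame_shape[0] / 640.0
    boxes = [[int((cx - w / 2) * sx), int((cy - h / 2) * sy), int(w * sx), int(h * sy)]
             for cx, cy, w, h in preds[:, :4]]
    keep = cv2.dnn.NMSBoxes(boxes, scores.tolist(), conf_thres, iou_thres)
    return [[x, y, x + w, y + h]
            for x, y, w, h in (boxes[i] for i in np.array(keep).flatten())]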


def check_sop_compliance(pose_output):
    # TODO: implement the actual SOP compliance logic on the pose results;
    # a purely illustrative example is sketched below
    return True
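

# One possible shape for the compliance check, assuming pose_output is the
# list of Ultralytics Results produced in the loop below and the model uses
# the COCO 17-keypoint layout (5/6 = shoulders, 9/10 = wrists). The rule
# itself (both wrists raised above shoulder height) is purely illustrative;
# the real SOP criteria belong here.
def check_sop_compliance_sketch(pose_output, min_kpt_conf=0.5):
    result = pose_output[0]
    if result.keypoints is None or len(result.keypoints) == 0:
        return False
    kpts = result.keypoints.data[0].cpu().numpy()   # (17, 3): x, y, confidence
    shoulders, wrists = kpts[[5, 6]], kpts[[9, 10]]
    if (shoulders[:, 2] < min_kpt_conf).any() or (wrists[:, 2] < min_kpt_conf).any():
        return False   # pose too uncertain to judge
    # image y grows downwards, so "above" means a smaller y value
    return bool((wrists[:, 1] < shoulders[:, 1]).all())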

# Main loop: detect people, estimate pose per person, annotate the frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocess for the ONNX detector: BGR -> RGB, resize to the 640x640
    # model input, HWC -> CHW, scale to [0, 1], and add a batch dimension.
    # Note: a plain resize distorts the aspect ratio; mirror the export's
    # letterboxing here if it used any.
    input_frame = cv2.cvtColor(cv2.resize(frame, (640, 640)), cv2.COLOR_BGR2RGB)
    input_frame = np.transpose(input_frame, (2, 0, 1)).astype(np.float32) / 255.0
    input_tensor = np.expand_dims(input_frame, 0)

    yolo_outputs = ort_session.run(None, {input_name: input_tensor})
    persons = get_person_bboxes(yolo_outputs)
    for bbox in persons:
        x1, y1, x2, y2 = map(int, bbox)
        roi = frame[y1:y2, x1:x2]
        if roi.size == 0:
            continue

        # Ultralytics resizes and normalises internally, so the raw BGR
        # crop goes straight to the pose model
        pose_output = pose_model(roi, verbose=False)
        compliant = check_sop_compliance(pose_output)

        # Draw the box and label: green for compliant, red otherwise
        color = (0, 255, 0) if compliant else (0, 0, 255)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        label = 'Compliant' if compliant else 'Non-compliant'
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    cv2.imshow('SOP Compliance', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()