# ----------------------------------------------------------- # file: run_inference.py # ----------------------------------------------------------- import yaml import torch import cv2 import numpy as np from pathlib import Path
model:
  name: sam_samantha
  version: 5
  backbone: vit_h
  image_size: 1024
  num_classes: 1  # Usually segmentation → binary mask
preprocess:
  normalize: true
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
device: cuda

Below is a minimal, self‑contained script that loads the model and runs a single inference on a video frame.
# ------------------------------------------------------------------ # 4️⃣ Pre‑process a single frame (example uses OpenCV) # ------------------------------------------------------------------ def preprocess(img: np.ndarray, cfg) -> torch.Tensor: # Resize while keeping aspect ratio (optional) target_sz = cfg["model"]["image_size"] img_resized = cv2.resize(img, (target_sz, target_sz))
