Source code for libreyolo.v8.model

"""
Libre YOLO8 implementation.
"""

import io
import json
from datetime import datetime
from typing import Union, List, Optional, Tuple
from pathlib import Path
import torch
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2

from .nn import LibreYOLO8Model
from .utils import preprocess_image, postprocess, draw_boxes, make_anchors, decode_boxes, nms
from ..common.eigen_cam import compute_eigen_cam, overlay_heatmap
from ..common.cam import CAM_METHODS
from ..common.image_loader import ImageInput, ImageLoader
from ..common.utils import get_safe_stem, get_slice_bboxes


class LIBREYOLO8:
    """
    Libre YOLO8 model for object detection.

    Args:
        model_path: Path to model weights file (required)
        size: Model size variant (required). Must be one of: "n", "s", "m", "l", "x"
        reg_max: Regression max value for DFL (default: 16)
        nb_classes: Number of classes (default: 80 for COCO)
        save_feature_maps: Feature map saving mode. Options:
            - False: Disabled (default)
            - True: Save all layers
            - List of layer names: Save only specified layers
              (e.g., ["backbone_p1", "neck_c2f21"])
        save_eigen_cam: If True, saves EigenCAM heatmap visualizations on each
            inference (default: False)
        cam_method: CAM method for explain(). Options: "eigencam", "gradcam",
            "gradcam++", "xgradcam", "hirescam", "layercam", "eigengradcam"
            (default: "eigencam")
        cam_layer: Target layer for CAM computation (default: "neck_c2f22")
        device: Device for inference. "auto" (default) uses CUDA if available,
            else MPS, else CPU. Can also specify directly: "cuda", "cuda:0",
            "mps", "cpu".
        tiling: Enable tiling for processing large/high-resolution images
            (default: False). When enabled, large images are automatically split
            into overlapping 640x640 tiles, inference is run on each tile, and
            results are merged using NMS.

    Example:
        >>> model = LIBREYOLO8(model_path="path/to/weights.pt", size="x", save_feature_maps=True)
        >>> detections = model(image=image_path, save=True)
        >>> # Use explain() for XAI heatmaps
        >>> heatmap = model.explain("image.jpg", method="gradcam")
    """
    def __init__(
        self,
        model_path: Union[str, dict],
        size: str,
        reg_max: int = 16,
        nb_classes: int = 80,
        save_feature_maps: Union[bool, List[str]] = False,
        save_eigen_cam: bool = False,
        cam_method: str = "eigencam",
        cam_layer: Optional[str] = None,
        device: str = "auto",
        tiling: bool = False
    ):
        """
        Initialize the Libre YOLO8 model.

        Args:
            model_path: Path to user-provided model weights file or loaded state dict
            size: Model size variant. Must be "n", "s", "m", "l", or "x"
            reg_max: Regression max value for DFL (default: 16)
            nb_classes: Number of classes (default: 80)
            save_feature_maps: Feature map saving mode. Options:
                - False: Disabled
                - True: Save all layers
                - List[str]: Save only specified layer names
            save_eigen_cam: If True, saves EigenCAM heatmap visualizations
            cam_method: Default CAM method for explain() (default: "eigencam")
            cam_layer: Target layer for CAM computation (default: "neck_c2f22")
            device: Device for inference ("auto", "cuda", "mps", "cpu")
            tiling: Enable tiling for large images (default: False). When enabled,
                images larger than 640x640 are split into overlapping tiles for
                inference.
        """
        self.tiling = tiling

        if size not in ['n', 's', 'm', 'l', 'x']:
            raise ValueError(f"Invalid size: {size}. Must be one of: 'n', 's', 'm', 'l', 'x'")

        # Resolve device
        if device == "auto":
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
            elif torch.backends.mps.is_available():
                self.device = torch.device("mps")
            else:
                self.device = torch.device("cpu")
        else:
            self.device = torch.device(device)

        self.size = size
        self.reg_max = reg_max
        self.nb_classes = nb_classes
        self.save_feature_maps = save_feature_maps
        self.save_eigen_cam = save_eigen_cam
        self.cam_method = cam_method.lower()
        self.feature_maps = {}
        self.hooks = []
        self._eigen_cam_layer = cam_layer or "neck_c2f22"  # Default layer for EigenCAM/CAM

        # Initialize model
        self.model = LibreYOLO8Model(config=size, reg_max=reg_max, nb_classes=nb_classes)

        # Load weights
        if isinstance(model_path, dict):
            self.model_path = None
            self.model.load_state_dict(model_path, strict=True)
        else:
            self.model_path = model_path
            self._load_weights(model_path)

        # Set to evaluation mode and move to device
        self.model.eval()
        self.model.to(self.device)

        # Register hooks for feature map extraction
        if self.save_feature_maps or self.save_eigen_cam:
            self._register_hooks()
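    # Usage sketch (illustrative, not part of the class; the weights path is
    # hypothetical). Device resolution follows the "auto" branch above
    # (CUDA -> MPS -> CPU), and hooks are registered only when feature-map or
    # EigenCAM saving is requested:
    #
    #   >>> model = LIBREYOLO8(model_path="weights/libreyolo8n.pt", size="n",
    #   ...                    save_feature_maps=["neck_c2f22"], device="auto")
    #   >>> model.device   # e.g. device(type='cuda')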
    def _load_weights(self, model_path: str):
        """Load model weights from file."""
        if not Path(model_path).exists():
            raise FileNotFoundError(f"Model weights file not found: {model_path}")
        try:
            state_dict = torch.load(model_path, map_location='cpu', weights_only=False)
            self.model.load_state_dict(state_dict, strict=True)
        except Exception as e:
            raise RuntimeError(f"Failed to load model weights from {model_path}: {e}") from e
    def get_available_layer_names(self) -> List[str]:
        """
        Get list of available layer names for feature map saving.

        Returns:
            List of layer names that can be used with save_feature_maps parameter.
        """
        return sorted(self._get_available_layers().keys())
    def _get_available_layers(self) -> dict:
        """Get mapping of layer names to module objects."""
        return {
            # Backbone layers
            "backbone_p1": self.model.backbone.p1,
            "backbone_p2": self.model.backbone.p2,
            "backbone_c2f1": self.model.backbone.c2f1,
            "backbone_p3": self.model.backbone.p3,
            "backbone_c2f2_P3": self.model.backbone.c2f2,
            "backbone_p4": self.model.backbone.p4,
            "backbone_c2f3_P4": self.model.backbone.c2f3,
            "backbone_p5": self.model.backbone.p5,
            "backbone_c2f4": self.model.backbone.c2f4,
            "backbone_sppf_P5": self.model.backbone.sppf,
            # Neck layers
            "neck_c2f21": self.model.neck.c2f21,
            "neck_c2f11": self.model.neck.c2f11,
            "neck_c2f12": self.model.neck.c2f12,
            "neck_c2f22": self.model.neck.c2f22,
            # Head layers
            "head8_conv11": self.model.head8.conv11,
            "head8_conv21": self.model.head8.conv21,
            "head16_conv11": self.model.head16.conv11,
            "head16_conv21": self.model.head16.conv21,
            "head32_conv11": self.model.head32.conv11,
            "head32_conv21": self.model.head32.conv21,
        }

    def _register_hooks(self):
        """Register forward hooks to capture feature maps from model layers."""
        def get_hook(name):
            def hook(module, input, output):
                # Detach and move to CPU to prevent memory leaks
                self.feature_maps[name] = output.detach().cpu()
            return hook

        available_layers = self._get_available_layers()
        layers_to_hook = set()

        if self.save_feature_maps is True:
            # Hook into all available layers
            layers_to_hook.update(available_layers.keys())
        elif isinstance(self.save_feature_maps, list):
            # Hook into specified layers only
            invalid_layers = [l for l in self.save_feature_maps if l not in available_layers]
            if invalid_layers:
                available = ", ".join(sorted(available_layers.keys()))
                raise ValueError(
                    f"Invalid layer names: {invalid_layers}. "
                    f"Available layers: {available}"
                )
            layers_to_hook.update(self.save_feature_maps)

        # Add EigenCAM layer if enabled
        if self.save_eigen_cam:
            layers_to_hook.add(self._eigen_cam_layer)

        # Register hooks for all required layers
        for layer_name in layers_to_hook:
            module = available_layers[layer_name]
            self.hooks.append(module.register_forward_hook(get_hook(layer_name)))

    def _save_feature_maps(self, image_path):
        """Save feature map visualizations to disk."""
        # Determine the base name for the output directory
        if isinstance(image_path, str):
            stem = get_safe_stem(image_path)
        else:
            stem = "inference"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_dir = Path("runs/feature_maps") / f"{stem}_{timestamp}"
        save_dir.mkdir(parents=True, exist_ok=True)

        # Save metadata
        metadata = {
            "model": "LIBREYOLO8",
            "size": self.size,
            "input_size": [640, 640],
            "image_source": str(image_path) if isinstance(image_path, str) else "PIL/numpy input",
            "layers_captured": list(self.feature_maps.keys())
        }
        with open(save_dir / "metadata.json", "w") as f:
            json.dump(metadata, f, indent=2)

        # Save feature map visualizations
        for layer_name, fmap in self.feature_maps.items():
            # fmap shape: (batch, channels, H, W) - take first batch item
            fmap = fmap[0] if fmap.dim() == 4 else fmap

            # Create a 4x4 grid of the first 16 channels
            channels = min(fmap.shape[0], 16)
            fig, axes = plt.subplots(4, 4, figsize=(12, 12))
            for i in range(16):
                ax = axes[i // 4, i % 4]
                if i < channels:
                    # imshow auto-scales each channel for visualization
                    channel_data = fmap[i].numpy()
                    ax.imshow(channel_data, cmap='viridis')
                ax.axis('off')
            plt.suptitle(f"Feature Maps: {layer_name}\nShape: {list(fmap.shape)}", fontsize=14)
            plt.tight_layout()
            plt.savefig(save_dir / f"{layer_name}.png", bbox_inches='tight', dpi=100)
            plt.close()

        # Clear feature maps after saving (only if not using eigen_cam)
        if not self.save_eigen_cam:
            self.feature_maps.clear()

        return str(save_dir)

    def _save_eigen_cam(self, image_path, original_img: Image.Image):
        """Save EigenCAM heatmap visualizations to disk."""
        # Get the activation from the target layer
        if self._eigen_cam_layer not in self.feature_maps:
            return None
        activation = self.feature_maps[self._eigen_cam_layer]
        # activation shape: (batch, channels, H, W) - take first batch item
        activation = activation[0].numpy() if activation.dim() == 4 else activation.numpy()

        # Compute EigenCAM heatmap
        heatmap = compute_eigen_cam(activation)

        # Determine the base name for the output directory
        if isinstance(image_path, str):
            stem = get_safe_stem(image_path)
        else:
            stem = "inference"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_dir = Path("runs/eigen_cam") / f"{stem}_{timestamp}"
        save_dir.mkdir(parents=True, exist_ok=True)

        # Convert PIL Image to numpy array for overlay
        img_array = np.array(original_img)

        # Save heatmap overlay
        overlay = overlay_heatmap(img_array, heatmap, alpha=0.5)
        Image.fromarray(overlay).save(save_dir / "heatmap_overlay.jpg")

        # Save grayscale heatmap at the original resolution
        heatmap_resized = cv2.resize(heatmap, (img_array.shape[1], img_array.shape[0]))
        heatmap_gray = (heatmap_resized * 255).astype(np.uint8)
        Image.fromarray(heatmap_gray).save(save_dir / "heatmap_grayscale.png")

        # Save metadata
        metadata = {
            "model": "LIBREYOLO8",
            "size": self.size,
            "target_layer": self._eigen_cam_layer,
            "image_source": str(image_path) if isinstance(image_path, str) else "PIL/numpy input"
        }
        with open(save_dir / "metadata.json", "w") as f:
            json.dump(metadata, f, indent=2)

        # Clear feature maps after saving
        self.feature_maps.clear()

        return str(save_dir)
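    # For reference, EigenCAM is gradient-free: it projects the (C, H, W)
    # activation onto its first principal component. A minimal NumPy sketch of
    # what compute_eigen_cam plausibly does (the real implementation lives in
    # ..common.eigen_cam and may differ in centering, sign handling, and
    # normalization):
    #
    #   import numpy as np
    #
    #   def eigen_cam_sketch(activation: np.ndarray) -> np.ndarray:
    #       """activation: (C, H, W) -> heatmap (H, W) in [0, 1]."""
    #       c, h, w = activation.shape
    #       flat = activation.reshape(c, h * w).T          # (H*W, C)
    #       flat = flat - flat.mean(axis=0)                # center features
    #       _, _, vt = np.linalg.svd(flat, full_matrices=False)
    #       cam = flat @ vt[0]                             # project on 1st component
    #       cam = np.maximum(cam, 0).reshape(h, w)         # keep positive part
    #       return cam / (cam.max() + 1e-8)                # normalize to [0, 1]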
    def __call__(
        self,
        image: ImageInput,
        save: bool = False,
        output_path: str = None,
        conf_thres: float = 0.25,
        iou_thres: float = 0.45,
        color_format: str = "auto",
        batch_size: int = 1
    ) -> Union[dict, List[dict]]:
        """
        Run inference on an image or directory of images.

        Args:
            image: Input image or directory. Supported types:
                - str: Local file path, directory path, or URL (http/https/s3/gs)
                - pathlib.Path: Local file path or directory path
                - PIL.Image: PIL Image object
                - np.ndarray: NumPy array (HWC or CHW, RGB or BGR)
                - torch.Tensor: PyTorch tensor (CHW or NCHW)
                - bytes: Raw image bytes
                - io.BytesIO: BytesIO object containing image data
            save: If True, saves the image with detections drawn. Defaults to False.
            output_path: Optional path to save the annotated image. If not provided,
                saves to 'runs/detections/' with a timestamped name.
            conf_thres: Confidence threshold (default: 0.25)
            iou_thres: IoU threshold for NMS (default: 0.45)
            color_format: Color format hint for NumPy/OpenCV arrays.
                - "auto": Auto-detect (default)
                - "rgb": Input is RGB format
                - "bgr": Input is BGR format (e.g., OpenCV)
            batch_size: Number of images to process per batch when handling
                multiple images (e.g., directories). Currently used for chunking
                at the Python level; true batched model inference is planned for
                future versions. Default: 1 (process one image at a time).

        Returns:
            For single image: Dictionary containing detection results with keys:
                - boxes: List of bounding boxes in xyxy format
                - scores: List of confidence scores
                - classes: List of class IDs
                - num_detections: Number of detections
                - source: Source image path (if available)
                - saved_path: Path to saved image (if save=True)
            For directory: List of dictionaries, one per image processed.
        """
        # Check if input is a directory
        if isinstance(image, (str, Path)) and Path(image).is_dir():
            image_paths = ImageLoader.collect_images(image)
            if not image_paths:
                return []
            return self._process_in_batches(
                image_paths,
                batch_size=batch_size,
                save=save,
                output_path=output_path,
                conf_thres=conf_thres,
                iou_thres=iou_thres,
                color_format=color_format
            )

        # Use tiled inference for large images when tiling is enabled
        if self.tiling:
            return self._predict_tiled(image, save, output_path, conf_thres, iou_thres, color_format)

        return self._predict_single(image, save, output_path, conf_thres, iou_thres, color_format)
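    # Usage sketch (paths hypothetical): a single image returns one dict, a
    # directory returns a list of dicts, chunked by batch_size as described in
    # the docstring above:
    #
    #   >>> model = LIBREYOLO8("weights/libreyolo8n.pt", size="n")
    #   >>> single = model("bus.jpg", save=True)       # dict
    #   >>> batch = model("images/", batch_size=4)     # list of dicts
    #   >>> [(r["source"], r["num_detections"]) for r in batch][:2]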
    def _process_in_batches(
        self,
        image_paths: List[Path],
        batch_size: int = 1,
        save: bool = False,
        output_path: str = None,
        conf_thres: float = 0.25,
        iou_thres: float = 0.45,
        color_format: str = "auto"
    ) -> List[dict]:
        """
        Process multiple images, respecting batch_size for chunking.

        This method provides the scaffolding for batch processing. Currently, it
        processes images sequentially within each batch chunk. Future versions
        will implement true batched model inference for improved throughput.

        Args:
            image_paths: List of image paths to process.
            batch_size: Number of images per batch chunk.
            save: If True, saves annotated images.
            output_path: Optional output path for saved images.
            conf_thres: Confidence threshold.
            iou_thres: IoU threshold for NMS.
            color_format: Color format hint.

        Returns:
            List of detection dictionaries, one per image.
        """
        results = []
        for i in range(0, len(image_paths), batch_size):
            chunk = image_paths[i:i + batch_size]
            # TODO: Implement _predict_batch() for true batched model inference
            # For now, process images sequentially within each chunk
            for path in chunk:
                results.append(
                    self._predict_single(path, save, output_path, conf_thres, iou_thres, color_format)
                )
        return results

    def _predict_single(
        self,
        image: ImageInput,
        save: bool = False,
        output_path: str = None,
        conf_thres: float = 0.25,
        iou_thres: float = 0.45,
        color_format: str = "auto"
    ) -> dict:
        """
        Run inference on a single image.

        This is the internal implementation for single-image inference.
        Use __call__ for the public API, which also supports directories.
        """
        # Store original image path for saving
        image_path = image if isinstance(image, (str, Path)) else None

        # Preprocess image
        input_tensor, original_img, original_size = preprocess_image(image, input_size=640, color_format=color_format)

        # Run inference
        with torch.no_grad():
            output = self.model(input_tensor.to(self.device))

        # Postprocess
        detections = postprocess(
            output,
            conf_thres=conf_thres,
            iou_thres=iou_thres,
            input_size=640,
            original_size=original_size
        )

        # Add source path for traceability
        detections["source"] = str(image_path) if image_path else None

        # Save feature maps if enabled
        if self.save_feature_maps:
            feature_maps_path = self._save_feature_maps(image_path)
            detections["feature_maps_path"] = feature_maps_path

        # Save EigenCAM heatmap if enabled
        if self.save_eigen_cam:
            eigen_cam_path = self._save_eigen_cam(image_path, original_img)
            detections["eigen_cam_path"] = eigen_cam_path

        # Draw and save if requested
        if save:
            if detections["num_detections"] > 0:
                annotated_img = draw_boxes(
                    original_img,
                    detections["boxes"],
                    detections["scores"],
                    detections["classes"]
                )
            else:
                annotated_img = original_img

            if output_path:
                final_output_path = Path(output_path)
                if final_output_path.suffix == "":
                    # If directory, create it and use default naming
                    final_output_path.mkdir(parents=True, exist_ok=True)
                    if isinstance(image_path, (str, Path)):
                        stem = get_safe_stem(image_path)
                        ext = Path(image_path).suffix
                    else:
                        stem = "inference"
                        ext = ".jpg"
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    final_output_path = final_output_path / f"{stem}_{timestamp}{ext}"
                else:
                    # If file path, ensure parent directory exists
                    final_output_path.parent.mkdir(parents=True, exist_ok=True)
            else:
                # Determine save directory (matching feature map style)
                if isinstance(image_path, (str, Path)):
                    stem = get_safe_stem(image_path)
                    ext = Path(image_path).suffix
                else:
                    stem = "inference"
                    ext = ".jpg"
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                save_dir = Path("runs/detections")
                save_dir.mkdir(parents=True, exist_ok=True)
                final_output_path = save_dir / f"{stem}_{timestamp}{ext}"

            annotated_img.save(final_output_path)
            detections["saved_path"] = str(final_output_path)

        return detections

    def _merge_tile_detections(
        self,
        boxes: List,
        scores: List,
        classes: List,
        iou_thres: float
    ) -> Tuple[List, List, List]:
        """
        Merge detections from tiles using class-wise NMS.

        Args:
            boxes: List of boxes in xyxy format from all tiles.
            scores: List of confidence scores from all tiles.
            classes: List of class IDs from all tiles.
            iou_thres: IoU threshold for NMS.

        Returns:
            Tuple of (final_boxes, final_scores, final_classes) after merging.
        """
        if not boxes:
            return [], [], []

        boxes_t = torch.tensor(boxes, dtype=torch.float32, device=self.device)
        scores_t = torch.tensor(scores, dtype=torch.float32, device=self.device)
        classes_t = torch.tensor(classes, dtype=torch.int64, device=self.device)

        final_boxes, final_scores, final_classes = [], [], []
        for cls_id in torch.unique(classes_t):
            mask = classes_t == cls_id
            cls_boxes = boxes_t[mask]
            cls_scores = scores_t[mask]
            keep = nms(cls_boxes, cls_scores, iou_thres)
            final_boxes.extend(cls_boxes[keep].cpu().tolist())
            final_scores.extend(cls_scores[keep].cpu().tolist())
            final_classes.extend([cls_id.item()] * len(keep))

        return final_boxes, final_scores, final_classes

    def _predict_tiled(
        self,
        image: ImageInput,
        save: bool = False,
        output_path: str = None,
        conf_thres: float = 0.25,
        iou_thres: float = 0.45,
        color_format: str = "auto"
    ) -> dict:
        """
        Run tiled inference on large images.

        Splits the image into overlapping 640x640 tiles, runs inference on each,
        shifts detections back to original coordinates, and merges with NMS.

        Args:
            image: Input image (path, PIL Image, numpy array, etc.)
            save: If True, saves the annotated image.
            output_path: Optional path to save the annotated image.
            conf_thres: Confidence threshold.
            iou_thres: IoU threshold for NMS.
            color_format: Color format hint for numpy arrays.

        Returns:
            Dictionary with detection results including tiling metadata.
        """
        # Load full image
        img_pil = ImageLoader.load(image, color_format=color_format)
        orig_width, orig_height = img_pil.size
        image_path = image if isinstance(image, (str, Path)) else None

        # Skip tiling if image is already small enough
        if orig_width <= 640 and orig_height <= 640:
            return self._predict_single(image, save, output_path, conf_thres, iou_thres, color_format)

        # Get tile coordinates
        slices = get_slice_bboxes(orig_width, orig_height)

        # Collect all detections from tiles
        all_boxes, all_scores, all_classes = [], [], []
        for x1, y1, x2, y2 in slices:
            # Crop tile from image
            tile = img_pil.crop((x1, y1, x2, y2))

            # Run inference on tile (without saving)
            result = self._predict_single(tile, save=False, conf_thres=conf_thres, iou_thres=iou_thres)

            # Shift boxes back to original image coordinates
            for box in result["boxes"]:
                shifted_box = [box[0] + x1, box[1] + y1, box[2] + x1, box[3] + y1]
                all_boxes.append(shifted_box)
            all_scores.extend(result["scores"])
            all_classes.extend(result["classes"])

        # Merge detections from all tiles using class-wise NMS
        final_boxes, final_scores, final_classes = self._merge_tile_detections(
            all_boxes, all_scores, all_classes, iou_thres
        )

        detections = {
            "boxes": final_boxes,
            "scores": final_scores,
            "classes": final_classes,
            "num_detections": len(final_boxes),
            "source": str(image_path) if image_path else None,
            "tiled": True,
            "num_tiles": len(slices)
        }

        # Draw and save if requested
        if save:
            if detections["num_detections"] > 0:
                annotated_img = draw_boxes(
                    img_pil,
                    detections["boxes"],
                    detections["scores"],
                    detections["classes"]
                )
            else:
                annotated_img = img_pil

            if output_path:
                final_output_path = Path(output_path)
                if final_output_path.suffix == "":
                    final_output_path.mkdir(parents=True, exist_ok=True)
                    if isinstance(image_path, (str, Path)):
                        stem = get_safe_stem(image_path)
                        ext = Path(image_path).suffix
                    else:
                        stem = "inference"
                        ext = ".jpg"
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    final_output_path = final_output_path / f"{stem}_{timestamp}{ext}"
                else:
                    final_output_path.parent.mkdir(parents=True, exist_ok=True)
            else:
                if isinstance(image_path, (str, Path)):
                    stem = get_safe_stem(image_path)
                    ext = Path(image_path).suffix
                else:
                    stem = "inference"
                    ext = ".jpg"
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                save_dir = Path("runs/detections")
                save_dir.mkdir(parents=True, exist_ok=True)
                final_output_path = save_dir / f"{stem}_{timestamp}{ext}"

            annotated_img.save(final_output_path)
            detections["saved_path"] = str(final_output_path)

        return detections
    def export(self, output_path: str = None, input_size: int = 640, opset: int = 12) -> str:
        """
        Export the model to ONNX format.

        Args:
            output_path: Path to save the ONNX file. If None, uses the model's
                weights path with .onnx extension.
            input_size: The image size to export for (default: 640).
            opset: ONNX opset version (default: 12).

        Returns:
            Path to the exported ONNX file.
        """
        import inspect
        import torch.onnx

        # Torch's exporter requires the `onnx` package in the environment.
        # Use a spec check (instead of importing) so optional deps don't trigger
        # static-analysis import errors.
        import importlib.util
        if importlib.util.find_spec("onnx") is None:
            raise ImportError(
                "ONNX export requires the optional ONNX dependencies. "
                "Install them with `uv sync --extra onnx` (recommended) or "
                "`pip install -e '.[onnx]'`."
            )

        if output_path is None:
            if self.model_path and isinstance(self.model_path, str):
                output_path = str(Path(self.model_path).with_suffix('.onnx'))
            else:
                output_path = f"libreyolo8{self.size}.onnx"

        print(f"Exporting LibreYOLO8 {self.size} to {output_path}...")

        # 1. Create a dummy input (Batch, Channels, Height, Width)
        device = next(self.model.parameters()).device
        dummy_input = torch.randn(1, 3, input_size, input_size).to(device)

        # 2. Define a wrapper that decodes boxes for end-to-end inference
        class ONNXWrapper(torch.nn.Module):
            def __init__(self, model):
                super().__init__()
                self.model = model

            def forward(self, x):
                output = self.model(x)

                # Collect outputs from the 3 heads
                box_layers = [output['x8']['box'], output['x16']['box'], output['x32']['box']]
                cls_layers = [output['x8']['cls'], output['x16']['cls'], output['x32']['cls']]
                strides = [8, 16, 32]

                # Generate anchors (traceable)
                anchors, stride_tensor = make_anchors(box_layers, strides)

                # Flatten and concatenate predictions
                # Box: (Batch, 4, H, W) -> (Batch, N, 4)
                box_preds = torch.cat([x.flatten(2).permute(0, 2, 1) for x in box_layers], dim=1)
                # Cls: (Batch, 80, H, W) -> (Batch, N, 80)
                cls_preds = torch.cat([x.flatten(2).permute(0, 2, 1) for x in cls_layers], dim=1)

                # Decode boxes to xyxy (Batch, N, 4)
                decoded_boxes = decode_boxes(box_preds, anchors, stride_tensor)

                # Apply sigmoid to class scores
                cls_scores = cls_preds.sigmoid()

                # Return concatenated [boxes, scores]: (Batch, N, 84)
                return torch.cat([decoded_boxes, cls_scores], dim=-1)

        wrapper = ONNXWrapper(self.model)
        wrapper.eval()

        # 3. Perform the export
        try:
            # Newer PyTorch versions may default to the "dynamo" ONNX exporter, which
            # pulls in extra deps like `onnxscript`. Prefer the legacy exporter by
            # explicitly setting `dynamo=False` when the argument exists.
            export_kwargs = {}
            try:
                if "dynamo" in inspect.signature(torch.onnx.export).parameters:
                    export_kwargs["dynamo"] = False
            except Exception:
                # If signature introspection fails for any reason, just proceed.
                pass

            torch.onnx.export(
                wrapper,
                dummy_input,
                output_path,
                export_params=True,
                opset_version=opset,
                do_constant_folding=True,
                input_names=['images'],
                output_names=['output'],
                dynamic_axes={
                    'images': {0: 'batch', 2: 'height', 3: 'width'},
                    'output': {0: 'batch'}
                },
                **export_kwargs,
            )
            print(f"Export complete: {output_path}")
            return output_path
        except Exception as e:
            print(f"Export failed: {e}")
            raise
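    # Usage sketch (weights path hypothetical; assumes the optional `onnx`
    # extra is installed, plus `onnxruntime` for the inference half). The input
    # name 'images' and the (batch, N, 4 + nb_classes) output layout follow the
    # export arguments above:
    #
    #   >>> model = LIBREYOLO8("weights/libreyolo8n.pt", size="n")
    #   >>> onnx_path = model.export(opset=12)
    #   >>> import numpy as np
    #   >>> import onnxruntime as ort
    #   >>> sess = ort.InferenceSession(onnx_path)
    #   >>> preds = sess.run(None, {"images": np.zeros((1, 3, 640, 640), np.float32)})[0]
    #   >>> preds.shape   # (1, N, 84): xyxy boxes plus per-class scores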
    def predict(
        self,
        image: ImageInput,
        save: bool = False,
        output_path: str = None,
        conf_thres: float = 0.25,
        iou_thres: float = 0.45,
        color_format: str = "auto",
        batch_size: int = 1
    ) -> Union[dict, List[dict]]:
        """
        Alias for the __call__ method.

        Args:
            image: Input image or directory. Supported types:
                - str: Local file path, directory path, or URL (http/https/s3/gs)
                - pathlib.Path: Local file path or directory path
                - PIL.Image: PIL Image object
                - np.ndarray: NumPy array (HWC or CHW, RGB or BGR)
                - torch.Tensor: PyTorch tensor (CHW or NCHW)
                - bytes: Raw image bytes
                - io.BytesIO: BytesIO object containing image data
            save: If True, saves the image with detections drawn. Defaults to False.
            output_path: Optional path to save the annotated image.
            conf_thres: Confidence threshold (default: 0.25)
            iou_thres: IoU threshold for NMS (default: 0.45)
            color_format: Color format hint for NumPy/OpenCV arrays ("auto", "rgb", "bgr")
            batch_size: Number of images to process per batch when handling
                multiple images (e.g., directories). Default: 1.

        Returns:
            For single image: Dictionary containing detection results.
            For directory: List of dictionaries, one per image processed.
        """
        return self(image=image, save=save, output_path=output_path, conf_thres=conf_thres,
                    iou_thres=iou_thres, color_format=color_format, batch_size=batch_size)
    def explain(
        self,
        image: ImageInput,
        method: Optional[str] = None,
        target_layer: Optional[str] = None,
        eigen_smooth: bool = False,
        save: bool = False,
        output_path: Optional[str] = None,
        alpha: float = 0.5,
        color_format: str = "auto"
    ) -> dict:
        """
        Generate an explainability heatmap for the given image using CAM methods.

        This method provides visual explanations of what the model focuses on
        when making predictions. It supports multiple CAM (Class Activation
        Mapping) techniques, including gradient-based and gradient-free methods.

        Args:
            image: Input image. Supported types:
                - str: Local file path or URL (http/https/s3/gs)
                - pathlib.Path: Local file path
                - PIL.Image: PIL Image object
                - np.ndarray: NumPy array (HWC or CHW, RGB or BGR)
                - torch.Tensor: PyTorch tensor (CHW or NCHW)
                - bytes: Raw image bytes
                - io.BytesIO: BytesIO object containing image data
            method: CAM method to use. Options:
                - "eigencam": Gradient-free, SVD-based (default)
                - "gradcam": Gradient-weighted class activation
                - "gradcam++": Improved GradCAM with second-order gradients
                - "xgradcam": Axiom-based GradCAM
                - "hirescam": High-resolution CAM
                - "layercam": Layer-wise CAM
                - "eigengradcam": Eigen-based gradient CAM
            target_layer: Layer name for CAM computation. Use
                get_available_layer_names() to see options. Defaults to "neck_c2f22".
            eigen_smooth: Apply SVD smoothing to the heatmap (default: False).
            save: If True, saves the heatmap visualization to disk.
            output_path: Optional path to save the visualization.
            alpha: Blending factor for overlay (default: 0.5).
            color_format: Color format hint for NumPy/OpenCV arrays ("auto", "rgb", "bgr").

        Returns:
            Dictionary containing:
                - heatmap: Grayscale heatmap array of shape (H, W) with values in [0, 1]
                - overlay: RGB overlay image as numpy array
                - original_image: Original image as PIL Image
                - method: CAM method used
                - target_layer: Target layer used
                - saved_path: Path to saved visualization (if save=True)

        Example:
            >>> model = LIBREYOLO8("yolo8n.pt", size="n")
            >>> result = model.explain("image.jpg", method="gradcam", save=True)
            >>> heatmap = result["heatmap"]
            >>> overlay = result["overlay"]
        """
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f"alpha must be between 0 and 1, got {alpha}")

        method = (method or self.cam_method).lower()
        target_layer = target_layer or self._eigen_cam_layer

        if method not in CAM_METHODS:
            available = ", ".join(CAM_METHODS.keys())
            raise ValueError(f"Unknown CAM method '{method}'. Available: {available}")

        # Validate layer
        available_layers = self._get_available_layers()
        if target_layer not in available_layers:
            available = ", ".join(sorted(available_layers.keys()))
            raise ValueError(f"Unknown layer '{target_layer}'. Available: {available}")

        # Preprocess image
        input_tensor, original_img, original_size = preprocess_image(image, input_size=640, color_format=color_format)

        # Get target layer module
        target_module = available_layers[target_layer]

        # Create CAM instance
        cam_class = CAM_METHODS[method]
        cam = cam_class(
            model=self.model,
            target_layers=[target_module],
            reshape_transform=None
        )

        try:
            # Compute CAM
            grayscale_cam = cam(input_tensor.to(self.device), eigen_smooth=eigen_smooth)

            # Get the first batch item
            heatmap = grayscale_cam[0]

            # Resize heatmap to original image size
            heatmap_resized = cv2.resize(heatmap, (original_size[0], original_size[1]))

            # Normalize to [0, 1]
            heatmap_min, heatmap_max = heatmap_resized.min(), heatmap_resized.max()
            if heatmap_max - heatmap_min > 1e-8:
                heatmap_resized = (heatmap_resized - heatmap_min) / (heatmap_max - heatmap_min)

            # Create overlay
            img_array = np.array(original_img)
            overlay = overlay_heatmap(img_array, heatmap_resized, alpha=alpha)

            result = {
                "heatmap": heatmap_resized,
                "overlay": overlay,
                "original_image": original_img,
                "method": method,
                "target_layer": target_layer,
            }

            # Save if requested
            if save:
                image_path = image if isinstance(image, (str, Path)) else None
                if image_path is not None:
                    stem = get_safe_stem(image_path)
                else:
                    stem = "inference"
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

                if output_path:
                    save_dir = Path(output_path)
                    if save_dir.suffix:
                        save_dir = save_dir.parent
                else:
                    save_dir = Path(f"runs/{method}") / f"{stem}_{timestamp}"
                save_dir.mkdir(parents=True, exist_ok=True)

                # Save overlay
                Image.fromarray(overlay).save(save_dir / "heatmap_overlay.jpg")

                # Save grayscale heatmap
                heatmap_gray = (heatmap_resized * 255).astype(np.uint8)
                Image.fromarray(heatmap_gray).save(save_dir / "heatmap_grayscale.png")

                # Save metadata
                metadata = {
                    "model": "LIBREYOLO8",
                    "size": self.size,
                    "method": method,
                    "target_layer": target_layer,
                    "eigen_smooth": eigen_smooth,
                    "image_source": str(image_path) if image_path is not None else "PIL/numpy input"
                }
                with open(save_dir / "metadata.json", "w") as f:
                    json.dump(metadata, f, indent=2)

                result["saved_path"] = str(save_dir)

            return result
        finally:
            # Clean up CAM resources
            cam.release()
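    # Usage sketch (paths hypothetical): generate a GradCAM overlay and save it
    # alongside the grayscale heatmap and metadata described above:
    #
    #   >>> model = LIBREYOLO8("weights/libreyolo8n.pt", size="n")
    #   >>> result = model.explain("bus.jpg", method="gradcam",
    #   ...                        target_layer="neck_c2f22", save=True)
    #   >>> result["heatmap"].shape   # (H, W), values in [0, 1]
    #   >>> result["saved_path"]      # e.g. 'runs/gradcam/bus_<timestamp>'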
    @staticmethod
    def get_available_cam_methods() -> List[str]:
        """
        Get list of available CAM methods.

        Returns:
            List of CAM method names that can be used with explain().
        """
        return list(CAM_METHODS.keys())
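    # Illustrative sketch: enumerate the supported CAM methods before calling
    # explain(). The exact ordering depends on the CAM_METHODS registry:
    #
    #   >>> LIBREYOLO8.get_available_cam_methods()
    #   # e.g. ['eigencam', 'gradcam', 'gradcam++', 'xgradcam', ...]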