Source code for libreyolo.common.cam.gradcam

"""
GradCAM implementation for YOLO model interpretability.

GradCAM (Gradient-weighted Class Activation Mapping) uses the gradients
flowing into the target layer to produce a coarse localization map highlighting
important regions in the image for predicting the concept.

Reference: https://arxiv.org/abs/1610.02391
"""

from typing import List, Optional, Callable
import numpy as np
import torch
import torch.nn as nn

from .base import BaseCAM


class GradCAM(BaseCAM):
    """
    Gradient-weighted Class Activation Mapping (GradCAM).

    This subclass supplies the per-channel weighting rule: each channel's
    weight is the spatial average of the gradients captured at the target
    layer. Everything else (hook management, combining weights with the
    activations, producing the final map) is inherited from ``BaseCAM``.

    Reference:
        Selvaraju, R. R., et al. (2017). Grad-CAM: Visual Explanations
        from Deep Networks via Gradient-based Localization.
        arXiv:1610.02391
    """

    def __init__(
        self,
        model: nn.Module,
        target_layers: List[nn.Module],
        reshape_transform: Optional[Callable] = None
    ) -> None:
        """
        Set up a GradCAM instance with gradient capture enabled.

        Args:
            model: Network whose predictions are to be explained.
            target_layers: Layers used for the CAM computation.
            reshape_transform: Optional callable forwarded unchanged to
                ``BaseCAM`` for adapting activation shapes.
        """
        # Gradients are needed to compute the channel weights below.
        super().__init__(model, target_layers, reshape_transform, uses_gradients=True)

    def get_cam_weights(
        self,
        input_tensor: torch.Tensor,
        target_layer: nn.Module,
        targets: Optional[List],
        activations: np.ndarray,
        grads: np.ndarray
    ) -> np.ndarray:
        """
        Return one weight per channel by averaging gradients over space.

        Args:
            input_tensor: The input image tensor (not used by this method).
            target_layer: The layer being processed (not used by this method).
            targets: Optional target specifications (not used by this method).
            activations: Layer activations, expected as (B, C, H, W). Only
                its first two dimensions are read, and only in the fallback.
            grads: Gradients, expected as (B, C, H, W), or ``None`` when no
                gradients are available.

        Returns:
            Array of shape (B, C): the mean of ``grads`` over axes 2 and 3,
            or an all-ones float32 array when ``grads`` is ``None``.
        """
        if grads is not None:
            # Global average pooling: collapse H and W, keep batch and channel.
            spatial_axes = (2, 3)
            return np.mean(grads, axis=spatial_axes)

        # No gradients: fall back to uniform weights so every channel
        # contributes equally.
        batch_size = activations.shape[0]
        num_channels = activations.shape[1]
        return np.ones((batch_size, num_channels), dtype=np.float32)