Source code for libreyolo.common.cam.gradcampp

"""
GradCAM++ implementation for YOLO model interpretability.

GradCAM++ uses second-order gradients to compute weighted importance,
providing better localization especially for multiple instances of the
same class in an image.

Reference: https://arxiv.org/abs/1710.11063
"""

from typing import List, Optional, Callable
import numpy as np
import torch
import torch.nn as nn

from .base import BaseCAM


class GradCAMPlusPlus(BaseCAM):
    """
    GradCAM++: Improved Gradient-weighted Class Activation Mapping.

    Channel weights are built from pixel-wise coefficients that depend on
    the squared and cubed gradients, which makes the method particularly
    effective when multiple instances of the same class appear in the image.

    Reference:
        Chattopadhyay, A., et al. (2018). Grad-CAM++: Improved Visual
        Explanations for Deep Convolutional Networks. arXiv:1710.11063
    """

    def __init__(
        self,
        model: nn.Module,
        target_layers: List[nn.Module],
        reshape_transform: Optional[Callable] = None
    ) -> None:
        """
        Initialize GradCAM++.

        Args:
            model: The neural network model.
            target_layers: List of target layers for CAM computation.
            reshape_transform: Optional transform for activation shapes.
        """
        super().__init__(
            model,
            target_layers,
            reshape_transform,
            uses_gradients=True  # GradCAM++ requires gradients
        )

    def get_cam_weights(
        self,
        input_tensor: torch.Tensor,
        target_layer: nn.Module,
        targets: Optional[List],
        activations: np.ndarray,
        grads: Optional[np.ndarray]
    ) -> np.ndarray:
        """
        Compute GradCAM++ channel weights.

        For every batch item ``b``, channel ``k`` and pixel ``(i, j)``::

            alpha[b, k, i, j] = g^2 / (2 * g^2 + S[b, k] * g^3 + eps)
            weights[b, k]     = sum_ij(alpha[b, k, i, j] * ReLU(g))

        where ``g = grads[b, k, i, j]`` and ``S[b, k]`` is the sum of
        ``activations[b, k]`` over its spatial dimensions.

        Args:
            input_tensor: The input image tensor (not used by this method).
            target_layer: The layer being processed (not used by this method).
            targets: Optional target specifications (not used by this method).
            activations: The layer activations of shape (B, C, H, W).
            grads: The gradients of shape (B, C, H, W), or None when no
                gradients are available.

        Returns:
            Weights array of shape (B, C). When ``grads`` is None, an
            all-ones float32 array of that shape is returned.
        """
        if grads is None:
            # No gradient signal: weight every channel equally.
            return np.ones(
                (activations.shape[0], activations.shape[1]),
                dtype=np.float32
            )

        grads_2 = grads ** 2
        grads_3 = grads_2 * grads

        # Spatial sum of the activations only, shape (B, C). The cubic
        # gradient factor is per-pixel and must stay outside this sum.
        sum_activations = np.sum(activations, axis=(2, 3))

        # Broadcast (B, C) -> (B, C, 1, 1) so it combines with the
        # (B, C, H, W) gradient powers.
        sum_term = sum_activations[:, :, np.newaxis, np.newaxis]

        # eps keeps the denominator away from zero where the gradient is 0.
        eps = 1e-7

        # Pixel-wise importance coefficients, shape (B, C, H, W).
        alpha = grads_2 / (2 * grads_2 + sum_term * grads_3 + eps)

        # The denominator can still cancel to zero when the cubic term is
        # negative; treat any resulting NaN/Inf as "no contribution".
        alpha = np.nan_to_num(alpha, nan=0.0, posinf=0.0, neginf=0.0)

        # Only positive gradients contribute to the class score.
        positive_grads = np.maximum(grads, 0)

        # Collapse the spatial dimensions: (B, C, H, W) -> (B, C).
        weights = np.sum(alpha * positive_grads, axis=(2, 3))

        return weights