Source code for libreyolo.common.cam.eigen_cam

"""
EigenCAM implementation for YOLO model interpretability.

EigenCAM computes the first principal component of 2D activations using SVD,
producing class-agnostic saliency maps without requiring backpropagation.

Reference: https://arxiv.org/abs/2008.00299
"""

from typing import List, Optional, Callable
import numpy as np
import torch
import torch.nn as nn

from .base import BaseCAM


class EigenCAM(BaseCAM):
    """
    EigenCAM: Class Activation Map using Principal Components.

    This is a gradient-free method that uses SVD to find the first principal
    component of the 2D activations. It produces class-agnostic saliency maps
    that highlight generally important regions.

    Reference:
        Muhammad, M. B., & Yeasin, M. (2020). Eigen-CAM: Class Activation Map
        using Principal Components. arXiv:2008.00299
    """
    def __init__(
        self,
        model: nn.Module,
        target_layers: List[nn.Module],
        reshape_transform: Optional[Callable] = None
    ) -> None:
        """
        Initialize EigenCAM.

        Args:
            model: The neural network model.
            target_layers: List of target layers for CAM computation.
            reshape_transform: Optional transform for activation shapes.
        """
        super().__init__(
            model,
            target_layers,
            reshape_transform,
            uses_gradients=False  # EigenCAM doesn't need gradients
        )
    def get_cam_weights(
        self,
        input_tensor: torch.Tensor,
        target_layer: nn.Module,
        targets: Optional[List],
        activations: np.ndarray,
        grads: Optional[np.ndarray]
    ) -> np.ndarray:
        """
        EigenCAM doesn't use weights - it directly computes the SVD projection.

        This method returns ones, since the actual computation happens in
        get_cam_image, which is overridden.
        """
        # Return uniform weights - actual computation in get_cam_image
        return np.ones(
            (activations.shape[0], activations.shape[1]), dtype=np.float32
        )
    def get_cam_image(
        self,
        input_tensor: torch.Tensor,
        target_layer: nn.Module,
        targets: Optional[List],
        activations: np.ndarray,
        grads: Optional[np.ndarray],
        eigen_smooth: bool = False
    ) -> np.ndarray:
        """
        Compute EigenCAM using SVD on activations.

        Args:
            input_tensor: The input image tensor.
            target_layer: The layer being processed.
            targets: Ignored for EigenCAM.
            activations: The layer activations of shape (B, C, H, W).
            grads: Ignored for EigenCAM.
            eigen_smooth: Ignored (always uses eigen method).

        Returns:
            CAM array of shape (B, H, W).
        """
        return self._get_2d_projection(activations)
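
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not executed here). It assumes BaseCAM exposes a
# pytorch-grad-cam style __call__ that returns one (H, W) saliency map per
# image; check .base for the actual interface. `yolo_model` and the chosen
# target layer are hypothetical placeholders:
#
#     cam = EigenCAM(model=yolo_model, target_layers=[yolo_model.model[-2]])
#     grayscale_cam = cam(input_tensor)   # assumed shape: (B, H, W)
#     heatmap = grayscale_cam[0]          # class-agnostic map for image 0
# ---------------------------------------------------------------------------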
# Standalone functions for backward compatibility with existing code
def compute_eigen_cam(activations: np.ndarray) -> np.ndarray:
    """
    Compute EigenCAM heatmap from layer activations using SVD.

    This is a standalone function for backward compatibility.

    Args:
        activations: Feature map tensor of shape (C, H, W) where C is channels.

    Returns:
        Normalized heatmap of shape (H, W) with values in [0, 1].
    """
    # Handle NaN and Inf values
    activations = np.nan_to_num(activations, nan=0.0, posinf=0.0, neginf=0.0)

    # Reshape: (C, H, W) -> (H*W, C)
    reshaped = activations.reshape(activations.shape[0], -1).T.astype(np.float64)

    # Center the data (important for SVD)
    reshaped = reshaped - reshaped.mean(axis=0)

    # Compute SVD and project onto first principal component
    try:
        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            _, S, VT = np.linalg.svd(reshaped, full_matrices=False)

            # Check if singular values are valid
            if len(S) == 0 or S[0] < 1e-10:
                return np.zeros(activations.shape[1:], dtype=np.float32)

            projection = reshaped @ VT[0]

            # Sign correction heuristic:
            # SVD sign is arbitrary. Ensure the projection is positively
            # correlated with the mean activation to avoid inverted heatmaps.
            if np.corrcoef(projection, reshaped.mean(axis=1))[0, 1] < 0:
                projection = -projection
    except (np.linalg.LinAlgError, ValueError):
        return np.zeros(activations.shape[1:], dtype=np.float32)

    # Reshape back to spatial dimensions
    heatmap = projection.reshape(activations.shape[1:])

    # Handle any NaN/Inf from computation
    heatmap = np.nan_to_num(heatmap, nan=0.0, posinf=0.0, neginf=0.0)

    # ReLU: keep only positive activations
    heatmap = np.maximum(heatmap, 0)

    # Normalize to [0, 1]
    heatmap_min, heatmap_max = heatmap.min(), heatmap.max()
    if heatmap_max - heatmap_min > 1e-8:
        heatmap = (heatmap - heatmap_min) / (heatmap_max - heatmap_min)
    else:
        heatmap = np.zeros_like(heatmap)

    return heatmap.astype(np.float32)
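
# ---------------------------------------------------------------------------
# Minimal sketch exercising compute_eigen_cam on random activations (added as
# an illustration; the random array stands in for real backbone features and
# the shapes below are arbitrary). It checks the documented contract: a
# (C, H, W) input yields a normalized (H, W) heatmap with values in [0, 1].
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(seed=0)
    dummy_activations = rng.standard_normal((256, 20, 20)).astype(np.float32)

    heatmap = compute_eigen_cam(dummy_activations)

    assert heatmap.shape == (20, 20)
    assert heatmap.min() >= 0.0 and heatmap.max() <= 1.0
    print(f"EigenCAM heatmap: shape={heatmap.shape}, "
          f"min={heatmap.min():.3f}, max={heatmap.max():.3f}")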