Source code for aepsych.models.inducing_points.kmeans

from typing import Optional

import torch
from aepsych.models.inducing_points.base import BaseAllocator
from scipy.cluster.vq import kmeans2


class KMeansAllocator(BaseAllocator):
    """An inducing point allocator that uses k-means++ to allocate inducing points."""

    def allocate_inducing_points(
        self,
        inputs: Optional[torch.Tensor] = None,
        covar_module: Optional[torch.nn.Module] = None,
        num_inducing: int = 100,
        input_batch_shape: torch.Size = torch.Size([]),
    ) -> torch.Tensor:
        """
        Generates up to `num_inducing` inducing points by running k-means (with
        k-means++ initialization) on the unique rows of the input data.

        Args:
            inputs (torch.Tensor, optional): A tensor of shape (n, d) containing the
                input data. If None, the result of `self._allocate_dummy_points` is
                returned instead. If the second dimension differs from `self.dim`,
                only the first `self.dim` columns are used.
            covar_module (torch.nn.Module, optional): Kernel covariance module;
                included for API compatibility, but not used here.
            num_inducing (int, optional): The number of inducing points to generate.
                Defaults to 100.
            input_batch_shape (torch.Size, optional): Batch shape, defaults to an
                empty size; included for API compatibility, but not used here.

        Returns:
            torch.Tensor: A (num_inducing, d) tensor of k-means centroids, with the
            same dtype and device as `inputs`. If `inputs` has `num_inducing` or
            fewer unique rows, those unique rows are returned directly, so the
            result may have fewer than `num_inducing` rows.
        """
        if inputs is None:
            # No data to cluster yet; fall back to the allocator's dummy points.
            return self._allocate_dummy_points(num_inducing=num_inducing)

        if inputs.shape[1] != self.dim:
            # The inputs were augmented somehow; assume the extra columns were
            # appended after the first `self.dim` dimensions and drop them.
            inputs = inputs[:, : self.dim, ...]

        # Recorded only once real inputs are processed (not for dummy points).
        self.last_allocator_used = self.__class__

        # Ensure inputs are unique to avoid duplication issues with k-means++.
        unique_inputs = torch.unique(inputs, dim=0)

        # With too few unique points there is nothing to cluster; return them as-is.
        if unique_inputs.shape[0] <= num_inducing:
            return unique_inputs

        # SciPy works on CPU NumPy arrays. Detach first so the conversion also
        # works for tensors that are attached to an autograd graph.
        centroids, _ = kmeans2(
            unique_inputs.detach().cpu().numpy(), num_inducing, minit="++"
        )

        # Restore the caller's dtype and device so this branch is consistent with
        # the early-return branch above, which returns a slice of `inputs` itself.
        return torch.tensor(centroids, dtype=inputs.dtype, device=inputs.device)