Source code for twocan.utils

"""
Utility functions and preprocessing classes for Twocan image registration.

This module provides preprocessing classes for different imaging modalities,
utility functions for data manipulation, and helper functions for working with
SpatialData objects and transformation matrices.
"""

from typing import List, Optional, Tuple, Union
import re
import numpy as np
import cv2
#from skimage import transform
from skimage.filters import gaussian
from scipy.stats.mstats import winsorize
import numpy as np
from typing import List
import spatialdata as sd
from spatialdata.transformations import get_transformation, set_transformation
from spatialdata.transformations.transformations import BaseTransformation, Sequence
import pandas as pd


[docs] def stretch_255(image: np.ndarray) -> np.ndarray: """Convert an image to 8-bit grayscale by stretching its range to [0, 255]. This function linearly stretches the intensity values of an image to span the full 8-bit range [0, 255]. If the input image has zero maximum value, the original image is returned unchanged. Parameters ---------- image : np.ndarray Input image array of any shape and dtype. Returns ------- np.ndarray 8-bit grayscale image with values in [0, 255] and dtype uint8. Returns original image unchanged if max value is 0. """ if image.max() == 0: return image return (image * (255/image.max())).astype('uint8')
[docs] def prep_zarr(IF_arr: np.ndarray, IMC_arr: np.ndarray, IF_panel: List[str], IMC_panel: List[str]) -> sd.SpatialData: """Create a SpatialData object from IF and IMC arrays with their channel panels. This function takes raw image arrays and their corresponding channel names to create a properly formatted SpatialData object containing both modalities. The function handles both 2D and 3D input arrays, automatically adding a channel dimension if needed. Parameters ---------- IF_arr : np.ndarray Immunofluorescence array of shape (H, W) or (C, H, W). If 2D, will be expanded to (1, H, W). IMC_arr : np.ndarray Imaging mass cytometry array of shape (H, W) or (C, H, W). If 2D, will be expanded to (1, H, W). IF_panel : List[str] List of channel names for IF data. Must match the number of channels in IF_arr. IMC_panel : List[str] List of channel names for IMC data. Must match the number of channels in IMC_arr. Returns ------- sd.SpatialData SpatialData object containing both modalities with proper channel information and coordinate systems. """ # prep zarr if IF_arr.ndim == 2: IF_arr = IF_arr[None, :, :] if IMC_arr.ndim == 2: IMC_arr = IMC_arr[None, :, :] IF = sd.models.Image2DModel.parse(data=IF_arr, c_coords=IF_panel) IMC = sd.models.Image2DModel.parse(data=IMC_arr, c_coords=IMC_panel) return sd.SpatialData({'IF': IF, 'IMC': IMC})
[docs] def get_aligned_coordinates( moving_element: sd.models.SpatialElement, reference_element: sd.models.SpatialElement, transformation: BaseTransformation, reference_coordinate_system: str = "global", moving_coordinate_system: str = "global", new_coordinate_system: str = 'aligned', write_to_sdata: sd.SpatialData = None, ) -> None: """Apply a transformation to align two spatial elements in a new coordinate system. This function applies a spatial transformation to align a moving element with a reference element, creating a new coordinate system that contains both elements in aligned space. The transformation is applied on top of any existing transformations. Parameters ---------- moving_element : sd.models.SpatialElement The spatial element to be transformed (e.g., image, points, shapes). reference_element : sd.models.SpatialElement The reference element that defines the target space. transformation : BaseTransformation The transformation to apply to the moving element. reference_coordinate_system : str, default="global" Name of the coordinate system for the reference element. moving_coordinate_system : str, default="global" Name of the coordinate system for the moving element. new_coordinate_system : str, default='aligned' Name of the new coordinate system after alignment. write_to_sdata : sd.SpatialData, optional If provided, write the transformation to this SpatialData object. Raises ------ AssertionError If the existing transformations are not BaseTransformation instances. """ old_moving_transformation = get_transformation(moving_element, moving_coordinate_system) old_reference_transformation = get_transformation(reference_element, reference_coordinate_system) assert isinstance(old_moving_transformation, BaseTransformation) assert isinstance(old_reference_transformation, BaseTransformation) # new_moving_transformation = Sequence([old_moving_transformation, transformation]) new_reference_transformation = old_reference_transformation # set_transformation(moving_element, new_moving_transformation, new_coordinate_system, write_to_sdata=write_to_sdata)
[docs] def read_M(M: str) -> np.ndarray: """Parse a string representation of an affine transformation matrix. This function parses string representations of 2x3 affine transformation matrices, handling various formatting inconsistencies by normalizing whitespace and bracket placement. Useful when reading from csv. Parameters ---------- M : str String representation of a 2x3 affine transformation matrix. Can contain various whitespace and formatting variations. Returns ------- np.ndarray 2x3 affine transformation matrix of shape (2, 3). Notes ----- This function uses eval() which can be unsafe with untrusted input. It should only be used with trusted transformation matrix strings. """ mstring = re.sub(r"\s+", ",", M) mstring = re.sub(r"\[\[,", "[[", mstring) mstring = re.sub(r"\],\[,", "],[", mstring) m = eval('np.array('+ mstring +')') return m
[docs] def multi_channel_corr(source: np.ndarray, target: np.ndarray) -> np.ndarray: """Calculate correlation coefficients between all pairs of channels from two images. This function computes the Pearson correlation coefficient between every channel in the source image and every channel in the target image. The images are first normalized (z-scored) channel-wise before correlation calculation. Parameters ---------- source : np.ndarray Source image array of shape (n_channels_1, n_pixels). Each row represents a flattened channel. target : np.ndarray Target image array of shape (n_channels_2, n_pixels). Each row represents a flattened channel. Returns ------- np.ndarray Correlation matrix of shape (n_channels_1, n_channels_2). Element (i, j) contains the correlation between source channel i and target channel j. Notes ----- The function assumes the input arrays have the same number of pixels (same second dimension). Channels are normalized to have zero mean and unit standard deviation before correlation calculation. """ if (source.shape[1] == 0) or (target.shape[1] == 0): return np.nan if any(source.std(axis=1) == 0) or any(target.std(axis=1) == 0): return np.nan # Normalize the data source = (source - source.mean(axis=1)[:, None]) / source.std(axis=1)[:, None] target = (target - target.mean(axis=1)[:, None]) / target.std(axis=1)[:, None] # Calculate correlation matrix using matrix multiplication return np.dot(source, target.T) / source.shape[1]
[docs] class IFProcessor: """Preprocessing pipeline for Immunofluorescence (IF) images. This class provides a standardized preprocessing pipeline for IF images including channel summation, normalization, Gaussian blurring, and optional binarization. The processor can be configured with trial parameters from Optuna optimization. Parameters ---------- binarize : bool, default=True Whether to apply binarization after preprocessing. binarization_threshold : float, default=0.1 Threshold value for binarization (0-1 range after normalization). sigma : float, default=1 Standard deviation for Gaussian blur kernel. Attributes ---------- binarize : bool Whether binarization is enabled. binarization_threshold : float Current binarization threshold. sigma : float Current Gaussian blur sigma value. """ def __init__(self, binarize=True, binarization_threshold=0.1, sigma=1): self.binarize = binarize self.binarization_threshold = binarization_threshold self.sigma = sigma
[docs] def configure(self, trial_params): """Configure processor parameters from Optuna trial parameters. This method updates the processor parameters based on values suggested by an Optuna trial. It looks for specific parameter names in the trial params dictionary and updates the corresponding attributes. Parameters ---------- trial_params : dict Dictionary of trial parameters from Optuna optimization. Expected keys: 'IF_binarization_threshold', 'IF_gaussian_sigma', 'binarize_images'. Returns ------- self : IFProcessor Returns self for method chaining. """ if trial_params: # Extract IF-specific parameters from trial params if "IF_binarization_threshold" in trial_params: self.binarization_threshold = trial_params["IF_binarization_threshold"] if "IF_gaussian_sigma" in trial_params: self.sigma = trial_params["IF_gaussian_sigma"] if "binarize_images" in trial_params: self.binarize = trial_params["binarize_images"] return self
def __call__(self, source_image): """Apply the preprocessing pipeline to an IF image. The preprocessing pipeline consists of: 1. Sum all channels to create a single composite image 2. Normalize to [0, 1] range by dividing by maximum value 3. Apply Gaussian blur with specified sigma 4. Optionally binarize using the threshold Parameters ---------- source_image : np.ndarray Input IF image of shape (C, H, W) where C is number of channels. Returns ------- np.ndarray Processed image of shape (H, W). If binarize=True, returns boolean array. Otherwise returns float array in [0, 1] range. """ source_image = source_image.sum(0) source_image = source_image / source_image.max() source_image = gaussian(source_image, sigma=self.sigma) if self.binarize: source_image = source_image > self.binarization_threshold return source_image
[docs] class IMCProcessor: """Preprocessing pipeline for Imaging Mass Cytometry (IMC) images. This class provides a comprehensive preprocessing pipeline specifically designed for IMC images, including arcsinh transformation for variance stabilization, winsorization for outlier handling, normalization, Gaussian blurring, and optional binarization. Parameters ---------- arcsinh_normalize : bool, default=True Whether to apply arcsinh transformation for variance stabilization. arcsinh_cofactor : float, default=5 Cofactor for arcsinh transformation. Lower values increase the transformation strength. winsorize_limits : list of float, default=[None, None] Lower and upper percentile limits for winsorization. [0.01, 0.01] means clip bottom 1% and top 1% of values. binarize : bool, default=True Whether to apply binarization after preprocessing. binarization_threshold : float, default=2 Threshold value for binarization. sigma : float, default=1 Standard deviation for Gaussian blur kernel. Attributes ---------- arcsinh_normalize : bool Whether arcsinh transformation is enabled. arcsinh_cofactor : float Current arcsinh cofactor value. winsorize_limits : list Current winsorization limits. binarize : bool Whether binarization is enabled. binarization_threshold : float Current binarization threshold. sigma : float Current Gaussian blur sigma value. Notes ----- The arcsinh transformation is particularly useful for IMC data because it stabilizes variance across the intensity range, which is important for count-based mass spectrometry data. """ def __init__(self, arcsinh_normalize=True, arcsinh_cofactor=5, winsorize_limits=[None, None], binarize=True, binarization_threshold=2, sigma=1): self.arcsinh_normalize = arcsinh_normalize self.arcsinh_cofactor = arcsinh_cofactor self.winsorize_limits = winsorize_limits self.binarize = binarize self.binarization_threshold = binarization_threshold self.sigma = sigma
[docs] def configure(self, trial_params): """Configure processor parameters from Optuna trial parameters. This method updates the processor parameters based on values suggested by an Optuna trial. It looks for specific IMC parameter names in the trial params dictionary. Parameters ---------- trial_params : dict Dictionary of trial parameters from Optuna optimization. Expected keys: 'IMC_arcsinh_normalize', 'IMC_arcsinh_cofactor', 'IMC_winsorization_lower_limit', 'IMC_winsorization_upper_limit', 'IMC_binarization_threshold', 'IMC_gaussian_sigma', 'binarize_images'. Returns ------- self : IMCProcessor Returns self for method chaining. """ if "IMC_arcsinh_normalize" in trial_params: self.arcsinh_normalize = trial_params["IMC_arcsinh_normalize"] if "IMC_arcsinh_cofactor" in trial_params: self.arcsinh_cofactor = trial_params["IMC_arcsinh_cofactor"] if "IMC_winsorization_lower_limit" in trial_params and "IMC_winsorization_upper_limit" in trial_params: self.winsorize_limits = [trial_params["IMC_winsorization_lower_limit"], trial_params["IMC_winsorization_upper_limit"]] if "IMC_binarization_threshold" in trial_params: self.binarization_threshold = trial_params["IMC_binarization_threshold"] if "IMC_gaussian_sigma" in trial_params: self.sigma = trial_params["IMC_gaussian_sigma"] if "binarize_images" in trial_params: self.binarize = trial_params["binarize_images"] return self
def __call__(self, target_image): """Apply the IMC preprocessing pipeline to an image. The preprocessing pipeline consists of: 1. Optional arcsinh transformation for variance stabilization 2. Sum all channels to create composite image 3. Winsorization to clip outlier intensities 4. Normalize to [0, 1] range 5. Apply Gaussian blur 6. Optional binarization Parameters ---------- target_image : np.ndarray Input IMC image of shape (C, H, W) where C is number of channels. Returns ------- np.ndarray Processed image of shape (H, W). If binarize=True, returns boolean array. Otherwise returns float array in [0, 1] range. """ if self.arcsinh_normalize: target_image = np.arcsinh(target_image/self.arcsinh_cofactor) target_image = target_image.sum(0) target_image = winsorize(target_image, limits=self.winsorize_limits) target_image = target_image / target_image.max() target_image = gaussian(target_image, sigma=self.sigma) if self.binarize: target_image = target_image > self.binarization_threshold return target_image
[docs] def pick_best_registration(study_df): """Calculate triangle score and return best trial from optimization results. This function implements a balanced scoring approach for selecting the best registration trial from a set of optimization results. It combines three key metrics (logical AND, IoU, and correlation) using a triangular scoring scheme that balances all three aspects of registration quality. The triangle score is calculated as: .. code-block:: text (1/3) * |norm_and * norm_corr + norm_corr * norm_iou + norm_iou * norm_and| where each metric is normalized to [0,1] within the group. Parameters ---------- study_df : pd.DataFrame DataFrame containing trial results with required columns: - 'user_attrs_logical_and': Logical AND overlap between images - 'user_attrs_logical_iou': Intersection over Union score - 'user_attrs_reg_image_max_corr': Maximum correlation between channels Returns ------- pd.Series DataFrame row containing the trial with the highest balanced score. The returned series includes all original columns plus computed normalization columns and the final 'balanced_score'. Notes ----- The logical AND values are log-transformed before normalization because they typically span several orders of magnitude. The triangle score approach ensures that no single metric dominates the selection, leading to more robust registration quality assessment. Missing or NaN values in any of the required columns will result in NaN normalized scores, which may affect the final ranking. """ study_df['norm_and'] = (np.log10(study_df['user_attrs_logical_and']+1)) / (np.log10(study_df['user_attrs_logical_and']+1).max()) study_df['norm_iou'] = study_df['user_attrs_logical_iou'] / study_df['user_attrs_logical_iou'].max() study_df['norm_corr'] = study_df['user_attrs_reg_image_max_corr'] / study_df['user_attrs_reg_image_max_corr'].max() study_df['balanced_score'] = 1/3 * abs(study_df['norm_and'] * study_df['norm_corr'] + study_df['norm_corr'] * study_df['norm_iou'] + study_df['norm_iou'] * study_df['norm_and']) # Get the row with maximum triangle score best_row = study_df.loc[study_df['balanced_score'].idxmax()] return best_row