Source code for gecos.optimizer

# This source code is part of the Gecos package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

__author__ = "Patrick Kunzmann, Benjamin Mayer"
__all__ = ["ColorOptimizer", "ScoreFunction", "DefaultScoreFunction"]

from collections import namedtuple
import abc
import copy
import skimage
import numpy as np
import numpy.random as random
from .colors import lab_to_rgb


MIN_COORD = np.array([ 0, -128, -128], dtype=float)
MAX_COORD = np.array([99,  127,  127], dtype=float)


[docs]class ColorOptimizer(object):
    """
    Create an optimizer that tries to find an optimal color conformation
    within a given color space based on a score function.

    The optimizer tries to minimize the return value of the score
    function by adjusting the *Lab* values (coordinates) for each
    symbol in a given alphabet.

    The optimizer uses the random number generator from *NumPy*.
    Therefore, call :func:`numpy.random.seed()` to set the seed for the
    optimizer

    Parameters
    ----------
    alphabet : biotite.sequence.Alphabet
        The alphabet to calculate the color conformation for.
    score_function : ScoreFunction or callable
        The score function which should be minimized.
        When calling the object, its only parameter must be an array of
        coordinates with shape *(n, 3)*, where *n* is the length of the
        alphabet.
        Its return value must be a single float - the score.
    space : ColorSpace
        The color space that defines the allowed space for the
        coordinates.
    constraints : ndarray, shape=(n,3), dtype=float, optional
        An array whose non-NaN values are interpreted as constraints.
        Constrained values will be fixed during the optimization.
    """

[docs]    class Result(namedtuple("Result", ["alphabet", "trajectory", "scores"])):
        """
        The result of an optimization.
        Contains the final color scheme information as well as the
        course of the coordinates and the score during the optimization.

        Parameters
        ----------
        alphabet : biotite.sequence.Alphabet
            The alphabet the optimizer used.
        trajectory : ndarray, shape=(m,n,3), dtype=float
            The course of the coordinates during the simulation.
        scores : ndarray, shape=(m,), dtype=float
            The course of the score during the simulation.

        Attributes
        ----------
        alphabet : biotite.sequence.Alphabet
            The alphabet the optimizer used.
        trajectory : ndarray, shape=(m,n,3), dtype=float
            The course of coordinates during the simulation.
        lab_colors : ndarray, shape=(n,3), dtype=float
            The final *Lab* color conformation, i.e. the last element of
            `trajectory`.
        rgb_colors : ndarray, shape=(n,3), dtype=float
            The final color conformation converted into *RGB* colors.
        scores : ndarray, shape=(m,), dtype=float
            The course of the score during the simulation.
        score : float
            The final score, i.e. the last element of `scores`.
        
        """
        
        @property
        def score(self):
            return self.scores[-1]

        @property
        def lab_colors(self):
            return copy.deepcopy(self.trajectory[-1])
        
        @property
        def rgb_colors(self):
            return lab_to_rgb(np.floor(self.lab_colors))
    
    def __init__(self, alphabet, score_function, space, constraints=None):
        self._alphabet = alphabet
        self._n_symbols = len(alphabet)
        self._score_func = score_function
        self._space = space.space.copy()
        self._trajectory = []
        self._scores = []

        if constraints is None:
            self._constraint_mask = np.zeros(self._n_symbols, dtype=bool)
            self._constraints = np.full((self._n_symbols, 3), np.nan)
        else:
            self._constraint_mask = ~np.isnan(constraints).any(axis=-1)
            # Check constraints
            constraint_vals = constraints[self._constraint_mask]
            invalid_ind = np.where(~self._is_allowed(constraint_vals))[0]
            if len(invalid_ind) > 0:
                raise ValueError(
                    f"Constraint {constraint_vals[invalid_ind[0]]} "
                    f"is outside the allowed space"
                )
            self._constraints = constraints.copy()

        # Sample random initial coordinates
        coord = np.zeros((self._n_symbols, 3))
        self._apply_constraints(coord)
        self._set_coordinates(
            self._sample_coord(
                coord,
                lambda c: (
                    random.rand(*c.shape)
                    # Bring random values into correct range
                    * (MAX_COORD - MIN_COORD) + MIN_COORD
                )
            )
        )

[docs]    def set_coordinates(self, coord):
        """
        Set the the coordinates of the current color conformation.
        Potential color constraints are applied on these.
        This coordinate changes will be tracked in the trajectory.
        
        Parameters
        ----------
        coord : ndarray, shape=(n,3), dtype=float
            The new coordinates.
        """
        if coord.shape != (self._n_symbols, 3):
            raise ValueError(
                f"Given shape is {coord.shape}, "
                f"but expected shape is {(len(self._alphabet), 3)}"
            )

        invalid_ind = np.where(~self._is_allowed(coord))[0]
        if len(invalid_ind) > 0:
            raise ValueError(
                f"Coordinate {coord[invalid_ind[0]]} "
                f"is outside the allowed space"
            )

        coord = coord.copy()
        self._apply_constraints(coord)
        self._set_coordinates(coord)
        
    def _set_coordinates(self, coord, score=None):
        self._coord = coord
        self._trajectory.append(coord)
        if score is None:
            score = self._score_func(coord)
        self._scores.append(score)
    
[docs]    def optimize(self, n_steps=20000,
                 beta_start=1, beta_end=500,
                 stepsize_start=10, stepsize_end=0.2):
        r"""
        Perform a simulated annealing optimization on the current
        coordinate to minimize the score returned by the score function.
        
        This is basically a Metropolis-Monte-Carlo optimization where
        the inverse temperature and step size is varied according to
        exponential annealing schedule over the course of the
        optimization.                            
        
        Parameters
        ----------
        n_steps : int
            The number of simulated annealing steps.
        beta_start, beta_end : float
            The inverse temperature in the first and last step of the
            optimization, respectively.
            Higher values allow less increase of score, i.e. result
            in a steering into the local minimum.
            Must be positive.
        stepsize_start, stepsize_end : float
            The step size in the first and last step of the
            optimization, respectively.
            it is the radius in which the coordinates are randomly
            altered at in each optimization step.
            Must be positive.

        Notes
        -----
        The algorithm is a heuristic thats motivated by the physical
        process of annealing.
        If we, e.g., cool steel than a slow cooling can yield a superior
        quality, whereas for a fast cooling the steel can become
        brittle.
        The same happens here within the search space for the given
        minimization task.       
        """
        betas = _calculate_schedule(n_steps, beta_start, beta_end)
        stepsizes = _calculate_schedule(n_steps, stepsize_start, stepsize_end)

        for i in range(n_steps):
            score = self._scores[-1]
            new_coord = self._sample_coord(
                self._coord,
                lambda c: c + (random.rand(*c.shape)-0.5) * 2 * stepsizes[i]
            )
            new_score = self._score_func(new_coord)
            
            if new_score < score:
                self._set_coordinates(new_coord, new_score)
                
            else:
                p_accept = np.exp( -betas[i] * (new_score-score))
                if random.rand() <= p_accept:
                    self._set_coordinates(new_coord, new_score)
                else:
                    self._set_coordinates(self._coord, score)
                    
[docs]    def get_result(self):
        """
        Get the result of the optimization.

        Returns
        -------
        result : ColorOptimizer.Result
            The result.
        """
        trajectory = np.array(self._trajectory)
        scores = np.array(self._scores)
        return ColorOptimizer.Result(
            alphabet = self._alphabet,
            trajectory = trajectory,
            scores = scores
        )
    
    def _is_allowed(self, coord):
        """
        Get a mask indicating allowed color values, i.e. values within
        the color space.
        """
        mask = ((coord >= MIN_COORD) & (coord <= MAX_COORD)).all(axis=-1)
        # Only check values that are within valid index range
        # Use floor for correct conversion of negative values to index
        ind = np.floor(coord[mask] - MIN_COORD).astype(int)
        mask[mask.copy()] = self._space[ind[..., 0], ind[..., 1], ind[..., 2]]
        return mask
    
    def _sample_coord(self, old_coord, sampler):
        """
        Based on given coordinates sample new coordinates that are
        within the color space.
        The sampling function must accept a given array of coordinates
        and return a new array of the same size.
        """
        new_coord = old_coord.copy()
        # Resample coordinates that are not in valid space until they
        # are in valid space
        resample_mask = ~self._constraint_mask.copy()
        while resample_mask.any():
            new_coord[resample_mask] = sampler(old_coord[resample_mask])
            resample_mask[self._is_allowed(new_coord)] = False
        return new_coord

    
    def _apply_constraints(self, coord):
        coord[self._constraint_mask] = self._constraints[self._constraint_mask]


def _calculate_schedule(n_steps, start, end):
    """
    Calculate the values for each step in an exponential schedule.
    """
    # Use float 64
    return start * (end/start)**np.linspace(0, 1, n_steps, dtype=np.float64)



[docs]class ScoreFunction(metaclass=abc.ABCMeta):
    """
    Abstract base class for a score function.
    A score function calculates a score from a color conformation
    (coordinates).

    The score is calculated by calling the object with the coordinates
    as single argument.
    Hence, classes inheriting from this base class mut override the
    :func:`__call__()` method.

    Parameters
    ----------
    n_symbols : int
        The amount of symbols in the system.
        Equivalent to the length of the alphabet the color scheme is
        generated for.
        This value is used to check the shape of the coordinates when
        calling the score function.
    """

    def __init__(self, n_symbols):
        self._n_symbols = n_symbols

[docs]    @abc.abstractmethod
    def __call__(self, coord):
        """
        Calculate the score for the given coordinates.

        Parameters
        ----------
        coord : ndarray, shape=(n,3), dtype=float
            The coordinates.
        
        Returns
        -------
        score : float
            The score assigned to `coord`.
        """
        if len(coord) != self._n_symbols:
            raise ValueError(
                f"Expected {self._n_symbols} coordinates, but got {len(coord)}"
            )


[docs]class DefaultScoreFunction(ScoreFunction):
    """
    Create an instance of the default score function *Gecos* uses.

    The score function contains two terms:
    A sum of harmonic potentials between each pair of symbols, based on
    a substitution matrix, and *contrast score* that favors schemes with
    a high contrast.

    Parameters
    ----------
    matrix : biotite.sequence.align.SubstitutionMatrix
        A distance matrix is calculated from this score matrix.
        The equilibrium positions scale linearly with the values in the
        distance matrix.
    contrast : int, optional
        A weight for the *contrast score*.
    distance_formula : {'CIE76', 'CIEDE94', 'CIEDE2000'}, optional
        The formula to use for calculation of perceptual color
        difference.
        While ``'CIEDE2000'`` is the most accurate formula for the
        perceptual difference, ``'CIE76'`` features the fastest
        calculation.
    """
    def __init__(self, matrix, contrast=700, distance_formula="CIEDE2000"):
        if not matrix.is_symmetric():
            raise ValueError("Substitution matrix must be symmetric")
        n_symbols = len(matrix.get_alphabet1())
        super().__init__(n_symbols)
        self._tri_indices = np.tril_indices(n_symbols, k=-1)
        self._ideal_dist = DefaultScoreFunction._calculate_ideal_distances(
            self._tri_indices, matrix
        )
        self._contrast = contrast
        if distance_formula not in ["CIE76", "CIEDE94", "CIEDE2000"]:
            raise ValueError(
                f"Unknown color distance formula '{distance_formula}'"
            )
        self._distance_formula = distance_formula
    
    def __call__(self, coord):
        super().__call__(coord)
        
        dist = DefaultScoreFunction._calculate_distances(
            self._tri_indices, coord, self._distance_formula
        )
        # This factor translates visual distances
        # into normalized substitution matrix distances
        mean_dist = np.average(dist)
        # Harmonic potentials between each pair of symbols
        harmonic_loss = np.sum((dist / mean_dist - self._ideal_dist)**2)
        # Contrast term: Favours conformations
        # with large absolute color differences
        contrast_loss = self._contrast / mean_dist
        return harmonic_loss + contrast_loss
    
    @staticmethod
    def _calculate_distances(tri_indices, coord, distance_formula):
        ind1, ind2 = tri_indices
        if distance_formula == "CIE76":
            return np.sqrt(
                np.sum((coord[ind1, :] - coord[ind2, :])**2, axis=-1)
            )
        elif distance_formula == "CIEDE94":
            return skimage.color.deltaE_ciede94(
                coord[ind1], coord[ind2]
            )
        else: #"CIEDE2000"
            return skimage.color.deltaE_ciede2000(
                coord[ind1], coord[ind2]
            )

    @staticmethod
    def _calculate_ideal_distances(tri_indices, substitution_matrix):
        scores = substitution_matrix.score_matrix()
        diff_to_max = np.diag(scores) - scores
        dist_matrix = (diff_to_max + diff_to_max.T) / 2
        ind_i, ind_j = tri_indices
        distances = dist_matrix[ind_i, ind_j]
        # Scale, so that average distance is 1
        distances /= np.average(distances)
        return distances
Navigation

Related Topics

Source code for gecos.optimizer