# This source code is part of the Gecos package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.
__author__ = "Patrick Kunzmann, Benjamin Mayer"
__all__ = ["ColorOptimizer", "ScoreFunction", "DefaultScoreFunction"]
from collections import namedtuple
import abc
import copy
import skimage
import numpy as np
import numpy.random as random
from .colors import lab_to_rgb
MIN_COORD = np.array([ 0, -128, -128], dtype=float)
MAX_COORD = np.array([99, 127, 127], dtype=float)
[docs]class ColorOptimizer(object):
"""
Create an optimizer that tries to find an optimal color conformation
within a given color space based on a score function.
The optimizer tries to minimize the return value of the score
function by adjusting the *Lab* values (coordinates) for each
symbol in a given alphabet.
The optimizer uses the random number generator from *NumPy*.
Therefore, call :func:`numpy.random.seed()` to set the seed for the
optimizer
Parameters
----------
alphabet : biotite.sequence.Alphabet
The alphabet to calculate the color conformation for.
score_function : ScoreFunction or callable
The score function which should be minimized.
When calling the object, its only parameter must be an array of
coordinates with shape *(n, 3)*, where *n* is the length of the
alphabet.
Its return value must be a single float - the score.
space : ColorSpace
The color space that defines the allowed space for the
coordinates.
constraints : ndarray, shape=(n,3), dtype=float, optional
An array whose non-NaN values are interpreted as constraints.
Constrained values will be fixed during the optimization.
"""
[docs] class Result(namedtuple("Result", ["alphabet", "trajectory", "scores"])):
"""
The result of an optimization.
Contains the final color scheme information as well as the
course of the coordinates and the score during the optimization.
Parameters
----------
alphabet : biotite.sequence.Alphabet
The alphabet the optimizer used.
trajectory : ndarray, shape=(m,n,3), dtype=float
The course of the coordinates during the simulation.
scores : ndarray, shape=(m,), dtype=float
The course of the score during the simulation.
Attributes
----------
alphabet : biotite.sequence.Alphabet
The alphabet the optimizer used.
trajectory : ndarray, shape=(m,n,3), dtype=float
The course of coordinates during the simulation.
lab_colors : ndarray, shape=(n,3), dtype=float
The final *Lab* color conformation, i.e. the last element of
`trajectory`.
rgb_colors : ndarray, shape=(n,3), dtype=float
The final color conformation converted into *RGB* colors.
scores : ndarray, shape=(m,), dtype=float
The course of the score during the simulation.
score : float
The final score, i.e. the last element of `scores`.
"""
@property
def score(self):
return self.scores[-1]
@property
def lab_colors(self):
return copy.deepcopy(self.trajectory[-1])
@property
def rgb_colors(self):
return lab_to_rgb(np.floor(self.lab_colors))
def __init__(self, alphabet, score_function, space, constraints=None):
self._alphabet = alphabet
self._n_symbols = len(alphabet)
self._score_func = score_function
self._space = space.space.copy()
self._trajectory = []
self._scores = []
if constraints is None:
self._constraint_mask = np.zeros(self._n_symbols, dtype=bool)
self._constraints = np.full((self._n_symbols, 3), np.nan)
else:
self._constraint_mask = ~np.isnan(constraints).any(axis=-1)
# Check constraints
constraint_vals = constraints[self._constraint_mask]
invalid_ind = np.where(~self._is_allowed(constraint_vals))[0]
if len(invalid_ind) > 0:
raise ValueError(
f"Constraint {constraint_vals[invalid_ind[0]]} "
f"is outside the allowed space"
)
self._constraints = constraints.copy()
# Sample random initial coordinates
coord = np.zeros((self._n_symbols, 3))
self._apply_constraints(coord)
self._set_coordinates(
self._sample_coord(
coord,
lambda c: (
random.rand(*c.shape)
# Bring random values into correct range
* (MAX_COORD - MIN_COORD) + MIN_COORD
)
)
)
[docs] def set_coordinates(self, coord):
"""
Set the the coordinates of the current color conformation.
Potential color constraints are applied on these.
This coordinate changes will be tracked in the trajectory.
Parameters
----------
coord : ndarray, shape=(n,3), dtype=float
The new coordinates.
"""
if coord.shape != (self._n_symbols, 3):
raise ValueError(
f"Given shape is {coord.shape}, "
f"but expected shape is {(len(self._alphabet), 3)}"
)
invalid_ind = np.where(~self._is_allowed(coord))[0]
if len(invalid_ind) > 0:
raise ValueError(
f"Coordinate {coord[invalid_ind[0]]} "
f"is outside the allowed space"
)
coord = coord.copy()
self._apply_constraints(coord)
self._set_coordinates(coord)
def _set_coordinates(self, coord, score=None):
self._coord = coord
self._trajectory.append(coord)
if score is None:
score = self._score_func(coord)
self._scores.append(score)
[docs] def optimize(self, n_steps=20000,
beta_start=1, beta_end=500,
stepsize_start=10, stepsize_end=0.2):
r"""
Perform a simulated annealing optimization on the current
coordinate to minimize the score returned by the score function.
This is basically a Metropolis-Monte-Carlo optimization where
the inverse temperature and step size is varied according to
exponential annealing schedule over the course of the
optimization.
Parameters
----------
n_steps : int
The number of simulated annealing steps.
beta_start, beta_end : float
The inverse temperature in the first and last step of the
optimization, respectively.
Higher values allow less increase of score, i.e. result
in a steering into the local minimum.
Must be positive.
stepsize_start, stepsize_end : float
The step size in the first and last step of the
optimization, respectively.
it is the radius in which the coordinates are randomly
altered at in each optimization step.
Must be positive.
Notes
-----
The algorithm is a heuristic thats motivated by the physical
process of annealing.
If we, e.g., cool steel than a slow cooling can yield a superior
quality, whereas for a fast cooling the steel can become
brittle.
The same happens here within the search space for the given
minimization task.
"""
betas = _calculate_schedule(n_steps, beta_start, beta_end)
stepsizes = _calculate_schedule(n_steps, stepsize_start, stepsize_end)
for i in range(n_steps):
score = self._scores[-1]
new_coord = self._sample_coord(
self._coord,
lambda c: c + (random.rand(*c.shape)-0.5) * 2 * stepsizes[i]
)
new_score = self._score_func(new_coord)
if new_score < score:
self._set_coordinates(new_coord, new_score)
else:
p_accept = np.exp( -betas[i] * (new_score-score))
if random.rand() <= p_accept:
self._set_coordinates(new_coord, new_score)
else:
self._set_coordinates(self._coord, score)
[docs] def get_result(self):
"""
Get the result of the optimization.
Returns
-------
result : ColorOptimizer.Result
The result.
"""
trajectory = np.array(self._trajectory)
scores = np.array(self._scores)
return ColorOptimizer.Result(
alphabet = self._alphabet,
trajectory = trajectory,
scores = scores
)
def _is_allowed(self, coord):
"""
Get a mask indicating allowed color values, i.e. values within
the color space.
"""
mask = ((coord >= MIN_COORD) & (coord <= MAX_COORD)).all(axis=-1)
# Only check values that are within valid index range
# Use floor for correct conversion of negative values to index
ind = np.floor(coord[mask] - MIN_COORD).astype(int)
mask[mask.copy()] = self._space[ind[..., 0], ind[..., 1], ind[..., 2]]
return mask
def _sample_coord(self, old_coord, sampler):
"""
Based on given coordinates sample new coordinates that are
within the color space.
The sampling function must accept a given array of coordinates
and return a new array of the same size.
"""
new_coord = old_coord.copy()
# Resample coordinates that are not in valid space until they
# are in valid space
resample_mask = ~self._constraint_mask.copy()
while resample_mask.any():
new_coord[resample_mask] = sampler(old_coord[resample_mask])
resample_mask[self._is_allowed(new_coord)] = False
return new_coord
def _apply_constraints(self, coord):
coord[self._constraint_mask] = self._constraints[self._constraint_mask]
def _calculate_schedule(n_steps, start, end):
"""
Calculate the values for each step in an exponential schedule.
"""
# Use float 64
return start * (end/start)**np.linspace(0, 1, n_steps, dtype=np.float64)
[docs]class ScoreFunction(metaclass=abc.ABCMeta):
"""
Abstract base class for a score function.
A score function calculates a score from a color conformation
(coordinates).
The score is calculated by calling the object with the coordinates
as single argument.
Hence, classes inheriting from this base class mut override the
:func:`__call__()` method.
Parameters
----------
n_symbols : int
The amount of symbols in the system.
Equivalent to the length of the alphabet the color scheme is
generated for.
This value is used to check the shape of the coordinates when
calling the score function.
"""
def __init__(self, n_symbols):
self._n_symbols = n_symbols
[docs] @abc.abstractmethod
def __call__(self, coord):
"""
Calculate the score for the given coordinates.
Parameters
----------
coord : ndarray, shape=(n,3), dtype=float
The coordinates.
Returns
-------
score : float
The score assigned to `coord`.
"""
if len(coord) != self._n_symbols:
raise ValueError(
f"Expected {self._n_symbols} coordinates, but got {len(coord)}"
)
[docs]class DefaultScoreFunction(ScoreFunction):
"""
Create an instance of the default score function *Gecos* uses.
The score function contains two terms:
A sum of harmonic potentials between each pair of symbols, based on
a substitution matrix, and *contrast score* that favors schemes with
a high contrast.
Parameters
----------
matrix : biotite.sequence.align.SubstitutionMatrix
A distance matrix is calculated from this score matrix.
The equilibrium positions scale linearly with the values in the
distance matrix.
contrast : int, optional
A weight for the *contrast score*.
distance_formula : {'CIE76', 'CIEDE94', 'CIEDE2000'}, optional
The formula to use for calculation of perceptual color
difference.
While ``'CIEDE2000'`` is the most accurate formula for the
perceptual difference, ``'CIE76'`` features the fastest
calculation.
"""
def __init__(self, matrix, contrast=700, distance_formula="CIEDE2000"):
if not matrix.is_symmetric():
raise ValueError("Substitution matrix must be symmetric")
n_symbols = len(matrix.get_alphabet1())
super().__init__(n_symbols)
self._tri_indices = np.tril_indices(n_symbols, k=-1)
self._ideal_dist = DefaultScoreFunction._calculate_ideal_distances(
self._tri_indices, matrix
)
self._contrast = contrast
if distance_formula not in ["CIE76", "CIEDE94", "CIEDE2000"]:
raise ValueError(
f"Unknown color distance formula '{distance_formula}'"
)
self._distance_formula = distance_formula
def __call__(self, coord):
super().__call__(coord)
dist = DefaultScoreFunction._calculate_distances(
self._tri_indices, coord, self._distance_formula
)
# This factor translates visual distances
# into normalized substitution matrix distances
mean_dist = np.average(dist)
# Harmonic potentials between each pair of symbols
harmonic_loss = np.sum((dist / mean_dist - self._ideal_dist)**2)
# Contrast term: Favours conformations
# with large absolute color differences
contrast_loss = self._contrast / mean_dist
return harmonic_loss + contrast_loss
@staticmethod
def _calculate_distances(tri_indices, coord, distance_formula):
ind1, ind2 = tri_indices
if distance_formula == "CIE76":
return np.sqrt(
np.sum((coord[ind1, :] - coord[ind2, :])**2, axis=-1)
)
elif distance_formula == "CIEDE94":
return skimage.color.deltaE_ciede94(
coord[ind1], coord[ind2]
)
else: #"CIEDE2000"
return skimage.color.deltaE_ciede2000(
coord[ind1], coord[ind2]
)
@staticmethod
def _calculate_ideal_distances(tri_indices, substitution_matrix):
scores = substitution_matrix.score_matrix()
diff_to_max = np.diag(scores) - scores
dist_matrix = (diff_to_max + diff_to_max.T) / 2
ind_i, ind_j = tri_indices
distances = dist_matrix[ind_i, ind_j]
# Scale, so that average distance is 1
distances /= np.average(distances)
return distances