"""Score types for tests that completed successfully.
These include various representations of goodness-of-fit.
"""
from __future__ import division
import math
import numpy as np
import quantities as pq
from sciunit import utils
from sciunit import errors
from .base import Score
from .incomplete import InsufficientDataScore
class BooleanScore(Score):
"""A boolean score, which must be True or False."""
_allowed_types = (bool,)
_description = ('True if the observation and prediction were '
'sufficiently similar; False otherwise')
@classmethod
def compute(cls, observation, prediction):
"""Compute whether the observation equals the prediction."""
return BooleanScore(observation == prediction)
@property
def norm_score(self):
"""Return 1.0 for a True score and 0.0 for False score."""
return 1.0 if self.score else 0.0
def __str__(self):
return 'Pass' if self.score else 'Fail'
class ZScore(Score):
"""A Z score.
A float indicating standardized difference
from a reference mean.
"""
_allowed_types = (float,)
_description = ('The difference between the means of the observation and '
'prediction divided by the standard deviation of the '
'observation')
_best = 0.0 # A Z-Score of 0.0 is best
_worst = np.inf # A Z-score of infinity (or negative infinity) is worst
@classmethod
def compute(cls, observation, prediction):
"""Compute a z-score from an observation and a prediction."""
assert isinstance(observation, dict),\
"Observation must be a dict when using ZScore, not type %s" \
% type(observation)
try:
p_value = prediction['mean'] # Use the prediction's mean.
except (TypeError, KeyError, IndexError): # If there isn't one...
try:
p_value = prediction['value'] # Use the prediction's value.
except (TypeError, IndexError): # If there isn't one...
p_value = prediction # Use the prediction (assume numeric).
try:
o_mean = observation['mean']
o_std = observation['std']
except KeyError:
error = ("Observation must have keys 'mean' and 'std' "
"when using ZScore")
return InsufficientDataScore(error)
if not o_std > 0:
error = 'Observation standard deviation must be > 0'
return InsufficientDataScore(error)
value = (p_value - o_mean)/o_std
value = utils.assert_dimensionless(value)
if np.isnan(value):
error = 'One of the input values was NaN'
return InsufficientDataScore(error)
score = ZScore(value)
return score
@property
def norm_score(self):
"""Return the normalized score.
Equals 1.0 for a z-score of 0, falling to 0.0 for extremely positive
or negative values.
"""
cdf = (1.0 + math.erf(self.score / math.sqrt(2.0))) / 2.0
return 1 - 2*math.fabs(0.5 - cdf)
def __str__(self):
return 'Z = %.2f' % self.score
class CohenDScore(ZScore):
"""A Cohen's D score.
A float indicating difference
between two means normalized by the pooled standard deviation.
"""
_description = ("The Cohen's D between the prediction and the observation")
@classmethod
def compute(cls, observation, prediction):
"""Compute a Cohen's D from an observation and a prediction."""
assert isinstance(observation, dict)
assert isinstance(prediction, dict)
p_mean = prediction['mean'] # Use the prediction's mean.
p_std = prediction['std']
o_mean = observation['mean']
o_std = observation['std']
try: # Try to pool taking samples sizes into account.
p_n = prediction['n']
o_n = observation['n']
s = (((p_n-1)*(p_std**2) + (o_n-1)*(o_std**2))/(p_n+o_n-2))**0.5
except KeyError: # If sample sizes are not available.
s = (p_std**2 + o_std**2)**0.5
value = (p_mean - o_mean)/s
value = utils.assert_dimensionless(value)
return CohenDScore(value)
def __str__(self):
return 'D = %.2f' % self.score
class RatioScore(Score):
"""A ratio of two numbers.
Usually the prediction divided by
the observation.
"""
_allowed_types = (float,)
_description = ('The ratio between the prediction and the observation')
_best = 1.0 # A RatioScore of 1.0 is best
def _check_score(self, score):
if score < 0.0:
raise errors.InvalidScoreError(("RatioScore was initialized with "
"a score of %f, but a RatioScore "
"must be non-negative.") % score)
@classmethod
def compute(cls, observation, prediction, key=None):
"""Compute a ratio from an observation and a prediction."""
assert isinstance(observation, (dict, float, int, pq.Quantity))
assert isinstance(prediction, (dict, float, int, pq.Quantity))
obs, pred = cls.extract_means_or_values(observation, prediction,
key=key)
value = pred / obs
value = utils.assert_dimensionless(value)
return RatioScore(value)
@property
def norm_score(self):
"""Return 1.0 for a ratio of 1, falling to 0.0 for extremely small
or large values."""
score = math.log10(self.score)
cdf = (1.0 + math.erf(score / math.sqrt(2.0))) / 2.0
return 1 - 2*math.fabs(0.5 - cdf)
def __str__(self):
return 'Ratio = %.2f' % self.score
class PercentScore(Score):
"""A percent score.
A float in the range [0,0,100.0] where higher is better.
"""
_description = ('100.0 is considered perfect agreement between the '
'observation and the prediction. 0.0 is the worst possible'
' agreement')
def _check_score(self, score):
if not (0.0 <= score <= 100.0):
raise errors.InvalidScoreError(("Score of %f must be in "
"range 0.0-100.0" % score))
@property
def norm_score(self):
"""Return 1.0 for a percent score of 100, and 0.0 for 0."""
return float(self.score)/100
def __str__(self):
return '%.1f%%' % self.score
class FloatScore(Score):
"""A float score.
A float with any value.
"""
_allowed_types = (float, pq.Quantity,)
def _check_score(self, score):
if isinstance(score, pq.Quantity) and score.size != 1:
raise errors.InvalidScoreError("Score must have size 1.")
_description = ('There is no canonical mapping between this score type and'
' a measure of agreement between the observation and the '
'prediction')
@classmethod
def compute_ssd(cls, observation, prediction):
"""Compute sum-squared diff between observation and prediction."""
# The sum of the squared differences.
value = ((observation - prediction)**2).sum()
score = FloatScore(value)
return score
def __str__(self):
return '%.3g' % self.score
class RandomScore(Score):
"""A random score in [0,1].
This has no scientific value and should only be used for debugging
purposes. For example, one might assign a random score under some error
condition to move forward with an application that requires a numeric
score, and use the presence of a RandomScore in the output as an
indication of an internal error.
"""
_allowed_types = (float,)
_description = ('There is a random number in [0,1] and has no relation to '
'the prediction or the observation')
def __str__(self):
return '%.3g' % self.score