import numpy as np
import pandas as pd
import sklearn.metrics
from drain.util import to_float
"""Methods that calculate metrics for classification models.
All metrics are functions of two numpy arrays of floats of equal length:
- y_true: the labels, either 0 or 1 or NaN
- y_score: the scores, which can take any non-NaN number.
All metrics have been implemented to support missing labels.
"""
def _argsort(y_score, k=None):
    """
    Return the indexes that order y_score from highest to lowest.

    If k is given, only the first k of those indexes are returned;
    otherwise the indexes of all scores are returned.
    """
    # argsort is ascending, so reversing it yields the descending order.
    descending = y_score.argsort()[::-1]
    return descending if k is None else descending[0:k]
def _argtop(y_score, k=None):
    """
    Return an indexer selecting the top k scores.

    When k is None every score is selected and a slice covering the whole
    array is returned; otherwise the result of _argsort(y_score, k) is
    returned. Callers should not rely on the order of the selection.
    """
    if k is not None:
        return _argsort(y_score, k)
    # Selecting everything needs no sorting: a full-range slice is enough.
    return slice(0, len(y_score))
def count(y_true, y_score=None, countna=False):
    """
    Count the number of examples.

    With countna=True every example is counted. With countna=False
    (the default) only labeled examples are counted, i.e. those whose
    y_true is not NaN. y_score is accepted but not used.
    """
    if countna:
        return len(y_true)
    labeled = ~np.isnan(to_float(y_true))
    return labeled.sum()
def count_series(y_true, y_score, countna=False):
    """
    Return a series whose i-th entry is the number of examples in the top i.

    Examples are ranked by descending score. With countna=False only
    labeled examples (y_true not NaN) are counted; with countna=True every
    example counts, so the i-th entry is simply i. The index runs from 1.
    """
    y_true, y_score = to_float(y_true, y_score)
    ranking = _argsort(y_score)
    if countna:
        counts = range(1, len(y_true)+1)
    else:
        # running total of labeled examples, walking down the ranking
        counts = (~np.isnan(y_true[ranking])).cumsum()
    return pd.Series(counts, index=range(1, len(counts)+1))
def baseline(y_true, y_score=None):
    """
    Number of positive labels divided by number of labels,
    or zero if there are no labels.

    "No labels" covers both an empty y_true and a y_true whose entries
    are all NaN. y_score is accepted but not used.
    """
    if len(y_true) == 0:
        return 0.0
    n_labeled = count(y_true, countna=False)
    # A non-empty array can still have zero labels (all NaN); dividing then
    # would give NaN instead of the zero this function promises.
    if n_labeled == 0:
        return 0.0
    return np.nansum(y_true)/n_labeled
def roc_auc(y_true, y_score):
    """
    Return the area under the ROC curve.

    Only examples whose label is not NaN are passed to
    sklearn.metrics.roc_curve; the area is then computed with
    sklearn.metrics.auc.
    """
    labeled = ~np.isnan(y_true)
    fpr, tpr, _ = sklearn.metrics.roc_curve(y_true[labeled], y_score[labeled])
    return sklearn.metrics.auc(fpr, tpr)
def precision(y_true, y_score, k=None, return_bounds=False):
    """
    If return_bounds is False then returns precision on the
    labeled examples in the top k (NaN if none of them is labeled).

    If return_bounds is True then returns a tuple containing:
        - precision on the labeled examples in the top k
        - number of labeled examples in the top k
        - lower bound of precision in the top k, assuming all
          unlabeled examples are False
        - upper bound of precision in the top k, assuming all
          unlabeled examples are True

    If k is None all examples are used. The bounds are computed over the
    examples actually selected, so a k larger than the number of examples
    behaves like k equal to that number. Both bounds are NaN when no
    examples are selected.
    """
    y_true, y_score = to_float(y_true, y_score)
    top_labels = y_true[_argtop(y_score, k)]

    n = np.nan_to_num(top_labels).sum()  # fill missing labels with 0
    d = (~np.isnan(top_labels)).sum()    # count number of labels
    # Precision is undefined without labels; say so explicitly rather than
    # relying on a 0/0 division (which also emits a RuntimeWarning).
    p = n/d if d > 0 else np.nan

    if not return_bounds:
        return p

    # Use the real size of the top set, not the requested k: when k exceeds
    # the number of examples, the surplus must not be counted as unlabeled.
    size = len(top_labels)
    if size == 0:
        return p, d, np.nan, np.nan
    return p, d, n/size, (n+size-d)/size
def precision_series(y_true, y_score, k=None):
    """
    Return a series whose i-th entry is the precision in the top i.

    Examples are ranked by descending score and limited to the top k when
    k is given. Precision is taken over labeled examples only; unlabeled
    ones (y_true NaN) contribute to neither numerator nor denominator.
    The index runs from 1.
    TODO: extrapolate here
    """
    y_true, y_score = to_float(y_true, y_score)
    ranked = y_true[_argsort(y_score, k)]

    positives = np.nan_to_num(ranked).cumsum()  # missing labels count as 0
    labeled = (~np.isnan(ranked)).cumsum()      # running number of labels
    return pd.Series(positives/labeled, index=np.arange(1, len(positives)+1))
def recall(y_true, y_score, k=None, value=True):
    """
    Return recall, as the number of positive examples in the top k.

    If value is False the labels are flipped (1 - y_true) first, so the
    number of negative examples is counted instead. Unlabeled examples
    (NaN) count as zero either way.
    TODO: add prop argument to return recall proportion instead of count
    """
    y_true, y_score = to_float(y_true, y_score)
    selected = _argtop(y_score, k)

    labels = y_true if value else 1-y_true
    return np.nan_to_num(labels[selected]).sum()
def recall_series(y_true, y_score, k=None, value=True):
    """
    Return a series whose i-th entry is the recall in the top i.

    Examples are ranked by descending score and limited to the top k when
    k is given. Recall is the running count of positive examples, or of
    negative examples when value is False. Unlabeled examples (NaN) count
    as zero. The index runs from 1.
    """
    y_true, y_score = to_float(y_true, y_score)
    ranking = _argsort(y_score, k)

    labels = y_true if value else 1-y_true
    running = np.nan_to_num(labels[ranking]).cumsum()
    return pd.Series(running, index=np.arange(1, len(running)+1))