Source code for datalib.metrics._plot.cap_curve

"""
Module containing the implementar for CAP Curve Display.
"""
import matplotlib.pyplot as plt
import numpy as np

from sklearn.utils.validation import _check_pos_label_consistency
from sklearn.utils import check_matplotlib_support
from sklearn.utils._plotting import _BinaryClassifierCurveDisplayMixin
from .. import cap_curve


[docs]class CAPCurveDisplay(_BinaryClassifierCurveDisplayMixin): """CAP Curve visualization. Parameters ---------- cumulative_gains : ndarray Cumulative gain with each threshold (percentage of class 1). thresholds : ndarray Increasing thresholds (percentage of examples) on the decision function used to compute cap curve. positive_rate : ndarray Rate of positive class examples to compute the perfect curve. gini : float, default=None Gini score. If None, the gini score is not shown. estimator_name : str, default=None Name of estimator. If None, the estimator name is not shown. pos_label : str or int, default=None The class considered as the positive class when computing the CAP curve. By default, `estimators.classes_[1]` is considered as the positive class. Attributes ---------- line_ : matplotlib Artist CAP Curve. ax_ : matplotlib Axes Axes with CAP Curve. figure_ : matplotlib Figure Figure containing the curve. """ def __init__( self, *, cumulative_gains, thresholds, positive_rate=None, gini=None, estimator_name=None, pos_label=None, ): self.estimator_name = estimator_name self.cumulative_gains = cumulative_gains self.thresholds = thresholds self.positive_rate = positive_rate self.gini = gini self.pos_label = pos_label
[docs] def plot( self, *, plot_random=False, plot_perfect=False, name=None, ax=None, **kwargs, ): """Plot visualization Extra keyword arguments will be passed to matplotlib's ``plot``. Parameters ---------- ax : matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is created. name : str, default=None Name of CAP Curve for labeling. If `None`, use `estimator_name` if not `None`, otherwise no labeling is shown. plot_random : boolean, default = False Flag indicating whether to plot the baseline random curve (True) or not (False). plot_perfect : boolean, default = False Flag indicating whether to plot the baseline perfect curve (True) or not (False). Returns ------- display : :class:`~sklearn.metrics.plot.CAPCurveDisplay` Object that stores computed values. """ check_matplotlib_support("CAPCurveDisplay.plot") name = self.estimator_name if name is None else name line_kwargs = {} if self.gini is not None and name is not None: line_kwargs["label"] = f"{name} (GINI = {self.gini:0.2f})" elif self.gini is not None: line_kwargs["label"] = f"Gini = {self.gini:0.2f}" elif name is not None: line_kwargs["label"] = name line_kwargs.update(**kwargs) if ax is None: _, ax = plt.subplots() if plot_random is True: ax.plot([0, 1], [0, 1], linestyle="--", label="Random Model") if plot_perfect is True and self.positive_rate is not None: ax.plot( [0, self.positive_rate, 1], [0, 1, 1], label="Perfect Model", ) (self.line_,) = ax.plot( self.thresholds, self.cumulative_gains, **line_kwargs ) info_pos_label = ( f" (Positive label: {self.pos_label})" if self.pos_label is not None else "" ) xlabel = "% of Observations" + info_pos_label ylabel = "% of Positive Observations" + info_pos_label ax.set(xlabel=xlabel, ylabel=ylabel) if "label" in line_kwargs: ax.legend(loc="lower right") self.ax_ = ax self.figure_ = ax.figure return self
[docs] @classmethod def from_estimator( cls, estimator, X, y, *, sample_weight=None, response_method="auto", pos_label=None, plot_random=False, plot_perfect=False, name=None, ax=None, **kwargs, ): """Create a CAP Curve display from an estimator. Parameters ---------- estimator : estimator instance Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier. X : {array-like, sparse matrix} of shape (n_samples, n_features) Input values. y : array-like of shape (n_samples,) Target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. response_method : {'predict_proba', 'decision_function', 'auto'} \ default='auto' Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next. pos_label : str or int, default=None The class considered as the positive class when computing the ROC-AUC metric. By default, `estimators.classes_[1]` is considered as the positive class. plot_random : boolean, default = False Flag indicating whether to plot the baseline random curve (True) or not (False). plot_perfect : boolean, default = False Flag indicating whether to plot the baseline perfect curve (True) or not (False). name : str, default=None Name of CAP Curve for labeling. If `None`, use the name of the estimator. ax : matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is created. **kwargs : dict Keyword arguments to be passed to matplotlib's `plot`. Returns ------- display : :class:`~sklearn.metrics.plot.CAPCurveDisplay` The ROC Curve display. Examples -------- >>> import matplotlib.pyplot as plt >>> from datalib.metrics import CAPCurveDisplay >>> from sklearn.datasets import make_classification >>> from sklearn.model_selection import train_test_split >>> from sklearn.svm import SVC >>> X, y = make_classification(random_state=0) >>> X_train, X_test, y_train, y_test = train_test_split(X, y) >>> clf = SVC(random_state=0).fit(X_train, y_train) >>> CAPCurveDisplay.from_estimator(clf, X_test, y_test) >>> plt.show() """ check_matplotlib_support(f"{cls.__name__}.from_estimator") y_score, pos_label, name = cls._validate_and_get_response_values( estimator, X, y, response_method=response_method, pos_label=pos_label, name=name, ) return cls.from_predictions( y_true=y, y_score=y_score, sample_weight=sample_weight, pos_label=pos_label, plot_random=plot_random, plot_perfect=plot_perfect, name=name, ax=ax, **kwargs, )
[docs] @classmethod def from_predictions( cls, y_true, y_score, *, sample_weight=None, pos_label=None, plot_random=False, plot_perfect=False, name=None, ax=None, **kwargs, ): """Plot CAP curve given the true and predicted score. Parameters ---------- y_true : array-like of shape (n_samples,) True labels. y_score : array-like of shape (n_samples,) Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by “decision_function” on some classifiers). sample_weight : array-like of shape (n_samples,), default=None Sample weights. pos_label : str or int, default=None The label of the positive class. When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an error will be raised. plot_random : boolean, default = False Flag indicating whether to plot the baseline random curve (True) or not (False). plot_perfect : boolean, default = False Flag indicating whether to plot the baseline perfect curve (True) or not (False). name : str, default=None Name of ROC curve for labeling. If `None`, name will be set to `"Classifier"`. ax : matplotlib axes, default=None Axes object to plot on. If `None`, a new figure and axes is created. **kwargs : dict Additional keywords arguments passed to matplotlib `plot` function. Returns ------- display : :class:`~sklearn.metrics.CAPCurveDisplay` Object that stores computed values. Examples -------- >>> import matplotlib.pyplot as plt >>> from datalib.metrics import CAPCurveDisplay >>> from sklearn.datasets import make_classification >>> from sklearn.model_selection import train_test_split >>> from sklearn.svm import SVC >>> X, y = make_classification(random_state=0) >>> X_train, X_test, y_train, y_test = train_test_split(X, y) >>> clf = SVC(random_state=0, probability=True).fit(X_train, y_train) >>> y_pred = clf.predict_proba(X_test)[:, 1] >>> CAPCurveDisplay.from_predictions(y_test, y_pred) >>> plt.show() """ check_matplotlib_support(f"{cls.__name__}.from_predictions") cumulative_gains, thresholds, gini = cap_curve( y_true, y_score, sample_weight ) positive_rate = ( np.sum(y_true) / len(y_true) if plot_perfect is True else None ) name = "Classifier" if name is None else name pos_label = _check_pos_label_consistency(pos_label, y_true) viz = CAPCurveDisplay( cumulative_gains=cumulative_gains, thresholds=thresholds, positive_rate=positive_rate, gini=gini, estimator_name=name, pos_label=pos_label, ) return viz.plot( ax=ax, name=name, plot_random=plot_random, plot_perfect=plot_perfect, **kwargs, )