Module ruleskit.rule

Expand source code
from abc import ABC
import numpy as np
from typing import Optional, Union, Tuple
from time import time
from pathlib import Path
from .condition import Condition
from .activation import Activation
from .utils import rfunctions as functions
from .thresholds import Thresholds
import logging

logger = logging.getLogger(__name__)


# noinspection PyUnresolvedReferences
class Rule(ABC):

    """An abstract Rule object.

    A Rule is a condition (represented by any daughter class of ruleskit.Condition), applied on real features and target
    data.
    The Rule contains, in addition to the Condition object, many attributes dependent on the features data, such as
    the activation vector (a 1-D np.ndarray with 1 when the rule is activated - condition is met - and 0 when it is not)
    but also the rule's prediction (computed in the daughter class).

    Daughter classes can remember more attributes (precision, user-defined criterion...).

    Rule also include metrics that can be used for profiling the code : it will remember the time taken to fit the rule
    (fitting is the computation of the rule's attribute from the condition and the features data), the time taken
    to compute the activation vector and the time taken to make a prediction.

    To compute those metrics, one must use the rule's "fit" methods. Once this is done, one can use the "predict"
    methods on a different set of features data.

    The Rule object can access any attribute of its condition as if it was its own : rule.features_indexes will return
    the features_indexes attribute's value of the condition in the Rule object. See Condition class for more details.

    The Rule object can also access any attribute of its activation vector as if it was its own. See Activation class
    for more details.
    """

    # Forwarded as the 'to_file' argument of Activation in `evaluate`.
    LOCAL_ACTIVATION = True
    THRESHOLDS = None
    """Thresholds that the Rule must meet to be good. See `ruleskit.thresholds.Thresholds` for more details."""

    @classmethod
    def SET_THRESHOLDS(cls, path: Union[str, Path, "TransparentPath"], show=False):
        """Set thresholds globally for all future Rules of the class it is called on (and its daughter classes)."""
        cls.THRESHOLDS = Thresholds(path, show)

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        """
        Parameters
        ----------
        condition: Optional[Condition]
            The condition of the rule. Can not be None if 'activation' is given.
        activation: Optional[Activation]
            An already computed activation vector. If given, the 'coverage' threshold is checked right away.

        Raises
        ------
        TypeError
            If 'condition' or 'activation' is not None and not of the expected type.
        ValueError
            If 'activation' is given without 'condition'.
        """

        if condition is not None and not isinstance(condition, Condition):
            raise TypeError("Argument 'condition' must derive from Condition or be None.")
        if activation is not None and not isinstance(activation, Activation):
            raise TypeError("Argument 'activation' must derive from Activation or be None.")
        if activation is not None and condition is None:
            raise ValueError("Condition can not be None if activation is not None")

        self._condition = condition
        self._activation = activation
        # type(self) rather than Rule : thresholds set through a daughter class' SET_THRESHOLDS are then seen too,
        # while Rule.THRESHOLDS is still found by inheritance otherwise.
        self._thresholds = type(self).THRESHOLDS

        self._coverage = None
        self._prediction = None

        self._time_fit = -1
        self._time_calc_activation = -1
        self._time_predict = -1

        self._good = True
        self._bad_because = None

        # Must be done AFTER the flags above are initialised, otherwise a failed check would be overwritten.
        if self._activation is not None:
            self.check_thresholds("coverage")

    def set_thresholds(self, path: Union[str, Path, "TransparentPath"], show=False):
        """Set thresholds for this rule only"""
        self._thresholds = Thresholds(path, show)

    def check_thresholds(self, attribute: Optional[str] = None) -> None:
        """If thresholds are specified, either for this rule only (see `set_thresholds`) or globally
        (`ruleskit.rule.Rule.THRESHOLDS`), will check that this rule is good regarding those thresholds, and set the
        flags *good* and *bad_because* accordingly. The flags are only ever switched to "bad" here, never reset.

        Parameters
        ----------
        attribute: Optional[str]
            If specified, will only check the threshold of this rule attribute. If not, will test every rule attributes
            for which a threshold is defined.
        """
        # The rule's own thresholds take precedence; fall back on the class-level ones so that thresholds set
        # globally after the rule's creation are still applied.
        thresholds = self._thresholds if self._thresholds is not None else type(self).THRESHOLDS
        if thresholds is None:
            return

        if attribute is not None:
            if not thresholds(attribute, self):
                self._bad_because = attribute
                self._good = False
            return

        for name in dir(self):
            if name.startswith("__"):
                continue
            if not thresholds(name, self):
                # Stop at the first attribute that fails its threshold
                self._bad_because = name
                self._good = False
                return
        logger.debug("Rule %s is good", self)

    @property
    def coverage(self) -> float:
        """Coverage of the rule. Read from the activation vector if there is one (and cached in the rule), else the
        last value stored in the rule (None by default)."""
        if self._activation is not None:
            self._coverage = self._activation.coverage
            return self._activation.coverage
        return self._coverage

    @coverage.setter
    def coverage(self, value):
        if self._activation is not None:
            self._activation.coverage = value
        self._coverage = value

    def __and__(self, other: "Rule") -> "Rule":
        """Logical AND (&) of two rules. It is simply the logical AND of the two rule's conditions and activations.

        If one of the rules has no activation vector, the returned rule has none either.
        """
        condition = self._condition & other._condition
        if self._activation is None or other._activation is None:
            activation = None
        else:
            activation = self._activation & other._activation
        return self.__class__(condition, activation)

    def __add__(self, other: "Rule") -> "Rule":
        """Addition of rules (seen as a 'logical OR') is not supported.

        Raises
        ------
        TypeError
            Always.
        """
        # NotImplemented is a constant, not an exception class : calling it raised an unrelated TypeError.
        raise TypeError("Can not add rules (seen as 'logical OR'). You can use logical AND however.")

    # def __del__(self):
    #     self.del_activation()

    def del_activation(self):
        """Deletes the activation vector's data, but not the object itself, so any computed attributes will remain
        available"""
        if hasattr(self, "_activation") and self._activation is not None:
            self._activation.delete()

    @property
    def activation_available(self) -> bool:
        """Returns True if the rule has an activation vector, and if this Activation's object data is available."""
        if self._activation is None:
            return False
        if self._activation.data_format == "file":
            return self._activation.data.is_file()
        else:
            return self._activation.data is not None

    @property
    def condition(self) -> Condition:
        return self._condition

    @property
    def activation(self) -> Union[None, np.ndarray]:
        """Returns the Activation vector's data in a form of a 1-D np.ndarray, or None if not available.

        Returns
        -------
        np.ndarray
            of the form [0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...]
        """
        # Explicit None test : do not depend on the truthiness of the Activation object
        if self._activation is not None:
            return self._activation.raw
        return None

    @property
    def prediction(self) -> Union[str, float]:
        return self._prediction

    @property
    def thresholds(self) -> Thresholds:
        return self._thresholds

    @property
    def good(self) -> bool:
        return self._good

    @property
    def bad_because(self) -> str:
        return self._bad_because

    @property
    def time_fit(self) -> float:
        """Profiling attribute. Time in seconds taken to fit the rule"""
        return self._time_fit

    @property
    def time_predict(self) -> float:
        """Profiling attribute. Time in seconds taken by the rule to make a prediction"""
        return self._time_predict

    @property
    def time_calc_activation(self) -> float:
        """Profiling attribute. Time in seconds taken to compute the activation vector"""
        return self._time_calc_activation

    def __getattr__(self, item):
        """If item is not found in self, try to fetch it from its activation or condition."""
        # Those two are looked up below : fail right away if they are missing to avoid an infinite recursion
        if item == "_activation" or item == "_condition":
            raise AttributeError(f"'Rule' object has no attribute '{item}'.")

        if hasattr(self._activation, item):
            return getattr(self._activation, item)
        if hasattr(self._condition, item):
            return getattr(self._condition, item)
        raise AttributeError(f"'Rule' object has no attribute '{item}'.")

    def __setattr__(self, item, value):
        """If item is private (starts with _), then default behavior. Else, if the item is not yet known by the rule
        but is known by its condition or activation, will set it to the condition or the activation. Else,
        raises AttributeError."""
        if item.startswith("_"):
            super(Rule, self).__setattr__(item, value)
            return
        if not hasattr(self, item):
            if hasattr(self._activation, item):
                setattr(self._activation, item, value)
            elif hasattr(self._condition, item):
                setattr(self._condition, item, value)
            else:
                raise AttributeError(f"Can not set attribute '{item}' in object Rule.")
        else:
            super(Rule, self).__setattr__(item, value)

    def __eq__(self, other) -> bool:
        """Two rules are equal if their conditions are equal."""
        if not isinstance(other, Rule):
            return False
        else:
            return self._condition == other._condition

    def __contains__(self, other: "Rule") -> bool:
        """A Rule contains another Rule if the second rule's activated points are also all activated by the first rule.
        Returns False if one of the rules has no activation vector.
        """
        if self._activation is None or other._activation is None:
            return False
        return other._activation in self._activation

    def __str__(self) -> str:
        prediction = "<prediction unset>"
        if self._prediction is not None:
            prediction = self._prediction
        if self._condition is None:
            return "empty rule"
        return f"If {self._condition.__str__()} Then {prediction}."

    @property
    def to_hash(self) -> Tuple[str, ...]:
        """The condition's 'to_hash' tuple, with its first element replaced by "r"."""
        return ("r",) + self._condition.to_hash[1:]

    def __hash__(self) -> int:
        return hash(frozenset(self.to_hash))

    def __len__(self):
        """A Rule's length is the number of features it talks about"""
        return len(self._condition)

    def evaluate(self, xs: Union["pd.DataFrame", np.ndarray]) -> Activation:
        """Computes and returns the activation vector from an array of features.

        Parameters
        ----------
        xs: Union[pd:DataFrame, np.ndarray]
            The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd:DataFrame.

        Returns
        -------
        Activation
        """
        arr = self._condition.evaluate(xs)
        # noinspection PyTypeChecker
        a = Activation(arr, to_file=Rule.LOCAL_ACTIVATION)
        return a

    # noinspection PyUnusedLocal
    def fit(self, xs: Union["pd.DataFrame", np.ndarray] = None, y: np.ndarray = None, **kwargs):
        """Computes activation, and other criteria dependant on the nature of the daughter class of the Rule,
        for a given xs and y.

        Parameters
        ----------
        xs: Union[pd:DataFrame, np.ndarray]
            The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd:DataFrame.
        y: np.ndarray
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray.
        kwargs: dict
            Other arguments used by daughter class

        Raises
        ------
        ValueError
            If xs is None while no activation was computed, or if 'calc_attributes' did not set the prediction.
        """
        t0 = time()
        if xs is not None:
            self.calc_activation(xs)

        if self._activation is None:
            raise ValueError("If fitting without specifying xs, activation must have been computed already.")
        self.calc_attributes(y, **kwargs)
        if self.prediction is None:
            raise ValueError("'fit' did not set 'prediction' : did you overload 'calc_attributes' correctly ?")

        self._time_fit = time() - t0

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Implement in daughter class. Must set self._prediction."""
        raise NotImplementedError("To implement in daughter class")

    def calc_activation(self, xs: Union["pd.DataFrame", np.ndarray]):
        """Uses self.evaluate to set self._activation, then checks the 'coverage' threshold.

        Parameters
        ----------
        xs: Union[pd:DataFrame, np.ndarray]
            The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd:DataFrame.
        """
        t0 = time()
        self._activation = self.evaluate(xs)
        self._time_calc_activation = time() - t0
        self.check_thresholds("coverage")

    def predict(self, xs: Optional[Union["pd.DataFrame", np.ndarray]] = None) -> Union[np.ndarray, "pd.Series"]:
        """Returns the prediction vector. If xs is not given, will use existing activation vector.
        Will raise ValueError is xs is None and activation is not yet known.

        Parameters
        ----------
        xs: Optional[Union[pd:DataFrame, np.ndarray]]
            The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd:DataFrame. If not specified the rule's activation vector must have been computed already.

        Returns
        -------
        Union[np.ndarray, pd.Series]
            np.nan where rule is not activated, rule's prediction where it is. If xs was given and it was a dataframe,
            return a pd.Series. Else, a np.ndarray.
        """
        t0 = time()
        if xs is not None:
            self.calc_activation(xs)
        elif self.activation is None:
            raise ValueError("If the activation vector has not been computed yet, xs can not be None.")
        act = self.activation
        to_ret = np.full(len(act), np.nan)
        if isinstance(self.prediction, str):
            if self.prediction == "nan":
                raise ValueError("Prediction should not be the 'nan' string, it will conflict with NaNs."
                                 "Rename your class.")
            # String predictions : not activated points become the 'nan' string
            to_ret = to_ret.astype(str)
        to_ret[act == 1] = self.prediction
        self._time_predict = time() - t0
        if xs is not None and not isinstance(xs, np.ndarray):
            return xs.__class__(index=xs.index, data=to_ret).squeeze()  # So not to require pandas explicitly
        return to_ret

    def get_correlation(self, other: "Rule") -> float:
        """ Computes the correlation between self and other
        Correlation is the number of points in common between the two vectors divided by their length, times the product
        of the rules' signs (a rule predicting 0 has sign 0).
        Both vectors must have the same length.

        Raises
        ------
        ValueError
            If one of the activation vectors is not available, or if they do not have the same length.
        """
        own_vector, other_vector = self.activation, other.activation
        if own_vector is None or other_vector is None:
            raise ValueError("Both rules must have an available activation vector to compute their correlation")
        # Compare the activation vectors : len(rule) is the number of features of the condition, not the vector size
        if len(own_vector) != len(other_vector):
            raise ValueError("Both vectors must have the same length")

        # np.sign avoids the division by zero of 'prediction / abs(prediction)' when a prediction is 0
        sign = np.sign(self.prediction) * np.sign(other.prediction)
        return self._activation.get_correlation(other._activation) * sign


# noinspection PyUnresolvedReferences
class RegressionRule(Rule):

    """Rule applied on continuous target data."""

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        super().__init__(condition, activation)
        self._std = None
        self._criterion = None

        # Inspection / Audit attributes (-1 means "not measured yet")
        self._time_calc_criterion = -1
        self._time_calc_prediction = -1
        self._time_calc_std = -1

    @property
    def std(self) -> float:
        return self._std

    @property
    def criterion(self) -> float:
        return self._criterion

    @property
    def time_calc_prediction(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_prediction"""
        return self._time_calc_prediction

    @property
    def time_calc_criterion(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_criterion"""
        return self._time_calc_criterion

    @property
    def time_calc_std(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_std"""
        return self._time_calc_std

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes prediction, standard deviation, and regression criterion

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_regression_criterion

        Raises
        ------
        ValueError
            If the activation vector is not available.
        """
        # Without activation, calc_prediction and calc_std silently do nothing and the product below would fail
        # with an obscure TypeError : fail explicitly instead, like ClassificationRule does.
        if self.activation is None:
            raise ValueError("The activation vector has not been computed yet.")
        self.calc_prediction(y)
        self.calc_std(y)
        # The rule's prediction where it is activated, 0 elsewhere
        prediction_vector = self.prediction * self.activation
        self.calc_criterion(prediction_vector, y, **kwargs)

    def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
        """Computes the mean of all activated points in target y and use it as prediction, then checks the
        'prediction' threshold. Does nothing if the activation vector is not available.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series
        """
        t0 = time()
        if self.activation is None:
            return None
        self._prediction = functions.conditional_mean(self.activation, y)
        self._time_calc_prediction = time() - t0
        self.check_thresholds("prediction")

    def calc_std(self, y: Union[np.ndarray, "pd.Series"]):
        """Computes the standard deviation of all activated points in target y, then checks the 'std' threshold.
        Does nothing if the activation vector is not available.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        """
        t0 = time()
        if self.activation is None:
            return None
        self._std = functions.conditional_std(self.activation, y)
        self._time_calc_std = time() - t0
        self.check_thresholds("std")

    def calc_criterion(self, p: Union[np.ndarray, "pd.Series"], y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the regression criterion from a prediction vector and the targets, then checks the 'criterion'
        threshold.

        Parameters
        ----------
        p: Union[np.ndarray, pd.Series]
            Prediction vector. Must be a 1-D np.ndarray or pd.Series.
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_regression_criterion
        """
        t0 = time()
        self._criterion = functions.calc_regression_criterion(p, y, **kwargs)
        self._time_calc_criterion = time() - t0
        self.check_thresholds("criterion")

# noinspection PyUnresolvedReferences
class ClassificationRule(Rule):

    """Rule applied on discrete target data."""

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        super().__init__(condition, activation)

        self._criterion = None

        # Profiling attributes (-1 means "not measured yet")
        self._time_calc_criterion = -1
        self._time_calc_prediction = -1

    @property
    def prediction(self) -> Union[int, str, None]:
        """The predicted class, or None if it has not been computed.

        If the stored prediction is a scalar (Python or numpy) or a string it is returned as is. Otherwise it is
        treated as a sequence of pairs whose second element is compared : the first element of the pair with the
        largest second element is returned (the first such pair in case of ties).
        """
        stored = self._prediction
        if stored is None:
            return None
        # np.generic : numpy integer scalars are not instances of int and can not be iterated over
        if isinstance(stored, (float, int, str, np.generic)):
            return stored
        prop = [p[1] for p in stored]
        idx = prop.index(max(prop))
        return stored[idx][0]

    @property
    def criterion(self) -> float:
        return self._criterion

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the prediction, then the classification criterion

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_classification_criterion
        """
        self.calc_prediction(y)
        self.calc_criterion(y, **kwargs)

    def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
        """Stores the result of most_common_class for the activated points as prediction, then checks the
        'prediction' threshold.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.

        Raises
        ------
        ValueError
            If the activation vector is not available.
        """
        t0 = time()
        if self.activation is None:
            raise ValueError("The activation vector has not been computed yet.")
        self._prediction = functions.most_common_class(self.activation, y)
        self._time_calc_prediction = time() - t0
        self.check_thresholds("prediction")

    def calc_criterion(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the classification criterion from the activation vector, the prediction and the targets, then
        checks the 'criterion' threshold.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series
        kwargs: dict
            Arguments for calc_classification_criterion
        """
        t0 = time()
        self._criterion = functions.calc_classification_criterion(self.activation, self.prediction, y, **kwargs)
        self._time_calc_criterion = time() - t0
        self.check_thresholds("criterion")

Classes

class ClassificationRule (condition: Optional[Condition] = None, activation: Optional[Activation] = None)

Rule applied on discret target data.

Expand source code
class ClassificationRule(Rule):

    """Rule applied on discrete target data."""

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        super().__init__(condition, activation)

        self._criterion = None

        # Profiling attributes (-1 means "not measured yet")
        self._time_calc_criterion = -1
        self._time_calc_prediction = -1

    @property
    def prediction(self) -> Union[int, str, None]:
        """The predicted class, or None if it has not been computed.

        If the stored prediction is a scalar (Python or numpy) or a string it is returned as is. Otherwise it is
        treated as a sequence of pairs whose second element is compared : the first element of the pair with the
        largest second element is returned (the first such pair in case of ties).
        """
        stored = self._prediction
        if stored is None:
            return None
        # np.generic : numpy integer scalars are not instances of int and can not be iterated over
        if isinstance(stored, (float, int, str, np.generic)):
            return stored
        prop = [p[1] for p in stored]
        idx = prop.index(max(prop))
        return stored[idx][0]

    @property
    def criterion(self) -> float:
        return self._criterion

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the prediction, then the classification criterion

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_classification_criterion
        """
        self.calc_prediction(y)
        self.calc_criterion(y, **kwargs)

    def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
        """Stores the result of most_common_class for the activated points as prediction, then checks the
        'prediction' threshold.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.

        Raises
        ------
        ValueError
            If the activation vector is not available.
        """
        t0 = time()
        if self.activation is None:
            raise ValueError("The activation vector has not been computed yet.")
        self._prediction = functions.most_common_class(self.activation, y)
        self._time_calc_prediction = time() - t0
        self.check_thresholds("prediction")

    def calc_criterion(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the classification criterion from the activation vector, the prediction and the targets, then
        checks the 'criterion' threshold.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series
        kwargs: dict
            Arguments for calc_classification_criterion
        """
        t0 = time()
        self._criterion = functions.calc_classification_criterion(self.activation, self.prediction, y, **kwargs)
        self._time_calc_criterion = time() - t0
        self.check_thresholds("criterion")

Ancestors

Instance variables

var criterion : float
Expand source code
@property
def criterion(self) -> float:
    """Read-only access to the value stored in ``_criterion``."""
    return self._criterion
var prediction : Union[int, str, None]
Expand source code
@property
def prediction(self) -> Union[int, str, None]:
    """The predicted class, or None if it has not been computed.

    If the stored prediction is a scalar (Python or numpy) or a string it is returned as is. Otherwise it is
    treated as a sequence of pairs whose second element is compared : the first element of the pair with the
    largest second element is returned (the first such pair in case of ties).
    """
    stored = self._prediction
    if stored is None:
        return None
    # np.generic : numpy integer scalars are not instances of int and can not be iterated over
    if isinstance(stored, (float, int, str, np.generic)):
        return stored
    prop = [p[1] for p in stored]
    idx = prop.index(max(prop))
    return stored[idx][0]

Methods

def calc_attributes(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')], **kwargs)

Parameters

xs : Union[pd:DataFrame, np.ndarray]
The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray or pd:DataFrame.
y : Union[np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series.
kwargs : dict
Arguments for calc_classification_criterion
Expand source code
def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
    """Computes the prediction first, then the criterion (in that order).

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series.
    kwargs: dict
        Arguments for calc_classification_criterion
    """
    # The prediction must be known before the criterion is evaluated
    self.calc_prediction(y)
    self.calc_criterion(y, **kwargs)
def calc_criterion(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')], **kwargs)

Parameters

y : Union[np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series
kwargs : dict
Arguments for calc_classification_criterion
Expand source code
def calc_criterion(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
    """Computes the classification criterion from the activation vector, the prediction and the targets, records
    the time it took, then checks the 'criterion' threshold.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series
    kwargs: dict
        Arguments for calc_classification_criterion
    """
    started_at = time()
    self._criterion = functions.calc_classification_criterion(
        self.activation, self.prediction, y, **kwargs
    )
    elapsed = time() - started_at
    self._time_calc_criterion = elapsed
    self.check_thresholds("criterion")
def calc_prediction(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')])

Parameters

y : [np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series.
Expand source code
def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
    """Stores the result of most_common_class for the activated points as prediction, then checks the
    'prediction' threshold.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series.

    Raises
    ------
    ValueError
        If the activation vector is not available.
    """
    t0 = time()
    if self.activation is None:
        raise ValueError("The activation vector has not been computed yet.")
    self._prediction = functions.most_common_class(self.activation, y)
    self._time_calc_prediction = time() - t0
    self.check_thresholds("prediction")

Inherited members

class RegressionRule (condition: Optional[Condition] = None, activation: Optional[Activation] = None)

Rule applied on continuous target data.

Expand source code
class RegressionRule(Rule):

    """Rule applied on continuous target data."""

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        super().__init__(condition, activation)
        self._std = None
        self._criterion = None

        # Inspection / Audit attributes (-1 means "not measured yet")
        self._time_calc_criterion = -1
        self._time_calc_prediction = -1
        self._time_calc_std = -1

    @property
    def std(self) -> float:
        return self._std

    @property
    def criterion(self) -> float:
        return self._criterion

    @property
    def time_calc_prediction(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_prediction"""
        return self._time_calc_prediction

    @property
    def time_calc_criterion(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_criterion"""
        return self._time_calc_criterion

    @property
    def time_calc_std(self):
        """Profiling attribute. Time in seconds taken by the last call to calc_std"""
        return self._time_calc_std

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes prediction, standard deviation, and regression criterion

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_regression_criterion

        Raises
        ------
        ValueError
            If the activation vector is not available.
        """
        # Without activation, calc_prediction and calc_std silently do nothing and the product below would fail
        # with an obscure TypeError : fail explicitly instead, like ClassificationRule does.
        if self.activation is None:
            raise ValueError("The activation vector has not been computed yet.")
        self.calc_prediction(y)
        self.calc_std(y)
        # The rule's prediction where it is activated, 0 elsewhere
        prediction_vector = self.prediction * self.activation
        self.calc_criterion(prediction_vector, y, **kwargs)

    def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
        """Computes the mean of all activated points in target y and use it as prediction, then checks the
        'prediction' threshold. Does nothing if the activation vector is not available.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series
        """
        t0 = time()
        if self.activation is None:
            return None
        self._prediction = functions.conditional_mean(self.activation, y)
        self._time_calc_prediction = time() - t0
        self.check_thresholds("prediction")

    def calc_std(self, y: Union[np.ndarray, "pd.Series"]):
        """Computes the standard deviation of all activated points in target y, then checks the 'std' threshold.
        Does nothing if the activation vector is not available.

        Parameters
        ----------
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        """
        t0 = time()
        if self.activation is None:
            return None
        self._std = functions.conditional_std(self.activation, y)
        self._time_calc_std = time() - t0
        self.check_thresholds("std")

    def calc_criterion(self, p: Union[np.ndarray, "pd.Series"], y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Computes the regression criterion from a prediction vector and the targets, then checks the 'criterion'
        threshold.

        Parameters
        ----------
        p: Union[np.ndarray, pd.Series]
            Prediction vector. Must be a 1-D np.ndarray or pd.Series.
        y: Union[np.ndarray, pd.Series]
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
            or pd.Series.
        kwargs: dict
            Arguments for calc_regression_criterion
        """
        t0 = time()
        self._criterion = functions.calc_regression_criterion(p, y, **kwargs)
        self._time_calc_criterion = time() - t0
        self.check_thresholds("criterion")

Ancestors

Instance variables

var criterion : float
Expand source code
@property
def criterion(self) -> float:
    """Read-only access to the value stored in ``_criterion``."""
    return self._criterion
var std : float
Expand source code
@property
def std(self) -> float:
    """Read-only access to the value stored in ``_std``."""
    return self._std
var time_calc_criterion
Expand source code
@property
def time_calc_criterion(self):
    """Read-only access to the value stored in ``_time_calc_criterion``."""
    return self._time_calc_criterion
var time_calc_prediction
Expand source code
@property
def time_calc_prediction(self):
    """Read-only access to the value stored in ``_time_calc_prediction``."""
    return self._time_calc_prediction
var time_calc_std
Expand source code
@property
def time_calc_std(self):
    """Read-only access to the value stored in ``_time_calc_std``."""
    return self._time_calc_std

Methods

def calc_attributes(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')], **kwargs)

Computes prediction, standard deviation, and regression criterion

Parameters

y : Union[np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series.
kwargs : dict
Arguments for calc_regression_criterion
Expand source code
def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
    """Computes, in this order, the prediction, the standard deviation and the regression criterion.

    The criterion is evaluated on the prediction vector, i.e. the rule's prediction multiplied by its
    activation vector.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series.
    kwargs: dict
        Arguments for calc_regression_criterion
    """
    # The prediction must be computed first: the criterion below depends on it.
    self.calc_prediction(y)
    self.calc_std(y)
    self.calc_criterion(self.prediction * self.activation, y, **kwargs)
def calc_criterion(self, p: Union[numpy.ndarray, ForwardRef('pd.Series')], y: Union[numpy.ndarray, ForwardRef('pd.Series')], **kwargs)

Parameters

p : Union[np.ndarray, pd.Series]
Prediction vector. Must be a 1-D np.ndarray or pd.Series.
y : Union[np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series.
kwargs : dict
Arguments for calc_regression_criterion
Expand source code
def calc_criterion(self, p: Union[np.ndarray, "pd.Series"], y: Union[np.ndarray, "pd.Series"], **kwargs):
    """Computes the regression criterion of prediction vector p against targets y.

    The value is stored in self._criterion, the elapsed time in self._time_calc_criterion, and the
    "criterion" threshold is checked afterwards.

    Parameters
    ----------
    p: Union[np.ndarray, pd.Series]
        Prediction vector. Must be a 1-D np.ndarray or pd.Series.
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series.
    kwargs: dict
        Arguments forwarded to calc_regression_criterion
    """
    started = time()
    criterion_value = functions.calc_regression_criterion(p, y, **kwargs)
    self._criterion = criterion_value
    # Only the criterion computation is timed, not the threshold check below.
    self._time_calc_criterion = time() - started
    self.check_thresholds("criterion")
def calc_prediction(self, y: [numpy.ndarray, 'pd.Series'])

Computes the mean of all activated points in target y and use it as prediction

Parameters

y : [np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series
Expand source code
def calc_prediction(self, y: Union[np.ndarray, "pd.Series"]):
    """Computes the mean of all activated points in target y and use it as prediction

    The result is stored in self._prediction, the elapsed time in self._time_calc_prediction, and the
    "prediction" threshold is checked afterwards. Does nothing (returns None) if the rule has no activation.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series
    """
    t0 = time()
    if self.activation is None:
        # Nothing to average over: leave prediction and timing untouched.
        return None
    self._prediction = functions.conditional_mean(self.activation, y)
    self._time_calc_prediction = time() - t0
    self.check_thresholds("prediction")
def calc_std(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')])

Computes the standard deviation of all activated points in target y

Parameters

y : Union[np.ndarray, pd.Series]
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray or pd.Series.
Expand source code
def calc_std(self, y: Union[np.ndarray, "pd.Series"]):
    """Computes the standard deviation of all activated points in target y

    The result is stored in self._std, the elapsed time in self._time_calc_std, and the "std" threshold is
    checked afterwards. Does nothing if the rule has no activation.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray
        or pd.Series.
    """
    started = time()
    if self.activation is not None:
        self._std = functions.conditional_std(self.activation, y)
        self._time_calc_std = time() - started
        self.check_thresholds("std")

Inherited members

class Rule (condition: Optional[Condition] = None, activation: Optional[Activation] = None)

An abstract Rule object.

A Rule is a condition (represented by any daughter class of ruleskit.Condition), applied on real features and target data. The Rule contains, in addition to the Condition object, many attributes dependent on the features data, such as the activation vector (a 1-D np.ndarray with 1 when the rule is activated - condition is met - and 0 when it is not) but also the rule's prediction (computed in the daughter class).

Daughter classes can remember more attributes (precision, user-defined criterion…).

Rule also include metrics that can be used for profiling the code : it will remember the time taken to fit the rule (fitting is the computation of the rule's attribute from the condition and the features data), the time taken to compute the activation vector and the time taken to make a prediction.

To compute those metrics, one must use the rule's "fit" methods. Once this is done, one can use the "predict" methods on a different set of features data.

The Rule object can access any attribute of its condition as if it was its own : rule.features_indexes will return the features_indexes attribute's value of the condition in the Rule object. See Condition class for more details.

The Rule object can also access any attribute of its activation vector as if it was its own. See Activation class for more details.

Expand source code
class Rule(ABC):

    """An abstract Rule object.

    A Rule is a condition (represented by any daughter class of ruleskit.Condition), applied on real features and target
    data.
    The Rule contains, in addition to the Condition object, many attributes dependent on the features data, such as
    the activation vector (a 1-D np.ndarray with 1 when the rule is activated - condition is met - and 0 when it is not)
    but also the rule's prediction (computed in the daughter class).

    Daughter classes can remember more attributes (precision, user-defined criterion...).

    Rule also includes metrics that can be used for profiling the code : it will remember the time taken to fit the
    rule (fitting is the computation of the rule's attribute from the condition and the features data), the time taken
    to compute the activation vector and the time taken to make a prediction.

    To compute those metrics, one must use the rule's "fit" methods. Once this is done, one can use the "predict"
    methods on a different set of features data.

    The Rule object can access any attribute of its condition as if it was its own : rule.features_indexes will return
    the features_indexes attribute's value of the condition in the Rule object. See Condition class for more details.

    The Rule object can also access any attribute of its activation vector as if it was its own. See Activation class
    for more details.
    """

    LOCAL_ACTIVATION = True
    THRESHOLDS = None
    """Thresholds that the Rule must meet to be good. See `ruleskit.thresholds.Thresholds` for more details."""

    @classmethod
    def SET_THRESHOLDS(cls, path: Union[str, Path, "TransparentPath"], show=False):
        """Set thresholds globally for all future Rules"""
        cls.THRESHOLDS = Thresholds(path, show)

    def __init__(
        self, condition: Optional[Condition] = None, activation: Optional[Activation] = None,
    ):
        """
        Parameters
        ----------
        condition: Optional[Condition]
            The rule's condition. Can not be None if activation is given.
        activation: Optional[Activation]
            An already computed activation vector.

        Raises
        ------
        TypeError
            If condition or activation is given but is not of the expected class.
        ValueError
            If activation is given without a condition.
        """

        if condition is not None and not isinstance(condition, Condition):
            raise TypeError("Argument 'condition' must derive from Condition or be None.")
        if activation is not None and not isinstance(activation, Activation):
            raise TypeError("Argument 'activation' must derive from Activation or be None.")
        if activation is not None and condition is None:
            raise ValueError("Condition can not be None if activation is not None")

        self._condition = condition
        self._activation = activation
        self._thresholds = Rule.THRESHOLDS

        self._coverage = None
        self._prediction = None

        # -1 means "not measured yet"
        self._time_fit = -1
        self._time_calc_activation = -1
        self._time_predict = -1

        self._good = True
        self._bad_because = None

        # Must come after the flags above are initialised: check_thresholds may set _good / _bad_because, and that
        # verdict would be lost if the defaults were assigned afterwards.
        if self._activation is not None:
            self.check_thresholds("coverage")

    def set_thresholds(self, path: Union[str, Path, "TransparentPath"], show=False):
        """Set thresholds for this rule only"""
        self._thresholds = Thresholds(path, show)

    def check_thresholds(self, attribute: Optional[str] = None) -> None:
        """If `ruleskit.rule.Rule.THRESHOLDS` is specified, will check that this rule is good regarding those
        thresholds, and set the flags *good* and *bad_because* accordingly

        Parameters
        ----------
        attribute: Optional[str]
            If specified, will only check the threshold of this rule attribute. If not, will test every rule attributes
            for which a threshold is defined.
        """

        # NOTE(review): only the class-level Rule.THRESHOLDS is consulted here; self._thresholds (see
        # set_thresholds) is never read by this method - confirm whether per-rule thresholds should apply.
        if Rule.THRESHOLDS is None:
            return

        if attribute is not None:
            if not Rule.THRESHOLDS(attribute, self):
                self._bad_because = attribute
                self._good = False
            return

        for attribute in dir(self):
            if attribute.startswith("__"):
                continue
            if not Rule.THRESHOLDS(attribute, self):
                # Stop at the first attribute that does not meet its threshold
                self._bad_because = attribute
                self._good = False
                return
        logger.debug(f"Rule {self} is good")

    @property
    def coverage(self) -> float:
        """Coverage of the activation if there is one (also cached in self._coverage), else the stored value."""
        if self._activation is not None:
            self._coverage = self._activation.coverage
            return self._activation.coverage
        return self._coverage

    @coverage.setter
    def coverage(self, value):
        if self._activation is not None:
            self._activation.coverage = value
        self._coverage = value

    def __and__(self, other: "Rule") -> "Rule":
        """Logical AND (&) of two rules. It is simply the logical AND of the two rule's conditions and activations. """
        condition = self._condition & other._condition
        activation = self._activation & other._activation
        return self.__class__(condition, activation)

    def __add__(self, other: "Rule") -> "Rule":
        """Adding rules (seen as a logical OR) is not supported.

        Raises
        ------
        TypeError
            Always.
        """
        # NotImplemented is a non-callable singleton and can not carry a message, so raise explicitly.
        raise TypeError("Can not add rules (seen as 'logical OR'). You can use logical AND however.")

    # def __del__(self):
    #     self.del_activation()

    def del_activation(self):
        """Deletes the activation vector's data, but not the object itself, so any computed attributes will remain
        available"""
        if hasattr(self, "_activation") and self._activation is not None:
            self._activation.delete()

    @property
    def activation_available(self) -> bool:
        """Returns True if the rule has an activation vector, and if this Activation's object data is available."""
        if self._activation is None:
            return False
        if self._activation.data_format == "file":
            return self._activation.data.is_file()
        else:
            return self._activation.data is not None

    @property
    def condition(self) -> Condition:
        """The Condition object given at construction (may be None)."""
        return self._condition

    @property
    def activation(self) -> Union[None, np.ndarray]:
        """Returns the Activation vector's data in a form of a 1-D np.ndarray, or None if not available.

        Returns
        -------
        np.ndarray
            of the form [0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...]
        """
        if self._activation:
            return self._activation.raw
        return None

    @property
    def prediction(self) -> Union[str, float]:
        """The rule's prediction, None until set by the daughter class' calc_attributes."""
        return self._prediction

    @property
    def thresholds(self) -> Thresholds:
        """Rule.THRESHOLDS as it was at construction, or the object created by set_thresholds."""
        return self._thresholds

    @property
    def good(self) -> bool:
        """False once check_thresholds found an attribute that does not meet its threshold."""
        return self._good

    @property
    def bad_because(self) -> Optional[str]:
        """Name of the attribute that failed its threshold check, None if none did."""
        return self._bad_because

    @property
    def time_fit(self) -> float:
        """Profiling attribute. Time in seconds taken to fit the rule"""
        return self._time_fit

    @property
    def time_predict(self) -> float:
        """Profiling attribute. Time in seconds taken by the rule to make a prediction"""
        return self._time_predict

    @property
    def time_calc_activation(self) -> float:
        """Profiling attribute. Time in seconds taken to compute the activation vector"""
        return self._time_calc_activation

    def __getattr__(self, item):
        """If item is not found in self, try to fetch it from its activation or condition."""
        # Guard against infinite recursion when _activation / _condition themselves are not set yet.
        if item == "_activation" or item == "_condition":
            raise AttributeError(f"'Rule' object has no attribute '{item}'.")

        if hasattr(self._activation, item):
            return getattr(self._activation, item)
        if hasattr(self._condition, item):
            return getattr(self._condition, item)
        raise AttributeError(f"'Rule' object has no attribute '{item}'.")

    def __setattr__(self, item, value):
        """If item is private (starts with _), then default behavior. Else, if the item is not yet known by the rule
        but is known by its condition or activation, will set it to the condition or the activation. Else,
        raises AttributeError."""
        if item.startswith("_"):
            super(Rule, self).__setattr__(item, value)
            return
        if not hasattr(self, item):
            if hasattr(self._activation, item):
                setattr(self._activation, item, value)
            elif hasattr(self._condition, item):
                setattr(self._condition, item, value)
            else:
                raise AttributeError(f"Can not set attribute '{item}' in object Rule.")
        else:
            super(Rule, self).__setattr__(item, value)

    def __eq__(self, other) -> bool:
        """Two rules are equal if their conditions are equal."""
        if not isinstance(other, Rule):
            return False
        else:
            return self._condition == other._condition

    def __contains__(self, other: "Rule") -> bool:
        """A Rule contains another Rule if the second rule's activated points are also all activated by the first rule.
        """
        if not self._activation or not other._activation:
            return False
        return other._activation in self._activation

    def __str__(self) -> str:
        prediction = "<prediction unset>"
        if self._prediction is not None:
            prediction = self._prediction
        if self._condition is None:
            return "empty rule"
        return f"If {self._condition.__str__()} Then {prediction}."

    @property
    def to_hash(self) -> Tuple[str, ...]:
        """The condition's to_hash tuple with its first element replaced by "r"."""
        return ("r",) + self._condition.to_hash[1:]

    def __hash__(self) -> int:
        return hash(frozenset(self.to_hash))

    def __len__(self):
        """A Rule's length is the number of features it talks about"""
        return len(self._condition)

    def evaluate(self, xs: Union["pd.DataFrame", np.ndarray]) -> Activation:
        """Computes and returns the activation vector from an array of features.

        Parameters
        ----------
        xs: Union[pd.DataFrame, np.ndarray]
            The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd.DataFrame.

        Returns
        -------
        Activation
        """
        arr = self._condition.evaluate(xs)
        # noinspection PyTypeChecker
        a = Activation(arr, to_file=Rule.LOCAL_ACTIVATION)
        return a

    # noinspection PyUnusedLocal
    def fit(self, xs: Union["pd.DataFrame", np.ndarray] = None, y: np.ndarray = None, **kwargs):
        """Computes activation, and other criteria dependent on the nature of the daughter class of the Rule,
        for a given xs and y.

        Parameters
        ----------
        xs: Union[pd.DataFrame, np.ndarray]
            The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd.DataFrame.
        y: np.ndarray
            The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray.
        kwargs: dict
            Other arguments used by daughter class

        Raises
        ------
        ValueError
            If xs is None and no activation is known, or if calc_attributes did not set the prediction.
        """
        t0 = time()
        if xs is not None:
            self.calc_activation(xs)

        if self._activation is None:
            raise ValueError("If fitting without specifying xs, activation must have been computed already.")
        self.calc_attributes(y, **kwargs)
        if self.prediction is None:
            raise ValueError("'fit' did not set 'prediction' : did you overload 'calc_attributes' correctly ?")

        self._time_fit = time() - t0

    def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
        """Implement in daughter class. Must set self._prediction."""
        raise NotImplementedError("To implement in daughter class")

    def calc_activation(self, xs: Union["pd.DataFrame", np.ndarray]):
        """Uses self.evaluate to set self._activation.

        Parameters
        ----------
        xs: Union[pd.DataFrame, np.ndarray]
            The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd.DataFrame.
        """
        t0 = time()
        self._activation = self.evaluate(xs)
        self._time_calc_activation = time() - t0
        self.check_thresholds("coverage")

    def predict(self, xs: Optional[Union["pd.DataFrame", np.ndarray]] = None) -> Union[np.ndarray, "pd.Series"]:
        """Returns the prediction vector. If xs is not given, will use existing activation vector.
        Will raise ValueError if xs is None and activation is not yet known.

        Parameters
        ----------
        xs: Optional[Union[pd.DataFrame, np.ndarray]]
            The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
            or pd.DataFrame. If not specified the rule's activation vector must have been computed already.

        Returns
        -------
        Union[np.ndarray, pd.Series]
            np.nan where rule is not activated, rule's prediction where it is. If xs was given and it was a dataframe,
            return a pd.Series. Else, a np.ndarray.
        """
        t0 = time()
        if xs is not None:
            self.calc_activation(xs)
        elif self.activation is None:
            raise ValueError("If the activation vector has not been computed yet, xs can not be None.")
        act = self.activation
        to_ret = np.array([np.nan] * len(act))
        if isinstance(self.prediction, str):
            # Once cast to str, non-activated points read "nan": a "nan" prediction would be indistinguishable.
            if self.prediction == "nan":
                raise ValueError("Prediction should not be the 'nan' string, it will conflict with NaNs. "
                                 "Rename your class.")
            to_ret = to_ret.astype(str)
        to_ret[act == 1] = self.prediction
        self._time_predict = time() - t0
        if xs is not None and not isinstance(xs, np.ndarray):
            return xs.__class__(index=xs.index, data=to_ret).squeeze()  # So as not to require pandas explicitly
        return to_ret

    def get_correlation(self, other: "Rule") -> float:
        """ Computes the correlation between self and other
        Correlation is the number of points in common between the two vectors divided by their length, times the product
        of the rules' signs.
        Both vectors must have the same length.
        """
        # NOTE(review): len(rule) is the length of the rule's condition (see __len__), not of its activation
        # vector - confirm this is the check intended by the error message below.
        if not len(self) == len(other):
            raise ValueError("Both vectors must have the same length")

        sign = (self.prediction / abs(self.prediction)) * (other.prediction / abs(other.prediction))
        return self._activation.get_correlation(other._activation) * sign

Ancestors

  • abc.ABC

Subclasses

Class variables

var LOCAL_ACTIVATION
var THRESHOLDS

Thresholds that the Rule must meet to be good. See Thresholds for more details.

Static methods

def SET_THRESHOLDS(path: Union[str, pathlib.Path, ForwardRef('TransparentPath')], show=False)

Set thresholds globally for all future Rules

Expand source code
@classmethod
def SET_THRESHOLDS(cls, path: Union[str, Path, "TransparentPath"], show=False):
    """Set thresholds globally for all future Rules.

    Builds a Thresholds object from path and show, and stores it in the class attribute THRESHOLDS.
    """
    cls.THRESHOLDS = Thresholds(path, show)

Instance variables

var activation : Optional[numpy.ndarray]

Returns the Activation vector's data in a form of a 1-D np.ndarray, or None if not available.

Returns

np.ndarray
of the form [0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, …]
Expand source code
@property
def activation(self) -> Union[None, np.ndarray]:
    """Raw data of the rule's Activation object, or None when the rule has no (truthy) activation.

    Returns
    -------
    np.ndarray
        of the form [0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...]
    """
    return self._activation.raw if self._activation else None
var activation_available : bool

Returns True if the rule has an activation vector, and if this Activation's object data is available.

Expand source code
@property
def activation_available(self) -> bool:
    """Tells whether the rule has an Activation object whose data can still be reached.

    For an activation stored as a file, the file must exist; otherwise the data must not be None.
    """
    act = self._activation
    if act is None:
        return False
    if act.data_format == "file":
        return act.data.is_file()
    return act.data is not None
var bad_because : str
Expand source code
@property
def bad_because(self) -> Optional[str]:
    """Name of the attribute recorded by check_thresholds as failing its threshold; None if none was recorded."""
    return self._bad_because
var condition : Condition
Expand source code
@property
def condition(self) -> Condition:
    """The Condition object given to the rule at construction (may be None)."""
    return self._condition
var coverage : float
Expand source code
@property
def coverage(self) -> float:
    """Coverage of the rule.

    When an activation is present its coverage is returned and also cached in self._coverage; otherwise the
    cached value is returned.
    """
    act = self._activation
    if act is None:
        return self._coverage
    self._coverage = act.coverage
    return act.coverage
var good : bool
Expand source code
@property
def good(self) -> bool:
    """Flag initialised to True in __init__ and set to False by check_thresholds when a threshold is not met."""
    return self._good
var prediction : Union[str, float]
Expand source code
@property
def prediction(self) -> Union[str, float]:
    """The rule's prediction; None until set (calc_attributes of the daughter class must set it)."""
    return self._prediction
var thresholds : Thresholds
Expand source code
@property
def thresholds(self) -> Thresholds:
    """Thresholds held by this rule: Rule.THRESHOLDS as captured in __init__, or the object built by
    set_thresholds."""
    return self._thresholds
var time_calc_activation : float

Profiling attribute. Time in seconds taken to compute the activation vector

Expand source code
@property
def time_calc_activation(self) -> float:
    """Profiling attribute. Time in seconds taken to compute the activation vector (-1 until calc_activation has
    run)"""
    return self._time_calc_activation
var time_fit : float

Profiling attribute. Time in seconds taken to fit the rule

Expand source code
@property
def time_fit(self) -> float:
    """Profiling attribute. Time in seconds taken to fit the rule (-1 until fit has completed)"""
    return self._time_fit
var time_predict : float

Profiling attribute. Time in seconds taken by the rule to make a prediction

Expand source code
@property
def time_predict(self) -> float:
    """Profiling attribute. Time in seconds taken by the rule to make a prediction (-1 until predict has run)"""
    return self._time_predict
var to_hash : Tuple[str]
Expand source code
@property
def to_hash(self) -> Tuple[str, ...]:
    """Tuple made of "r" followed by every element but the first of the condition's to_hash."""
    return ("r",) + self._condition.to_hash[1:]

Methods

def calc_activation(self, xs: Union[ForwardRef('pd.DataFrame'), numpy.ndarray])

Uses self.evaluate to set self._activation.

Parameters

xs : Union[pd:DataFrame, np.ndarray]
The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray or pd:DataFrame.
Expand source code
def calc_activation(self, xs: Union["pd.DataFrame", np.ndarray]):
    """Evaluates the rule on xs and stores the resulting activation in self._activation.

    The time taken by the evaluation is stored in self._time_calc_activation, then the "coverage" threshold is
    checked.

    Parameters
    ----------
    xs: Union[pd.DataFrame, np.ndarray]
        The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
        or pd.DataFrame.
    """
    started = time()
    new_activation = self.evaluate(xs)
    self._activation = new_activation
    elapsed = time() - started
    self._time_calc_activation = elapsed
    self.check_thresholds("coverage")
def calc_attributes(self, y: Union[numpy.ndarray, ForwardRef('pd.Series')], **kwargs)

Implement in daughter class. Must set self._prediction.

Expand source code
def calc_attributes(self, y: Union[np.ndarray, "pd.Series"], **kwargs):
    """Implement in daughter class. Must set self._prediction.

    Parameters
    ----------
    y: Union[np.ndarray, pd.Series]
        The targets passed to fit.
    kwargs: dict
        Other arguments passed to fit.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    raise NotImplementedError("To implement in daughter class")
def check_thresholds(self, attribute: Optional[str] = None) ‑> None

If Rule.THRESHOLDS is specified, will check that this rule is good regarding those thresholds, and set the flags good and bad_because accordingly

Parameters

attribute : Optional[str]
If specified, will only check the threshold of this rule attribute. If not, will test every rule attributes for which a threshold is defined.
Expand source code
def check_thresholds(self, attribute: Optional[str] = None) -> None:
    """Checks this rule against `ruleskit.rule.Rule.THRESHOLDS`, if it is set, and updates the flags *good* and
    *bad_because* when a threshold is not met.

    Parameters
    ----------
    attribute: Optional[str]
        If specified, will only check the threshold of this rule attribute. If not, will test every rule attributes
        for which a threshold is defined, stopping at the first one that fails.
    """
    # NOTE(review): only the class-level Rule.THRESHOLDS is consulted; self._thresholds is not read here.
    if Rule.THRESHOLDS is None:
        return

    if attribute is not None:
        passed = Rule.THRESHOLDS(attribute, self)
        if not passed:
            self._bad_because = attribute
            self._good = False
        return

    # No attribute given: go through everything the rule exposes, dunder names excepted.
    candidates = (name for name in dir(self) if not name.startswith("__"))
    for name in candidates:
        if Rule.THRESHOLDS(name, self):
            continue
        self._bad_because = name
        self._good = False
        return
    logger.debug(f"Rule {self} is good")
def del_activation(self)

Deletes the activation vector's data, but not the object itself, so any computed attributes will remain available

Expand source code
def del_activation(self):
    """Calls delete() on the rule's Activation object, if there is one.

    The Activation object itself is kept, so any computed attributes will remain available.
    """
    # getattr with a default also covers the case where _activation was never set on the object.
    act = getattr(self, "_activation", None)
    if act is not None:
        act.delete()
def evaluate(self, xs: Union[ForwardRef('pd.DataFrame'), numpy.ndarray]) ‑> Activation

Computes and returns the activation vector from an array of features.

Parameters

xs : Union[pd:DataFrame, np.ndarray]
The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray or pd:DataFrame.

Returns

Activation
 
Expand source code
def evaluate(self, xs: Union["pd.DataFrame", np.ndarray]) -> Activation:
    """Evaluates the rule's condition on xs and wraps the result in an Activation object.

    Parameters
    ----------
    xs: Union[pd.DataFrame, np.ndarray]
        The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
        or pd.DataFrame.

    Returns
    -------
    Activation
        Built with to_file set to Rule.LOCAL_ACTIVATION.
    """
    # noinspection PyTypeChecker
    return Activation(self._condition.evaluate(xs), to_file=Rule.LOCAL_ACTIVATION)
def fit(self, xs: Union[ForwardRef('pd.DataFrame'), numpy.ndarray] = None, y: numpy.ndarray = None, **kwargs)

Computes activation, and other criteria dependant on the nature of the daughter class of the Rule, for a given xs and y.

Parameters

xs : Union[pd:DataFrame, np.ndarray]
The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray or pd:DataFrame.
y : np.ndarray
The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray.
kwargs : dict
Other arguments used by daughter class
Expand source code
def fit(self, xs: Union["pd.DataFrame", np.ndarray] = None, y: np.ndarray = None, **kwargs):
    """Fits the rule: computes the activation (when xs is given), then the attributes defined by the daughter
    class through calc_attributes. The total time taken is stored in self._time_fit.

    Parameters
    ----------
    xs: Union[pd.DataFrame, np.ndarray]
        The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
        or pd.DataFrame. Can be omitted if the activation is already known.
    y: np.ndarray
        The targets on which to evaluate the rule prediction, and possibly other criteria. Must be a 1-D np.ndarray.
    kwargs: dict
        Other arguments used by daughter class

    Raises
    ------
    ValueError
        If no activation is available, or if calc_attributes left the prediction unset.
    """
    started = time()
    recompute_activation = xs is not None
    if recompute_activation:
        self.calc_activation(xs)

    if self._activation is None:
        raise ValueError("If fitting without specifying xs, activation must have been computed already.")

    self.calc_attributes(y, **kwargs)
    if self.prediction is None:
        raise ValueError("'fit' did not set 'prediction' : did you overload 'calc_attributes' correctly ?")

    elapsed = time() - started
    self._time_fit = elapsed
def get_correlation(self, other: Rule) ‑> float

Computes the correlation between self and other. Correlation is the number of points in common between the two vectors divided by their length, times the product of the rules' signs. Both vectors must have the same length.

Expand source code
def get_correlation(self, other: "Rule") -> float:
    """Correlation between this rule and another one.

    It is the correlation of the two activations (as computed by the activation's own get_correlation) multiplied
    by the product of the signs of the two rules' predictions.

    Raises
    ------
    ValueError
        If len(self) differs from len(other).
    """
    # NOTE(review): len() of a rule may not be the length of its activation vector - confirm the intended check.
    if len(self) != len(other):
        raise ValueError("Both vectors must have the same length")

    own_sign = self.prediction / abs(self.prediction)
    other_sign = other.prediction / abs(other.prediction)
    sign = own_sign * other_sign
    return self._activation.get_correlation(other._activation) * sign
def predict(self, xs: Union[ForwardRef('pd.DataFrame'), numpy.ndarray, ForwardRef(None)] = None) ‑> Union[numpy.ndarray, pd.Series]

Returns the prediction vector. If xs is not given, will use existing activation vector. Will raise ValueError if xs is None and activation is not yet known.

Parameters

xs : Optional[Union[pd:DataFrame, np.ndarray]]
The features on which the check whether the rule is activated or not. Must be a 2-D np.ndarray or pd:DataFrame. If not specified the rule's activation vector must have been computed already.

Returns

Union[np.ndarray, pd.Series]
np.nan where rule is not activated, rule's prediction where it is. If xs was given and it was a dataframe, return a pd.Series. Else, a np.ndarray.
Expand source code
def predict(self, xs: Optional[Union["pd.DataFrame", np.ndarray]] = None) -> Union[np.ndarray, "pd.Series"]:
    """Builds the prediction vector of the rule, recomputing the activation first when xs is given.

    The time taken is stored in self._time_predict.

    Parameters
    ----------
    xs: Optional[Union[pd.DataFrame, np.ndarray]]
        The features on which to check whether the rule is activated or not. Must be a 2-D np.ndarray
        or pd.DataFrame. If not specified the rule's activation vector must have been computed already.

    Returns
    -------
    Union[np.ndarray, pd.Series]
        np.nan where rule is not activated, rule's prediction where it is. If xs was given and is not a np.ndarray,
        an object of the same class as xs is built with xs' index and squeezed. Else, a np.ndarray.

    Raises
    ------
    ValueError
        If xs is None and no activation is available, or if the prediction is the string "nan".
    """
    started = time()
    if xs is None:
        if self.activation is None:
            raise ValueError("If the activation vector has not been computed yet, xs can not be None.")
    else:
        self.calc_activation(xs)

    activation_vector = self.activation
    prediction = self.prediction
    predictions = np.full(len(activation_vector), np.nan)
    if isinstance(prediction, str):
        # After the cast to str, non-activated entries hold the text "nan".
        if prediction == "nan":
            raise ValueError("Prediction should not be the 'nan' string, it will conflict with NaNs."
                             "Rename your class.")
        predictions = predictions.astype(str)
    predictions[activation_vector == 1] = prediction
    self._time_predict = time() - started

    if xs is None or isinstance(xs, np.ndarray):
        return predictions
    # Build the result from xs' own class so that pandas does not have to be imported here.
    return xs.__class__(index=xs.index, data=predictions).squeeze()
def set_thresholds(self, path: Union[str, pathlib.Path, ForwardRef('TransparentPath')], show=False)

Set thresholds for this rule only

Expand source code
def set_thresholds(self, path: Union[str, Path, "TransparentPath"], show=False):
    """Set thresholds for this rule only.

    Builds a Thresholds object from path and show, and stores it in self._thresholds.
    """
    self._thresholds = Thresholds(path, show)