Module ifra.fitters

Expand source code
import os
from typing import Optional, List

import joblib
import numpy as np
from ruleskit import RuleSet
from sklearn import tree
from ruleskit.utils.rule_utils import extract_rules_from_tree
from ruleskit import Rule
import logging

from transparentpath import TransparentPath

from .configs import NodeLearningConfig, NodeDataConfig

logger = logging.getLogger(__name__)


class Fitter:
    """Abstract base class of fitter objects.

    Concrete fitters (e.g. `ifra.fitters.DecisionTreeFitter`) must override `fit` to
    learn a `RuleSet` from data.
    """

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
        **kwargs
    ):
        """

        Parameters
        ----------
        learning_configs: NodeLearningConfig
            `ifra.node.Node`'s *learning_config*
        kwargs:
            Any additional keyword argument that the overloading class accepts. Those arguments will become
            attributes.
        """
        self.learning_configs = learning_configs
        # Fitted model (a RuleSet), set by daughter classes' `fit`. None until fit is done.
        self.model = None
        # Promote any extra keyword arguments to attributes, as documented above.
        for arg in kwargs:
            setattr(self, arg, kwargs[arg])

    def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
        """Learn a RuleSet from features *x* and target *y*.

        Must be implemented in daughter class.

        Parameters
        ----------
        x: np.ndarray
            The features on which to learn
        y: np.ndarray
            The target to predict

        Returns
        -------
        RuleSet
            The fitted model
        """
        # Raising (instead of the previous silent `pass` returning None) surfaces a missing
        # override immediately rather than letting a None model propagate.
        raise NotImplementedError("fit must be implemented in a daughter class of Fitter")


class DecisionTreeFitter(Fitter):

    """Overloads the Fitter class. Fits a decision tree on some data.
     Overloaded again to implement regression or classification by `ifra.fitters.DecisionTreeClassificationFitter`
     and `ifra.fitters.DecisionTreeRegressionFitter`.
    """

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
    ):
        super().__init__(learning_configs)
        # Fitted sklearn decision tree, set by `make_fit` in daughter classes. None until fit is done.
        self.tree = None

    def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
        """Fits the decision tree on *x* and *y* using the node's learning configuration, sets
        `ifra.fitters.DecisionTreeFitter` *tree* and `ifra.fitters.DecisionTreeFitter` *model*, and returns the
        latter. Use `ifra.fitters.DecisionTreeFitter.save` to persist the fitted tree as .dot, .svg and .joblib
        files.

        Parameters
        ----------
        x: np.ndarray
            The features on which to learn
        y: np.ndarray
            The target to predict

        Returns
        -------
        RuleSet
            `ifra.fitters.DecisionTreeFitter` *model*
        """
        # This needs to be done here in case we are using multiprocessing : spawned processes re-import everything
        # from scratch, so Rule.THRESHOLDS are reset
        Rule.SET_THRESHOLDS(self.learning_configs.thresholds_path)
        self.make_fit(
            x=x,
            y=y,
            max_depth=self.learning_configs.max_depth,
            get_leaf=self.learning_configs.get_leaf,
            x_mins=self.learning_configs.x_mins,
            x_maxs=self.learning_configs.x_maxs,
            features_names=self.learning_configs.features_names,
            classes_names=self.learning_configs.classes_names,
        )
        return self.model

    def save(self, path: TransparentPath):
        """Calls `ifra.fitters.DecisionTreeFitter.tree_to_graph` and `ifra.fitters.DecisionTreeFitter.tree_to_joblib`"""
        self.tree_to_graph(path)
        self.tree_to_joblib(path)

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Implement in daughter class"""
        pass

    def tree_to_graph(
        self,
        path: TransparentPath
    ):
        """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .dot file and a .svg file. Does not do anything if
        `ifra.fitters.DecisionTreeFitter` *tree* is None.

        Parameters
        ----------
        path: TransparentPath
            File path where the files should be written. No matter the extension, one file with .dot and another with
            .svg will be created.
        """
        # Guard makes the documented "does nothing if tree is None" actually true: without it,
        # export_graphviz would fail on an unfitted tree.
        if self.tree is None:
            return
        features_names = self.learning_configs.features_names
        path = path.with_suffix(".dot")

        with open(path, "w") as dotfile:
            tree.export_graphviz(
                decision_tree=self.tree,
                out_file=dotfile,
                feature_names=features_names,
                filled=True,
                rounded=True,
                special_characters=True,
            )

        # Render the .dot file to .svg. Requires the graphviz 'dot' executable on the PATH;
        # if it is missing, the .svg is simply not produced.
        os.system(f'dot -Tsvg "{path}" -o "{path.with_suffix(".svg")}"')

    def tree_to_joblib(
        self,
        path: TransparentPath
    ):
        """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .joblib file. Does not do anything if
        `ifra.fitters.DecisionTreeFitter` *tree* is None.

        Parameters
        ----------
        path: TransparentPath
            File path where the files should be written. No matter the extension, a file with .joblib will be created.
        """
        # Guard so an unfitted fitter does not dump a None object, matching the documented behavior.
        if self.tree is None:
            return
        path = path.with_suffix(".joblib")
        joblib.dump(value=self.tree, filename=path)


class DecisionTreeClassificationFitter(DecisionTreeFitter):
    """
    Can be used by giving *decisiontreeclassification* as *fitter* configuration when creating a `ifra.node.Node`


    Attributes
    ----------
    learning_configs: `ifra.configs.NodeLearningConfig`
        The learning configuration of the node using this fitter
    model: Union[None, RuleSet]
        Fitted model, or None if fit not done yet
    tree: Union[None, DecisionTreeClassifier]
        Fitted tree, or None if fit not done yet
    """

    # NOTE: no __init__ needed — the parent constructor (same signature, only forwarded
    # learning_configs) is inherited unchanged.

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Fits x and y using a decision tree classifier, setting
         `ifra.fitters.DecisionTreeFitter` *tree* and
         `ifra.fitters.DecisionTreeFitter` *model*

        x array must contain one column for each feature that can exist across all nodes. Some columns can contain
        only NaNs.

        Parameters
        ----------
        x: np.ndarray
            Must be of shape (# observations, # features)
        y: np.ndarray
            Must be of shape (# observations,)
        max_depth: int
            Maximum tree depth
        get_leaf: bool
            If True, only considers tree's leaves to make rules, else also considers its nodes
        x_mins: Optional[List[float]]
            Lower limits of features. If not specified, will use x.min(axis=0)
        x_maxs: Optional[List[float]]
            Upper limits of features. If not specified, will use x.max(axis=0)
        features_names: Optional[List[str]]
            Names of features
        classes_names: Optional[List[str]]
            Names of the classes
        """

        # Default feature bounds to the observed extrema, coercing any list input to ndarray.
        # NOTE(review): for all-NaN columns (allowed per the docstring) min/max propagate NaN —
        # presumably handled downstream in extract_rules_from_tree; confirm.
        if x_mins is None:
            x_mins = x.min(axis=0)
        elif not isinstance(x_mins, np.ndarray):
            x_mins = np.array(x_mins)
        if x_maxs is None:
            x_maxs = x.max(axis=0)
        elif not isinstance(x_maxs, np.ndarray):
            x_maxs = np.array(x_maxs)

        self.tree = tree.DecisionTreeClassifier(max_depth=max_depth).fit(X=x, y=y)
        # Turn the fitted tree into a RuleSet; rules come from leaves only when get_leaf is True.
        self.model = extract_rules_from_tree(
            self.tree,
            xmins=x_mins,
            xmaxs=x_maxs,
            features_names=features_names,
            classes_names=classes_names,
            get_leaf=get_leaf,
        )

        if len(self.model) > 0:
            # Compute each rule's activation vector and the model's, and will stack the rules' activation vectors if
            # stack_activation is True
            self.model.fit(y=y, xs=x)


class DecisionTreeRegressionFitter(DecisionTreeFitter):
    """
    Can be used by giving *decisiontreeregression* as *fitter* configuration when creating a `ifra.node.Node`


    Attributes
    ----------
    learning_configs: `ifra.configs.NodeLearningConfig`
        The learning configuration of the node using this fitter
    model: Union[None, RuleSet]
        Fitted model, or None if fit not done yet
    tree: Union[None, DecisionTreeRegressor]
        Fitted tree, or None if fit not done yet
    """

    # NOTE: no __init__ needed — the parent constructor (same signature, only forwarded
    # learning_configs) is inherited unchanged.

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Fits x and y using a decision tree regressor, setting
         `ifra.fitters.DecisionTreeRegressionFitter` *tree* and
         `ifra.fitters.DecisionTreeRegressionFitter` *model*

        x array must contain one column for each feature that can exist across all nodes. Some columns can contain
        only NaNs.

        Parameters
        ----------
        x: np.ndarray
            Must be of shape (# observations, # features)
        y: np.ndarray
            Must be of shape (# observations,)
        max_depth: int
            Maximum tree depth
        get_leaf: bool
            If True, only considers tree's leaves to make rules, else also considers its nodes
        x_mins: Optional[List[float]]
            Lower limits of features. If not specified, will use x.min(axis=0)
        x_maxs: Optional[List[float]]
            Upper limits of features. If not specified, will use x.max(axis=0)
        features_names: Optional[List[str]]
            Names of features
        classes_names: Optional[List[str]]
            Names of the classes. Unused here, but present to be compatible with parent class. Can be anything,
            will be ignored in 'extract_rules_from_tree' anyway.
        """

        # Default feature bounds to the observed extrema, coercing any list input to ndarray.
        # NOTE(review): for all-NaN columns (allowed per the docstring) min/max propagate NaN —
        # presumably handled downstream in extract_rules_from_tree; confirm.
        if x_mins is None:
            x_mins = x.min(axis=0)
        elif not isinstance(x_mins, np.ndarray):
            x_mins = np.array(x_mins)
        if x_maxs is None:
            x_maxs = x.max(axis=0)
        elif not isinstance(x_maxs, np.ndarray):
            x_maxs = np.array(x_maxs)

        self.tree = tree.DecisionTreeRegressor(max_depth=max_depth).fit(X=x, y=y)
        # Turn the fitted tree into a RuleSet. Unlike the classification fitter, this one asks
        # for stacked activation vectors (stack_activation=True).
        self.model = extract_rules_from_tree(
            self.tree,
            xmins=x_mins,
            xmaxs=x_maxs,
            features_names=features_names,
            classes_names=classes_names,
            get_leaf=get_leaf,
            stack_activation=True,
        )

        if len(self.model) > 0:
            # Compute each rule's activation vector, and the model's and will
            # stack the rules' activation vectors if stack_activation is True
            self.model.fit(y=y, xs=x)

Classes

class DecisionTreeClassificationFitter (learning_configs: NodeLearningConfig)

Can be used by giving decisiontreeclassification as fitter configuration when creating a Node

Attributes

learning_configs : NodeLearningConfig
The learning configuration of the node using this fitter
model : Union[None, RuleSet]
Fitted model, or None if fit not done yet
tree : Union[None, DecisionTreeClassifier]
Fitted tree, or None if fit not done yet

Parameters

learning_configs : NodeLearningConfig
Node's learning_config
data : NodeDataConfig
Node's data

kwargs: Any additional keyword argument that the overloading class accepts. Those arguments will become attributes.

Expand source code
class DecisionTreeClassificationFitter(DecisionTreeFitter):
    """
    Can be used by giving *decisiontreeclassification* as *fitter* configuration when creating a `ifra.node.Node`


    Attributes
    ----------
    learning_configs: `ifra.configs.NodeLearningConfig`
        The learning configuration of the node using this fitter
    model: Union[None, RuleSet]
        Fitted model, or None if fit not done yet
    tree: Union[None, DecisionTreeClassifier]
        Fitted tree, or None if fit not done yet
    """

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
    ):
        super().__init__(learning_configs)

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Fits x and y using a decision tree cassifier, setting
         `ifra.fitters.DecisionTreeFitter` *tree* and
         `ifra.fitters.DecisionTreeFitter` *model*

        x array must contain one column for each feature that can exist across all nodes. Some columns can contain
        only NaNs.

        Parameters
        ----------
        x: np.ndarray
            Must be of shape (# observations, # features)
        y: np.ndarray
            Must be of shape (# observations,)
        max_depth: int
            Maximum tree depth
        get_leaf: bool
            If True, only considers tree's leaves to make rules, else also considers its nodes
        x_mins: Optional[List[float]]
            Lower limits of features. If not specified, will use x.min(axis=0)
        x_maxs: Optional[List[float]]
            Upper limits of features. If not specified, will use x.max(axis=0)
        features_names: Optional[List[str]]
            Names of features
        classes_names: Optional[List[str]]
            Names of the classes
        """

        if x_mins is None:
            x_mins = x.min(axis=0)
        elif not isinstance(x_mins, np.ndarray):
            x_mins = np.array(x_mins)
        if x_maxs is None:
            x_maxs = x.max(axis=0)
        elif not isinstance(x_maxs, np.ndarray):
            x_maxs = np.array(x_maxs)

        self.tree = tree.DecisionTreeClassifier(max_depth=max_depth).fit(X=x, y=y)
        self.model = extract_rules_from_tree(
            self.tree,
            xmins=x_mins,
            xmaxs=x_maxs,
            features_names=features_names,
            classes_names=classes_names,
            get_leaf=get_leaf,
        )

        if len(self.model) > 0:
            # Compute each rule's activation vector and the model's, and will stack the rules' activation vectors if
            # stack_activation is True
            self.model.fit(y=y, xs=x)
            # self.model.check_duplicated_rules(self.model.rules, name_or_index="name")

Ancestors

Methods

def make_fit(self, x: , y: , max_depth: int, get_leaf: bool, x_mins: Optional[List[float]], x_maxs: Optional[List[float]], features_names: Optional[List[str]], classes_names: Optional[List[str]])

Fits x and y using a decision tree classifier, setting DecisionTreeFitter tree and DecisionTreeFitter model

x array must contain one column for each feature that can exist across all nodes. Some columns can contain only NaNs.

Parameters

x : np.ndarray
Must be of shape (# observations, # features)
y : np.ndarray
Must be of shape (# observations,)
max_depth : int
Maximum tree depth
get_leaf : bool
If True, only considers tree's leaves to make rules, else also considers its nodes
x_mins : Optional[List[float]]
Lower limits of features. If not specified, will use x.min(axis=0)
x_maxs : Optional[List[float]]
Upper limits of features. If not specified, will use x.max(axis=0)
features_names : Optional[List[str]]
Names of features
classes_names : Optional[List[str]]
Names of the classes
Expand source code
def make_fit(
    self,
    x: np.array,
    y: np.array,
    max_depth: int,
    get_leaf: bool,
    x_mins: Optional[List[float]],
    x_maxs: Optional[List[float]],
    features_names: Optional[List[str]],
    classes_names: Optional[List[str]],
):
    """Fits x and y using a decision tree cassifier, setting
     `ifra.fitters.DecisionTreeFitter` *tree* and
     `ifra.fitters.DecisionTreeFitter` *model*

    x array must contain one column for each feature that can exist across all nodes. Some columns can contain
    only NaNs.

    Parameters
    ----------
    x: np.ndarray
        Must be of shape (# observations, # features)
    y: np.ndarray
        Must be of shape (# observations,)
    max_depth: int
        Maximum tree depth
    get_leaf: bool
        If True, only considers tree's leaves to make rules, else also considers its nodes
    x_mins: Optional[List[float]]
        Lower limits of features. If not specified, will use x.min(axis=0)
    x_maxs: Optional[List[float]]
        Upper limits of features. If not specified, will use x.max(axis=0)
    features_names: Optional[List[str]]
        Names of features
    classes_names: Optional[List[str]]
        Names of the classes
    """

    if x_mins is None:
        x_mins = x.min(axis=0)
    elif not isinstance(x_mins, np.ndarray):
        x_mins = np.array(x_mins)
    if x_maxs is None:
        x_maxs = x.max(axis=0)
    elif not isinstance(x_maxs, np.ndarray):
        x_maxs = np.array(x_maxs)

    self.tree = tree.DecisionTreeClassifier(max_depth=max_depth).fit(X=x, y=y)
    self.model = extract_rules_from_tree(
        self.tree,
        xmins=x_mins,
        xmaxs=x_maxs,
        features_names=features_names,
        classes_names=classes_names,
        get_leaf=get_leaf,
    )

    if len(self.model) > 0:
        # Compute each rule's activation vector and the model's, and will stack the rules' activation vectors if
        # stack_activation is True
        self.model.fit(y=y, xs=x)
        # self.model.check_duplicated_rules(self.model.rules, name_or_index="name")

Inherited members

class DecisionTreeFitter (learning_configs: NodeLearningConfig)

Overloads the Fitter class. Fits a decision tree on some data. Overloaded again to implement regression or classification by DecisionTreeClassificationFitter and DecisionTreeRegressionFitter.

Parameters

learning_configs : NodeLearningConfig
Node's learning_config
data : NodeDataConfig
Node's data

kwargs: Any additional keyword argument that the overloading class accepts. Those arguments will become attributes.

Expand source code
class DecisionTreeFitter(Fitter):

    """Overloads the Fitter class. Fits a DecisionTreeFitter on some data.
     Overleaded again to implement regression or classification by `ifra.fitters.DecisionTreeClassificationFitter`
     and `ifra.fitters.DecisionTreeRegressionFitter`.
    """

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
    ):
        super().__init__(learning_configs)
        self.tree = None

    def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
        """Fits the decision tree on the data pointed by `ifra.fitters.DecisionTreeFitter` *data.x_path* and
        `ifra.fitters.DecisionTreeFitter` *data.y_path*, sets
        `ifra.fitters.DecisionTreeFitter`*tree* saves it as a .dot, .svg and .joblib file in the same place
        the node will save its model. Those files will be unique for each time the fit function is called.
        Also sets `ifra.fitters.DecisionTreeFitter` *model* and returns it.

        Parameters
        ----------
        x: np.ndarray
            The features on which to learn
        y: np.ndarray
            The target to predict

        Returns
        -------
        RuleSet
            `ifra.fitters.DecisionTreeFitter` *model*
        """
        # This needs to be done here in case we are using multiprocessing : spawned processes re-import everything
        # from scrach, so Rule.THRESHOLDS are reset
        Rule.SET_THRESHOLDS(self.learning_configs.thresholds_path)
        self.make_fit(
            x=x,
            y=y,
            max_depth=self.learning_configs.max_depth,
            get_leaf=self.learning_configs.get_leaf,
            x_mins=self.learning_configs.x_mins,
            x_maxs=self.learning_configs.x_maxs,
            features_names=self.learning_configs.features_names,
            classes_names=self.learning_configs.classes_names,
        )
        return self.model

    def save(self, path: TransparentPath):
        """Calls `ifra.fitters.DecisionTreeFitter.tree_to_graph` and `ifra.fitters.DecisionTreeFitter.tree_to_joblib`"""
        self.tree_to_graph(path)
        self.tree_to_joblib(path)

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Implement in daughter class"""
        pass

    def tree_to_graph(
        self,
        path: TransparentPath
    ):
        """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .dot file and a .svg file. Does not do anything if
        `ifra.fitters.DecisionTreeFitter` *tree* is None.

        Parameters
        ----------
        path: TransparentPath
            File path where the files should be written. No matter the extension, one file with .dot and another with
            .svg will be created.
        """
        thetree = self.tree
        features_names = self.learning_configs.features_names
        path = path.with_suffix(".dot")

        with open(path, "w") as dotfile:
            tree.export_graphviz(
                decision_tree=thetree,
                out_file=dotfile,
                feature_names=features_names,
                filled=True,
                rounded=True,
                special_characters=True,
            )

        # joblib.dump(self.tree, self.__trees_path / (Y_name + ".joblib"))
        os.system(f'dot -Tsvg "{path}" -o "{path.with_suffix(".svg")}"')

    def tree_to_joblib(
        self,
        path: TransparentPath
    ):
        """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .joblib file. Does not do anything if
        `ifra.fitters.DecisionTreeFitter` *tree* is None.

        Parameters
        ----------
        path: TransparentPath
            File path where the files should be written. No matter the extension, a file with .joblib will be created.
        """

        thetree = self.tree
        path = path.with_suffix(".joblib")
        joblib.dump(value=thetree, filename=path)

Ancestors

Subclasses

Methods

def fit(self, x: numpy.ndarray, y: numpy.ndarray) ‑> ruleskit.ruleset.RuleSet

Fits the decision tree on the data pointed by DecisionTreeFitter data.x_path and DecisionTreeFitter data.y_path, sets DecisionTreeFitter tree and saves it as a .dot, .svg and .joblib file in the same place the node will save its model. Those files will be unique for each time the fit function is called. Also sets DecisionTreeFitter model and returns it.

Parameters

x : np.ndarray
The features on which to learn
y : np.ndarray
The target to predict

Returns

RuleSet
DecisionTreeFitter model
Expand source code
def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
    """Fits the decision tree on the data pointed by `ifra.fitters.DecisionTreeFitter` *data.x_path* and
    `ifra.fitters.DecisionTreeFitter` *data.y_path*, sets
    `ifra.fitters.DecisionTreeFitter`*tree* saves it as a .dot, .svg and .joblib file in the same place
    the node will save its model. Those files will be unique for each time the fit function is called.
    Also sets `ifra.fitters.DecisionTreeFitter` *model* and returns it.

    Parameters
    ----------
    x: np.ndarray
        The features on which to learn
    y: np.ndarray
        The target to predict

    Returns
    -------
    RuleSet
        `ifra.fitters.DecisionTreeFitter` *model*
    """
    # This needs to be done here in case we are using multiprocessing : spawned processes re-import everything
    # from scrach, so Rule.THRESHOLDS are reset
    Rule.SET_THRESHOLDS(self.learning_configs.thresholds_path)
    self.make_fit(
        x=x,
        y=y,
        max_depth=self.learning_configs.max_depth,
        get_leaf=self.learning_configs.get_leaf,
        x_mins=self.learning_configs.x_mins,
        x_maxs=self.learning_configs.x_maxs,
        features_names=self.learning_configs.features_names,
        classes_names=self.learning_configs.classes_names,
    )
    return self.model
def make_fit(self, x: , y: , max_depth: int, get_leaf: bool, x_mins: Optional[List[float]], x_maxs: Optional[List[float]], features_names: Optional[List[str]], classes_names: Optional[List[str]])

Implement in daughter class

Expand source code
def make_fit(
    self,
    x: np.array,
    y: np.array,
    max_depth: int,
    get_leaf: bool,
    x_mins: Optional[List[float]],
    x_maxs: Optional[List[float]],
    features_names: Optional[List[str]],
    classes_names: Optional[List[str]],
):
    """Implement in daughter class"""
    pass
def save(self, path: transparentpath.gcsutils.transparentpath.TransparentPath)
Expand source code
def save(self, path: TransparentPath):
    """Calls `ifra.fitters.DecisionTreeFitter.tree_to_graph` and `ifra.fitters.DecisionTreeFitter.tree_to_joblib`"""
    self.tree_to_graph(path)
    self.tree_to_joblib(path)
def tree_to_graph(self, path: transparentpath.gcsutils.transparentpath.TransparentPath)

Saves DecisionTreeFitter tree to a .dot file and a .svg file. Does not do anything if DecisionTreeFitter tree is None.

Parameters

path : TransparentPath
File path where the files should be written. No matter the extension, one file with .dot and another with .svg will be created.
Expand source code
def tree_to_graph(
    self,
    path: TransparentPath
):
    """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .dot file and a .svg file. Does not do anything if
    `ifra.fitters.DecisionTreeFitter` *tree* is None.

    Parameters
    ----------
    path: TransparentPath
        File path where the files should be written. No matter the extension, one file with .dot and another with
        .svg will be created.
    """
    thetree = self.tree
    features_names = self.learning_configs.features_names
    path = path.with_suffix(".dot")

    with open(path, "w") as dotfile:
        tree.export_graphviz(
            decision_tree=thetree,
            out_file=dotfile,
            feature_names=features_names,
            filled=True,
            rounded=True,
            special_characters=True,
        )

    # joblib.dump(self.tree, self.__trees_path / (Y_name + ".joblib"))
    os.system(f'dot -Tsvg "{path}" -o "{path.with_suffix(".svg")}"')
def tree_to_joblib(self, path: transparentpath.gcsutils.transparentpath.TransparentPath)

Saves DecisionTreeFitter tree to a .joblib file. Does not do anything if DecisionTreeFitter tree is None.

Parameters

path : TransparentPath
File path where the files should be written. No matter the extension, a file with .joblib will be created.
Expand source code
def tree_to_joblib(
    self,
    path: TransparentPath
):
    """Saves `ifra.fitters.DecisionTreeFitter` *tree* to a .joblib file. Does not do anything if
    `ifra.fitters.DecisionTreeFitter` *tree* is None.

    Parameters
    ----------
    path: TransparentPath
        File path where the files should be written. No matter the extension, a file with .joblib will be created.
    """

    thetree = self.tree
    path = path.with_suffix(".joblib")
    joblib.dump(value=thetree, filename=path)
class DecisionTreeRegressionFitter (learning_configs: NodeLearningConfig)

Can be used by giving decisiontreeregression as fitter configuration when creating a Node

Attributes

learning_configs : NodeLearningConfig
The learning configuration of the node using this fitter
model : Union[None, RuleSet]
Fitted model, or None if fit not done yet
tree : Union[None, DecisionTreeRegressor]
Fitted tree, or None if fit not done yet

Parameters

learning_configs : NodeLearningConfig
Node's learning_config
data : NodeDataConfig
Node's data

kwargs: Any additional keyword argument that the overloading class accepts. Those arguments will become attributes.

Expand source code
class DecisionTreeRegressionFitter(DecisionTreeFitter):
    """
    Can be used by giving *decisiontreeregression* as *fitter* configuration when creating a `ifra.node.Node`


    Attributes
    ----------
    learning_configs: `ifra.configs.NodeLearningConfig`
        The learning configuration of the node using this fitter
    model: Union[None, RuleSet]
        Fitted model, or None if fit not done yet
    tree: Union[None, DecisionTreeRegressor]
        Fitted tree, or None if fit not done yet
    """

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
    ):
        super().__init__(learning_configs)

    # noinspection PyArgumentList
    def make_fit(
        self,
        x: np.array,
        y: np.array,
        max_depth: int,
        get_leaf: bool,
        x_mins: Optional[List[float]],
        x_maxs: Optional[List[float]],
        features_names: Optional[List[str]],
        classes_names: Optional[List[str]],
    ):
        """Fits x and y using a decision tree regressor, setting
         `ifra.fitters.DecisionTreeRegressionFitter` *tree* and
         `ifra.fitters.DecisionTreeRegressionFitter` *model*

        x array must contain one column for each feature that can exist across all nodes. Some columns can contain
        only NaNs.

        Parameters
        ----------
        x: np.ndarray
            Must be of shape (# observations, # features)
        y: np.ndarray
            Must be of shape (# observations,)
        max_depth: int
            Maximum tree depth
        get_leaf: bool
            If True, only considers tree's leaves to make rules, else also considers its nodes
        x_mins: Optional[List[float]]
            Lower limits of features. If not specified, will use x.min(axis=0)
        x_maxs: Optional[List[float]]
            Upper limits of features. If not specified, will use x.max(axis=0)
        features_names: Optional[List[str]]
            Names of features
        classes_names: Optional[List[str]]
            Names of the classes. Unused here, but present to be compatible with parent class. Can be anything,
            will be ignored in 'extract_rules_from_tree' anyway.
        """

        if x_mins is None:
            x_mins = x.min(axis=0)
        elif not isinstance(x_mins, np.ndarray):
            x_mins = np.array(x_mins)
        if x_maxs is None:
            x_maxs = x.max(axis=0)
        elif not isinstance(x_maxs, np.ndarray):
            x_maxs = np.array(x_maxs)

        self.tree = tree.DecisionTreeRegressor(max_depth=max_depth).fit(X=x, y=y)
        self.model = extract_rules_from_tree(
            self.tree,
            xmins=x_mins,
            xmaxs=x_maxs,
            features_names=features_names,
            classes_names=classes_names,
            get_leaf=get_leaf,
            stack_activation=True,
        )

        if len(self.model) > 0:
            # Compute each rule's activation vector, and the model's and will
            # stack the rules' activation vectors if stack_activation is True
            self.model.fit(y=y, xs=x)
            # self.model.check_duplicated_rules(self.model.rules, name_or_index="name")

Ancestors

Methods

def make_fit(self, x: np.array, y: np.array, max_depth: int, get_leaf: bool, x_mins: Optional[List[float]], x_maxs: Optional[List[float]], features_names: Optional[List[str]], classes_names: Optional[List[str]])

Fits x and y using a decision tree regressor, setting DecisionTreeRegressionFitter tree and DecisionTreeRegressionFitter model

x array must contain one column for each feature that can exist across all nodes. Some columns can contain only NaNs.

Parameters

x : np.ndarray
Must be of shape (# observations, # features)
y : np.ndarray
Must be of shape (# observations,)
max_depth : int
Maximum tree depth
get_leaf : bool
If True, only considers tree's leaves to make rules, else also considers its nodes
x_mins : Optional[List[float]]
Lower limits of features. If not specified, will use x.min(axis=0)
x_maxs : Optional[List[float]]
Upper limits of features. If not specified, will use x.max(axis=0)
features_names : Optional[List[str]]
Names of features
classes_names : Optional[List[str]]
Names of the classes. Unused here, but present to be compatible with parent class. Can be anything, will be ignored in 'extract_rules_from_tree' anyway.
Expand source code
def make_fit(
    self,
    x: np.ndarray,
    y: np.ndarray,
    max_depth: int,
    get_leaf: bool,
    x_mins: Optional[List[float]],
    x_maxs: Optional[List[float]],
    features_names: Optional[List[str]],
    classes_names: Optional[List[str]],
) -> None:
    """Fits x and y using a decision tree regressor, setting
     `ifra.fitters.DecisionTreeRegressionFitter` *tree* and
     `ifra.fitters.DecisionTreeRegressionFitter` *model*

    x array must contain one column for each feature that can exist across all nodes. Some columns can contain
    only NaNs.

    Parameters
    ----------
    x: np.ndarray
        Must be of shape (# observations, # features)
    y: np.ndarray
        Must be of shape (# observations,)
    max_depth: int
        Maximum tree depth
    get_leaf: bool
        If True, only considers tree's leaves to make rules, else also considers its nodes
    x_mins: Optional[List[float]]
        Lower limits of features. If not specified, will use x.min(axis=0)
    x_maxs: Optional[List[float]]
        Upper limits of features. If not specified, will use x.max(axis=0)
    features_names: Optional[List[str]]
        Names of features
    classes_names: Optional[List[str]]
        Names of the classes. Unused here, but present to be compatible with parent class. Can be anything,
        will be ignored in 'extract_rules_from_tree' anyway.
    """

    # Feature bounds default to the observed data range. np.asarray returns
    # ndarrays unchanged and converts lists, replacing manual isinstance checks.
    x_mins = x.min(axis=0) if x_mins is None else np.asarray(x_mins)
    x_maxs = x.max(axis=0) if x_maxs is None else np.asarray(x_maxs)

    self.tree = tree.DecisionTreeRegressor(max_depth=max_depth).fit(X=x, y=y)
    # Turn the fitted tree's splits into a RuleSet; bounds clip rule intervals
    # to the known feature ranges. stack_activation=True keeps per-rule
    # activation vectors stacked for the later RuleSet.fit call.
    self.model = extract_rules_from_tree(
        self.tree,
        xmins=x_mins,
        xmaxs=x_maxs,
        features_names=features_names,
        classes_names=classes_names,
        get_leaf=get_leaf,
        stack_activation=True,
    )

    if len(self.model) > 0:
        # Compute each rule's activation vector, and the model's, and will
        # stack the rules' activation vectors if stack_activation is True
        self.model.fit(y=y, xs=x)

Inherited members

class Fitter (learning_configs: NodeLearningConfig, **kwargs)

Abstract class of fitter object.

Parameters

learning_configs : NodeLearningConfig
Node's learning_config
data : NodeDataConfig
Node's data

kwargs: Any additional keyword argument that the overloading class accepts. Those arguments will become attributes.

Expand source code
class Fitter:
    """Abstract class of fitter object."""

    def __init__(
        self,
        learning_configs: NodeLearningConfig,
        **kwargs
    ):
        """

        Parameters
        ----------
        learning_configs: NodeLearningConfig
            `ifra.node.Node`'s *learning_config*
        kwargs:
            Any additional keyword argument that the overloading class accepts. Those arguments will become
            attributes.
        """
        self.learning_configs = learning_configs
        # Set by daughter classes' fit(); None until a model has been fitted.
        self.model = None
        # Promote extra keyword arguments to attributes so daughter classes can
        # carry their own configuration without overriding __init__.
        for name, value in kwargs.items():
            setattr(self, name, value)

    def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
        """To be implemented in daughter class"""
        pass

Subclasses

Methods

def fit(self, x: numpy.ndarray, y: numpy.ndarray) ‑> ruleskit.ruleset.RuleSet

To be implemented in daughter class

Expand source code
def fit(self, x: np.ndarray, y: np.ndarray) -> RuleSet:
    """To be implemented in daughter class.

    Daughter classes fit a model on (x, y) and return the resulting RuleSet;
    this base stub does nothing and returns None.

    Parameters
    ----------
    x: np.ndarray
        Must be of shape (# observations, # features)
    y: np.ndarray
        Must be of shape (# observations,)

    Returns
    -------
    RuleSet
        The fitted ruleset (None in this abstract base implementation).
    """
    pass