Module ruleskit.utils.rule_utils

from typing import List, Union
import copy
import numpy as np
from ..rule import RegressionRule, ClassificationRule
from ..ruleset import RuleSet
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from ..condition import HyperrectangleCondition

# noinspection PyProtectedMember
from sklearn.tree import _tree
import logging

logger = logging.getLogger(__name__)


def extract_rules_from_tree(
    tree: Union[DecisionTreeClassifier, DecisionTreeRegressor],
    xmins: Union[List[float], np.ndarray],
    xmaxs: Union[List[float], np.ndarray],
    features_names: List[str] = None,
    classes_names: List[str] = None,
    get_leaf: bool = False,
    compute_activation: bool = True,
    stack_activation: bool = False,
) -> RuleSet:
    """To extract rules from a sklearn decision tree

    features are the list of X names.
    In the tree ('decision_tree' later in the code), the list of relevant Xs are represented
    by their position in the list of Xs that was fed to the tree at learning.
    So to access the name of the feature relevant for a given node in the tree,
    one has first to do decision_tree.feature[node] to access the feature index in x_names,
    then do x_names[decision_tree.feature[node]]

    xmin and xmax are Series.
    Index is a X name, values are the min and the man value of the X for a given Y.
    The Y was specified before calling this function so it does not appear here.

    If all_rules_list is not None, it will be appended with the extracted rules,
    and nothing is returned. Else, returns the extracted rules

    Later in the code:
    bmins is, for a given condition, the minimum value that X must have to be selected
    By analogy, bmaxs is the max value.

    Hence bmaxs must always be >= to bmins, and bmins and bmaxs must be inside the interval
    [xmin, xmax].

    When selecting Ys based on conditions, the criterion will be X > bmins and X <= bmaxs.
    bmins is excluded and bmaxs included. So when creating bmins, if it is equal to bmins,
    change it to be a bit less than xmins so that xmins will pass the criterion.

    In the tree, to each node corresponds a maximum value for the associated X (feature).
    This is accessed by decision_tree.threshold[node] and will be the bmaxs of one condition of a
    rule.

    It will later be the bmins of one condition of the "opposite" rule. Indeed if there
    is a condition saying something like 'If X < 3 then do that', there is also a
    condition saying 'if X >= 3 then do this'.

    At the end there will be 2 rules per node. So not only the leaves give a rule, but
    each branching in the tree.
    Rules do not contain any value for Ys. Just conditions on Xs. RICE will set the
    values in calc_stat.

    If print_visitor is specified, will return a string describing what happened step
    by step

    Parameters
    ----------
    tree : Union[sklearn.tree.DecisionTreeRegressor, sklearn.tree.DecisionTreeClassifier]
        the fitted decision tree to extract rules from
    xmins: Union[List[int], List[float], np.ndarray]
        min values of each X, one entry per X
    xmaxs: Union[List[int], List[float], np.ndarray]
        max values of each X, one entry per X
    features_names: List[str]
        the list of X names
    classes_names: List[str]
        the list of Y classes names (classification trees only)
    get_leaf: bool
        If True, return only the rules given by the leaves of the tree. (default value = False)
    compute_activation: bool
        The activation of the RuleSet is the logical OR of the activations of all its rules. It is only computed
        if compute_activation is True. (default value = True)
    stack_activation: bool
        If True, the RuleSet will keep in memory a 2-D np.ndarray containing the activations of all its rules. This
        can take a lot of memory, but can save time if you apply numpy methods on this stacked vector instead of on
        each rule separately. (default value = False)

    Returns
    -------
    rule_list: RuleSet
        The rules extracted from the tree.

    """
    decision_tree = tree.tree_
    if features_names is None:
        features_names = ["X_" + str(i) for i in range(tree.n_features_)]

    def visitor(node, depth, condition=None, rules_list=None):
        if rules_list is None:
            rules_list = RuleSet(compute_activation=compute_activation, stack_activation=stack_activation)
        if decision_tree.feature[node] != _tree.TREE_UNDEFINED:
            # Internal (non-leaf) node: the left branch corresponds to
            # X <= threshold, so the node threshold becomes the bmaxs of the condition.
            new_condition = HyperrectangleCondition(
                features_indexes=[decision_tree.feature[node]],
                bmins=[xmins[decision_tree.feature[node]]],
                bmaxs=[decision_tree.threshold[node]],
                features_names=[features_names[decision_tree.feature[node]]],
            )

            # If condition is not None, we are not at the root node,
            # so we need to expand the list of conditions, unless the current X is
            # already in our list of conditions. In that case, update the
            # corresponding condition by setting its bmaxs to the min of the
            # current and new thresholds.
            if condition is not None:
                if decision_tree.feature[node] not in condition.features_indexes:
                    # Will concatenate current condition list and new conditions by their
                    # attributes (which are lists)
                    new_condition = condition & new_condition
                else:
                    new_bmaxs = decision_tree.threshold[node]
                    new_condition = copy.deepcopy(condition)
                    place = condition.features_indexes.index(decision_tree.feature[node])
                    new_condition.bmaxs[place] = min(new_bmaxs, new_condition.bmaxs[place])

            # Create a new Rule with all the condition and append it to the rules
            # list. So the rule list is actually a history of how one rule evolved.
            if get_leaf is False:
                if isinstance(tree, DecisionTreeClassifier):
                    new_rule = ClassificationRule(copy.deepcopy(new_condition))
                    if classes_names is None:
                        new_rule._prediction = [pred for pred in enumerate(decision_tree.value[node][0])]
                    else:
                        new_rule._prediction = [
                            (classes_names[i], pred) for i, pred in enumerate(decision_tree.value[node][0])
                        ]
                else:
                    new_rule = RegressionRule(copy.deepcopy(new_condition))
                    new_rule._prediction = decision_tree.value[node][0][0]
                rules_list.append(new_rule)

            # Execute the current function on the left of the current node
            # (the "True" side)
            rules_list = visitor(decision_tree.children_left[node], depth + 1, new_condition, rules_list)

            # At this point, any rule found on the left of the current node will be
            # in rules list and new_cond will contain the corresponding conditions.

            # Create a new condition corresponding to the opposite of the current
            # node's threshold, i.e. bmins is now the previous bmaxs.
            # Here we do not have to alter bmins, because we explicitly want the
            # node threshold to not pass the criterion, since it must have passed
            # it for the opposite condition.
            new_condition = HyperrectangleCondition(
                features_indexes=[decision_tree.feature[node]],
                bmins=[decision_tree.threshold[node]],
                bmaxs=[xmaxs[decision_tree.feature[node]]],
                features_names=[features_names[decision_tree.feature[node]]],
            )

            # If condition is not None, we are not at the root node. Same logic as
            # before, except this time the new bmins is the threshold and the new
            # bmaxs is the X maximum. Then keep the max of the current and new bmins
            # as bmins, and likewise for bmaxs.
            # TODO there is something weird here, for X_i will necessarily be in cond, since
            # it was added at the very beginning of the function.
            if condition is not None:
                if decision_tree.feature[node] not in condition.features_indexes:
                    new_condition = condition & new_condition
                else:
                    new_bmins = decision_tree.threshold[node]
                    new_bmaxs = xmaxs[decision_tree.feature[node]]
                    new_condition = copy.deepcopy(condition)
                    place = new_condition.features_indexes.index(decision_tree.feature[node])
                    new_condition.bmins[place] = max(new_bmins, new_condition.bmins[place])
                    new_condition.bmaxs[place] = max(new_bmaxs, new_condition.bmaxs[place])

            # Create the rule for the right side of the node then apply this
            # function on the right side of the node
            if get_leaf is False:
                if isinstance(tree, DecisionTreeClassifier):
                    new_rule = ClassificationRule(copy.deepcopy(new_condition))
                    if classes_names is None:
                        new_rule._prediction = [pred for pred in enumerate(decision_tree.value[node][0])]
                    else:
                        new_rule._prediction = [
                            (classes_names[i], pred) for i, pred in enumerate(decision_tree.value[node][0])
                        ]
                else:
                    new_rule = RegressionRule(copy.deepcopy(new_condition))
                    new_rule._prediction = decision_tree.value[node][0][0]
                rules_list.append(new_rule)

            rules_list = visitor(decision_tree.children_right[node], depth + 1, new_condition, rules_list)

        elif get_leaf:
            if isinstance(tree, DecisionTreeClassifier):
                new_rule = ClassificationRule(copy.deepcopy(condition))
                if classes_names is None:
                    new_rule._prediction = [pred for pred in enumerate(decision_tree.value[node][0])]
                else:
                    new_rule._prediction = [
                        (classes_names[i], pred) for i, pred in enumerate(decision_tree.value[node][0])
                    ]
                rules_list.append(new_rule)
            else:
                new_rule = RegressionRule(copy.deepcopy(condition))
                new_rule._prediction = decision_tree.value[node][0][0]
                rules_list.append(new_rule)

        return rules_list

    rule_list = visitor(0, 1)
    return rule_list

Functions

def extract_rules_from_tree(tree: Union[sklearn.tree._classes.DecisionTreeClassifier, sklearn.tree._classes.DecisionTreeRegressor], xmins: Union[List[float], numpy.ndarray], xmaxs: Union[List[float], numpy.ndarray], features_names: List[str] = None, classes_names: List[str] = None, get_leaf: bool = False, compute_activation: bool = True, stack_activation: bool = False) ‑> RuleSet

Extract rules from a scikit-learn decision tree.

features_names is the list of X names. In the tree ('decision_tree' later in the code), each relevant X is represented by its position in the list of Xs that was fed to the tree at fit time. So to access the name of the feature used at a given node of the tree, first do decision_tree.feature[node] to get the feature index, then features_names[decision_tree.feature[node]].
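
A minimal sketch of that two-step lookup, using a toy fitted tree (the data, target, and feature names here are illustrative assumptions, not part of the module):

import numpy as np
from sklearn.tree import DecisionTreeRegressor, _tree

x_names = ["age", "income", "tenure"]  # hypothetical feature names
X = np.random.RandomState(0).rand(50, 3)
y = 2.0 * X[:, 1]  # toy target driven by the second feature

decision_tree = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, y).tree_

node = 0  # the root node
if decision_tree.feature[node] != _tree.TREE_UNDEFINED:  # skip leaves
    index = decision_tree.feature[node]  # position in x_names
    print(x_names[index])                # expected: "income", the X used at the root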

xmins and xmaxs give, for each X, its minimum and maximum value, one entry per X, in the same order as the features.

Later in the code: bmins is, for a given condition, the minimum value that X must have to be selected. By analogy, bmaxs is the maximum value.

Hence bmaxs must always be >= bmins, and bmins and bmaxs must lie inside the interval [xmin, xmax].

When selecting Ys based on conditions, the criterion is X > bmins and X <= bmaxs: bmins is excluded and bmaxs included. So when creating bmins, if it is equal to xmins, change it to be a bit less than xmins so that xmins will pass the criterion.
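
A short numpy sketch of that half-open criterion (the values are illustrative):

import numpy as np

x = np.array([0.0, 1.0, 2.0, 3.0])
xmin, bmax = x.min(), 2.0

# Naive bounds: bmin == xmin, so xmin itself fails the exclusive 'x > bmin'
naive = (x > xmin) & (x <= bmax)   # [False, True, True, False]

# Lower bmin slightly so that xmin passes the criterion
bmin = xmin - 1e-9
fixed = (x > bmin) & (x <= bmax)   # [True, True, True, False]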

In the tree, each node has a threshold value for its associated X (feature). It is accessed by decision_tree.threshold[node] and becomes the bmaxs of one condition of a rule.

The same threshold also becomes the bmins of one condition of the "opposite" rule. Indeed, if there is a condition saying 'if X < 3 then do that', there is also a condition saying 'if X >= 3 then do this'.
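
This mirrors how the visitor builds the two complementary conditions. A sketch for a single feature, assuming HyperrectangleCondition is importable from the package root (the import path is an assumption; the keyword arguments match the source above):

from ruleskit import HyperrectangleCondition  # import path assumed

xmin, xmax, threshold = 0.0, 10.0, 3.0

# Left branch, 'if X < 3': the node threshold becomes bmaxs
left = HyperrectangleCondition(
    features_indexes=[0], bmins=[xmin], bmaxs=[threshold], features_names=["X"],
)

# Right branch, 'if X >= 3': the same threshold becomes bmins
right = HyperrectangleCondition(
    features_indexes=[0], bmins=[threshold], bmaxs=[xmax], features_names=["X"],
)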

In the end there are 2 rules per node, so not only the leaves give a rule, but each branching of the tree. Rules do not contain any value for Ys, just conditions on Xs. RICE will set the values in calc_stat.

Parameters

tree : Union[sklearn.tree.DecisionTreeRegressor, sklearn.tree.DecisionTreeClassifier]
the fitted decision tree to extract rules from
xmins : Union[List[int], List[float], np.ndarray]
min values of each X, one entry per X
xmaxs : Union[List[int], List[float], np.ndarray]
max values of each X, one entry per X
features_names : List[str]
the list of X names
classes_names : List[str]
the list of Y classes names (classification trees only)
get_leaf : bool
If True, return only the rules given by the leaves of the tree. (default value = False)
compute_activation : bool
The activation of the RuleSet is the logical OR of the activations of all its rules. It is only computed if compute_activation is True. (default value = True)
stack_activation : bool
If True, the RuleSet will keep in memory a 2-D np.ndarray containing the activations of all its rules. This can take a lot of memory, but can save time if you apply numpy methods on this stacked vector instead of on each rule separately. (default value = False)

Returns

rule_list : RuleSet
The rules extracted from the tree.
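
An end-to-end usage sketch on toy data (the data and feature names are assumptions; len() on the returned RuleSet is assumed to behave as for a list):

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from ruleskit.utils.rule_utils import extract_rules_from_tree

rng = np.random.RandomState(0)
X = rng.rand(100, 2)
y = 3.0 * X[:, 0] + 0.1 * rng.rand(100)

tree = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, y)

ruleset = extract_rules_from_tree(
    tree,
    xmins=X.min(axis=0),
    xmaxs=X.max(axis=0),
    features_names=["x0", "x1"],
)
print(len(ruleset))  # 2 rules per internal node of the tree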