Module ruleskit.condition
Expand source code
import ast
from abc import ABC
from copy import copy
from typing import List, Union, Tuple
import numpy as np
import pandas as pd
from numbers import Number
from .activation import Activation
import logging
logger = logging.getLogger(__name__)
class DuplicatedFeatures(Exception):
pass
class Condition(ABC):
"""Abstract class for Condition object. Used by Rule objets.
A condition is a list of variable (here represented by their indexes in an array) and of conditions on those
variables.
A condition can be "imossible" to meet, in that case self.impossible is True. This is set automatically.
One can add conditions and use logical AND (&) operations on two conditions (same thing as add). In that case, the
two conditions are combined into a new one.
"""
def __init__(self, features_indexes: Union[List[int], None] = None, empty: bool = False):
if empty:
self._features_indexes = None
else:
if features_indexes is None:
raise ValueError("Must specify features_indexes")
if any([not np.issubdtype(type(a), np.integer) for a in features_indexes]):
raise TypeError(
f"features_indexes must be integers. You gave {[(f, type(f)) for f in features_indexes]}"
)
self._features_indexes = features_indexes
if len(set(self._features_indexes)) != len(self._features_indexes):
raise DuplicatedFeatures
self.impossible = False
def sort(self):
self._features_indexes = sorted(self._features_indexes)
def __and__(self, other: "Condition") -> "Condition":
"""To be implemented in daughter classes"""
pass
def __add__(self, other: "Condition") -> "Condition":
return NotImplemented("Can not add conditions (seen as 'logical OR'). You can use logical AND however.")
@property
def to_hash(self):
return ("c",) + tuple(self._features_indexes)
def __hash__(self):
return hash(frozenset(self.to_hash))
@property
def features_indexes(self) -> List[int]:
return self._features_indexes
@features_indexes.setter
def features_indexes(self, value: Union[List[int], str]):
if isinstance(value, str):
value = ast.literal_eval(value)
if len(set(value)) != len(value):
raise DuplicatedFeatures
if len(self) > 0 and len(self) != len(value):
raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} indexes")
self._features_indexes = value
def __len__(self):
"""A Condition's length is the number of features it talks about"""
return len(self._features_indexes)
def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
"""
To be implemented in daughter class.
Evaluates where a condition if fullfilled. In this abstract class that does not have any acutal condition,
it is always fullfilled.
Parameters
----------
xs: Union[pd.DataFrame, np.ndarray]
shape (n, d), n number of line, d number of features
Returns
-------
activation: Activation
Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not.
"""
pass
def normalize_features_indexes(self):
"""In some daughter classes, features indexes are optional. however since the attribute 'features_indexes'
must be specified, a default value is set automatically"""
self._features_indexes = list(range(len(self.features_indexes)))
class HyperrectangleCondition(Condition):
"""Condition class for Hyper Rectangle conditions.
An Hyper Rectangle condition is a condition where each feature is associated to a min and a max (self.bmins and
self.bmaxs). The condition is met when all features are within their respective bmin and bmax.
For example, if the condition has :
features_indexes = [0, 1]
bminx = [0, 1]
mnaxs = [0, 2]
Then the condition is met when feature 0 is equal to 0 and feature 2 is between 1 and 2.
In this condition, features indexes are optional if features names are given.
Such a condition can be sorted either according to features indexes (smaller features first) or by features names in
alphabetical order. This is set through the class attribute SORT_ACCORDING_TO that can be either "index" or "name".
"""
SORT_ACCORDING_TO = "index"
def __init__(
self,
features_indexes: Union[List[int], None] = None,
bmins: Union[List[Union[int, float]], None] = None,
bmaxs: Union[List[Union[int, float]], None] = None,
features_names: Union[List[str], None] = None,
empty: bool = False,
sort: bool = True,
):
if empty:
super().__init__(empty=True)
self._bmins = None
self._bmaxs = None
self._features_names = None
else:
if bmins is None:
raise ValueError("bmins can not be None if 'empty' is False")
if bmaxs is None:
raise ValueError("bmaxs can not be None if 'empty' is False")
if features_indexes is None:
if features_names is None:
raise ValueError("Must specify at least one of features_indexes and features_names")
features_indexes = list(range(len(features_names)))
length = len(features_indexes)
if len(bmaxs) != length:
raise ValueError(f"Specifed {length} features but {len(bmaxs)} bmaxs")
if len(bmins) != length:
raise ValueError(f"Specifed {length} features but {len(bmins)} bmins")
if features_names is not None and len(features_names) != length:
raise ValueError(f"Specifed {length} features but {len(features_names)} bmaxs")
if features_names is not None and any([not isinstance(a, str) for a in features_names]):
raise TypeError(f"Names must be strings. You gave {[(f, type(f)) for f in features_names]}")
if any([not isinstance(a, Number) for a in bmins]):
raise TypeError(f"bmins must be integers or floats. You gave {[(f, type(f)) for f in bmins]}")
if any([not isinstance(a, Number) for a in bmaxs]):
raise TypeError(f"bmaxs must be integers or floats. You gave {[(f, type(f)) for f in bmaxs]}")
super().__init__(features_indexes)
if any([a > b for a, b in zip(bmins, bmaxs)]):
# If a bmin is above its associated bmax, then the rule is impossible.
self.impossible = True
self._bmins = bmins
self._bmaxs = bmaxs
if features_names is not None:
self._features_names = features_names
else:
self._features_names = ["X_" + str(i) for i in self._features_indexes]
if len(set(self._features_names)) != len(self._features_names):
raise DuplicatedFeatures
if sort:
self.sort()
def __and__(self, other: "HyperrectangleCondition") -> "HyperrectangleCondition":
"""Logical and (&) or two HyperrectangleCondition objects
If the two conditions do not talk about the same features, then the AND is obvious. For common features,
the bmin of the feature in the new condition is set to be the greatest of bmins in parent conditions, and the
bmax the smallest of bmaxs.
This can give impossible conditions : if in condition 1 feature A must be between 0 and 10 and in condition 2 it
must be between 20 and 30, then in the new condition it must be between 20 and 10, assumin 20 being the minimum.
In that case, the new condition, upon creation, will have self.impossible = True. This does not corrupt the
object nor the code : the condition's method "evaluate", which returns the activation vector, will return a
vector with only zeros since the condition will never be met.
"""
self_clone = HyperrectangleCondition(
features_indexes=copy(self.features_indexes),
bmins=copy(self.bmins),
bmaxs=copy(self.bmaxs),
features_names=copy(self.features_names),
)
common_features = [f for f in self_clone.features_names if f in other.features_names]
if len(common_features) > 0:
# If the two conditions have features in common, the new conditons will have as range the intersection of
# each condition's range for those features. The new condition can possibly never be met.
common_features_positions_in_self = [self_clone.features_names.index(f) for f in common_features]
common_features_positions_in_other = [other.features_names.index(f) for f in common_features]
(common_features_indexes_in_self, common_features_bmins_in_self, common_features_bmaxs_in_self) = list(zip(
*[
(
self_clone.features_indexes[i],
self_clone.bmins[i],
self_clone.bmaxs[i]
)
for i in common_features_positions_in_self
]
))
(common_features_indexes_in_other, common_features_bmins_in_other, common_features_bmaxs_in_other) = list(
zip(
*[
(
other.features_indexes[i],
other.bmins[i],
other.bmaxs[i]
)
for i in common_features_positions_in_other
]
)
)
if common_features_indexes_in_self != common_features_indexes_in_other:
raise IndexError("Some features present in both conditions in __and__ have different indexes : \n "
f"{common_features_indexes_in_self}\n "
f"{common_features_indexes_in_other}")
common_features_bmins = [
max(bmin0, bmin1) for bmin0, bmin1 in zip(common_features_bmins_in_self, common_features_bmins_in_other)
]
common_features_bmaxs = [
min(bmax0, bmax1) for bmax0, bmax1 in zip(common_features_bmaxs_in_self, common_features_bmaxs_in_other)
]
features_indexes = [
other.features_indexes[i]
for i in range(len(other.features_indexes))
if i not in common_features_positions_in_other
]
features_names = [f for f in other.features_names if f not in common_features]
bmins = [
other.bmins[i]
for i in range(len(other.bmins))
if i not in common_features_positions_in_other
]
bmaxs = [
other.bmaxs[i]
for i in range(len(other.bmaxs))
if i not in common_features_positions_in_other
]
other_clone = HyperrectangleCondition(
features_indexes=features_indexes,
bmins=bmins,
bmaxs=bmaxs,
features_names=features_names,
)
for i, index in enumerate(common_features_positions_in_self):
self_clone.bmins[index] = common_features_bmins[i]
self_clone.bmaxs[index] = common_features_bmaxs[i]
else:
other_clone = other
args = [i + j for i, j in zip(self_clone.getattr, other_clone.getattr)]
if len(set(args[0])) != len(args[0]):
raise IndexError("Some features with different names had same index in both conditions in __and__:\n "
f"{args}")
to_ret = HyperrectangleCondition(
features_indexes=args[0],
bmins=args[1],
bmaxs=args[2],
features_names=args[3],
empty=False,
)
return to_ret
@property
def getattr(self) -> List[list]:
return [self.features_indexes, self.bmins, self.bmaxs, self.features_names]
@property
def features_names(self) -> List[str]:
return self._features_names
@property
def bmins(self) -> List[Union[int, float]]:
return self._bmins
@property
def bmaxs(self) -> List[Union[int, float]]:
return self._bmaxs
@features_names.setter
def features_names(self, value: Union[List[str], str]):
if isinstance(value, str):
value = ast.literal_eval(value)
if len(set(value)) != len(value):
raise DuplicatedFeatures
if len(self) > 0 and len(self) != len(value):
raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} names")
self._features_names = value
@bmins.setter
def bmins(self, value: Union[List[Union[int, float]], str]):
if isinstance(value, str):
value = [int(v) for v in ast.literal_eval(value)]
if len(self) > 0 and len(self) != len(value):
raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} bmins")
self._bmins = value
if any([a > b for a, b in zip(self.bmins, self.bmaxs)]):
# If a bmin is above its associated bmax, then the rule is impossible.
self.impossible = True
@bmaxs.setter
def bmaxs(self, value: Union[List[Union[int, float]], str]):
if isinstance(value, str):
value = [int(v) for v in ast.literal_eval(value)]
if len(self) > 0 and len(self) != len(value):
raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} bmaxs")
self._bmaxs = value
if any([a > b for a, b in zip(self.bmins, self.bmaxs)]):
# If a bmin is above its associated bmax, then the rule is impossible.
self.impossible = True
def __repr__(self):
return self.__str__()
def __str__(self):
if self._features_names is None:
return "empty condition"
str_output = f"{self._features_names[0]} in [{self._bmins[0]}, {self._bmaxs[0]}]"
if len(self) > 1:
for i in range(1, len(self)):
str_output += " AND "
str_output += f"{self._features_names[i]} in [{self._bmins[i]}, {self._bmaxs[i]}]"
return str_output
def __eq__(self, other):
"""Two HyperrectangleConditions are equal if they talk about the same features, and if the bmins and bmaxs
are the same from one rule to another. Features indexes can be different."""
return self.__hash__() == other.__hash__()
@property
def to_hash(self) -> Tuple[str]:
return ("c",) + tuple(
(self._features_names[i], self._bmins[i], self._bmaxs[i]) for i in range(len(self._features_names))
)
def __hash__(self):
return hash(frozenset(self.to_hash))
def __getitem__(self, item):
return (
self._features_names[item],
self._features_indexes[item],
self._bmins[item],
self._bmaxs[item],
)
def __len__(self) -> int:
"""A HyperrectangleCondition's length is the number of features it talks about"""
if self._features_names is not None:
return len(self._features_names)
if self._features_indexes is not None:
return len(self._features_indexes)
if self._bmins is not None:
return len(self._bmins)
if self._bmaxs is not None:
return len(self._bmaxs)
return 0
def sort(self):
if len(self) > 1:
if self.__class__.SORT_ACCORDING_TO == "index":
self._bmins = [x for _, x in sorted(zip(self._features_indexes, self._bmins))]
self._bmaxs = [x for _, x in sorted(zip(self._features_indexes, self._bmaxs))]
self._features_names = [x for _, x in sorted(zip(self._features_indexes, self._features_names))]
self._features_indexes = sorted(self._features_indexes)
elif self.__class__.SORT_ACCORDING_TO == "name":
self._bmins = [x for _, x in sorted(zip(self._features_names, self._bmins))]
self._bmaxs = [x for _, x in sorted(zip(self._features_names, self._bmaxs))]
self._features_indexes = [x for _, x in sorted(zip(self._features_names, self._features_indexes))]
self._features_names = sorted(self._features_names)
else:
raise ValueError(
"HyperrectangleCondition's SORT_ACCORDING_TO"
f" can be 'index' or 'name', not {self.__class__.SORT_ACCORDING_TO}"
)
def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
"""
Evaluates where a condition if fullfilled, by returning a vector of the form [0, 1, 0, 0, ...]
Parameters
----------
xs: Union[pd.DataFrame, np.ndarray]
shape (n, d), n number of line, d number of features. If is a pd.DataFrame, will use self.features_names to
select features to use in xs
Returns
-------
activation: np.ndarray
Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not.
Examples
--------
>>> xs_ = np.array([[1, 3], [3, 4], [2, np.nan]])
>>> c1 = HyperrectangleCondition([0], bmins=[1], bmaxs=[2])
>>> c1.evaluate(xs_)
np.array([1, 0, 1])
>>> c2 = HyperrectangleCondition([1], bmins=[3], bmaxs=[5])
>>> c2.evaluate(xs_)
np.array([1, 1, 0])
>>> c3 = HyperrectangleCondition([0, 1], bmins=[1, 3], bmaxs=[2, 5])
>>> c3.evaluate(xs_)
np.array([1, 0, 0])
"""
if self.impossible:
return np.zeros(xs.shape[0], dtype=np.ubyte)
if isinstance(xs, np.ndarray):
if any([i >= xs.shape[1] for i in self.features_indexes]):
raise IndexError("Some features indexes in self are greater than the size of the given xs array")
geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte)
for i, j in enumerate(self._features_indexes):
geq_min &= np.greater_equal(xs[:, j], self._bmins[i])
leq_min &= np.less_equal(xs[:, j], self._bmaxs[i])
not_nan &= np.isfinite(xs[:, j])
activation = geq_min & leq_min & not_nan
else:
if any([i not in xs.columns for i in self.features_names]):
raise IndexError("Some features names in self were not in xs DataFrame columns")
geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte)
for i, n in enumerate(self._features_names):
geq_min &= np.greater_equal(xs[n], self._bmins[i])
leq_min &= np.less_equal(xs[n], self._bmaxs[i])
not_nan &= np.isfinite(xs[n])
activation = (geq_min & leq_min & not_nan).values
return activation
Classes
class Condition (features_indexes: Optional[List[int]] = None, empty: bool = False)
-
Abstract class for Condition object. Used by Rule objets. A condition is a list of variable (here represented by their indexes in an array) and of conditions on those variables. A condition can be "imossible" to meet, in that case self.impossible is True. This is set automatically.
One can add conditions and use logical AND (&) operations on two conditions (same thing as add). In that case, the two conditions are combined into a new one.
Expand source code
class Condition(ABC): """Abstract class for Condition object. Used by Rule objets. A condition is a list of variable (here represented by their indexes in an array) and of conditions on those variables. A condition can be "imossible" to meet, in that case self.impossible is True. This is set automatically. One can add conditions and use logical AND (&) operations on two conditions (same thing as add). In that case, the two conditions are combined into a new one. """ def __init__(self, features_indexes: Union[List[int], None] = None, empty: bool = False): if empty: self._features_indexes = None else: if features_indexes is None: raise ValueError("Must specify features_indexes") if any([not np.issubdtype(type(a), np.integer) for a in features_indexes]): raise TypeError( f"features_indexes must be integers. You gave {[(f, type(f)) for f in features_indexes]}" ) self._features_indexes = features_indexes if len(set(self._features_indexes)) != len(self._features_indexes): raise DuplicatedFeatures self.impossible = False def sort(self): self._features_indexes = sorted(self._features_indexes) def __and__(self, other: "Condition") -> "Condition": """To be implemented in daughter classes""" pass def __add__(self, other: "Condition") -> "Condition": return NotImplemented("Can not add conditions (seen as 'logical OR'). You can use logical AND however.") @property def to_hash(self): return ("c",) + tuple(self._features_indexes) def __hash__(self): return hash(frozenset(self.to_hash)) @property def features_indexes(self) -> List[int]: return self._features_indexes @features_indexes.setter def features_indexes(self, value: Union[List[int], str]): if isinstance(value, str): value = ast.literal_eval(value) if len(set(value)) != len(value): raise DuplicatedFeatures if len(self) > 0 and len(self) != len(value): raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} indexes") self._features_indexes = value def __len__(self): """A Condition's length is the number of features it talks about""" return len(self._features_indexes) def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: """ To be implemented in daughter class. Evaluates where a condition if fullfilled. In this abstract class that does not have any acutal condition, it is always fullfilled. Parameters ---------- xs: Union[pd.DataFrame, np.ndarray] shape (n, d), n number of line, d number of features Returns ------- activation: Activation Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not. """ pass def normalize_features_indexes(self): """In some daughter classes, features indexes are optional. however since the attribute 'features_indexes' must be specified, a default value is set automatically""" self._features_indexes = list(range(len(self.features_indexes)))
Ancestors
- abc.ABC
Subclasses
Instance variables
var features_indexes : List[int]
-
Expand source code
@property def features_indexes(self) -> List[int]: return self._features_indexes
var to_hash
-
Expand source code
@property def to_hash(self): return ("c",) + tuple(self._features_indexes)
Methods
def evaluate(self, xs: Union[pandas.core.frame.DataFrame, numpy.ndarray]) ‑> numpy.ndarray
-
To be implemented in daughter class. Evaluates where a condition if fullfilled. In this abstract class that does not have any acutal condition, it is always fullfilled.
Parameters
xs
:Union[pd.DataFrame, np.ndarray]
- shape (n, d), n number of line, d number of features
Returns
activation
:Activation
- Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not.
Expand source code
def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: """ To be implemented in daughter class. Evaluates where a condition if fullfilled. In this abstract class that does not have any acutal condition, it is always fullfilled. Parameters ---------- xs: Union[pd.DataFrame, np.ndarray] shape (n, d), n number of line, d number of features Returns ------- activation: Activation Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not. """ pass
def normalize_features_indexes(self)
-
In some daughter classes, features indexes are optional. however since the attribute 'features_indexes' must be specified, a default value is set automatically
Expand source code
def normalize_features_indexes(self): """In some daughter classes, features indexes are optional. however since the attribute 'features_indexes' must be specified, a default value is set automatically""" self._features_indexes = list(range(len(self.features_indexes)))
def sort(self)
-
Expand source code
def sort(self): self._features_indexes = sorted(self._features_indexes)
class DuplicatedFeatures (*args, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class DuplicatedFeatures(Exception): pass
Ancestors
- builtins.Exception
- builtins.BaseException
class HyperrectangleCondition (features_indexes: Optional[List[int]] = None, bmins: Optional[List[Union[int, float]]] = None, bmaxs: Optional[List[Union[int, float]]] = None, features_names: Optional[List[str]] = None, empty: bool = False, sort: bool = True)
-
Condition class for Hyper Rectangle conditions.
An Hyper Rectangle condition is a condition where each feature is associated to a min and a max (self.bmins and self.bmaxs). The condition is met when all features are within their respective bmin and bmax.
For example, if the condition has :
features_indexes = [0, 1] bminx = [0, 1] mnaxs = [0, 2]
Then the condition is met when feature 0 is equal to 0 and feature 2 is between 1 and 2.
In this condition, features indexes are optional if features names are given.
Such a condition can be sorted either according to features indexes (smaller features first) or by features names in alphabetical order. This is set through the class attribute SORT_ACCORDING_TO that can be either "index" or "name".
Expand source code
class HyperrectangleCondition(Condition): """Condition class for Hyper Rectangle conditions. An Hyper Rectangle condition is a condition where each feature is associated to a min and a max (self.bmins and self.bmaxs). The condition is met when all features are within their respective bmin and bmax. For example, if the condition has : features_indexes = [0, 1] bminx = [0, 1] mnaxs = [0, 2] Then the condition is met when feature 0 is equal to 0 and feature 2 is between 1 and 2. In this condition, features indexes are optional if features names are given. Such a condition can be sorted either according to features indexes (smaller features first) or by features names in alphabetical order. This is set through the class attribute SORT_ACCORDING_TO that can be either "index" or "name". """ SORT_ACCORDING_TO = "index" def __init__( self, features_indexes: Union[List[int], None] = None, bmins: Union[List[Union[int, float]], None] = None, bmaxs: Union[List[Union[int, float]], None] = None, features_names: Union[List[str], None] = None, empty: bool = False, sort: bool = True, ): if empty: super().__init__(empty=True) self._bmins = None self._bmaxs = None self._features_names = None else: if bmins is None: raise ValueError("bmins can not be None if 'empty' is False") if bmaxs is None: raise ValueError("bmaxs can not be None if 'empty' is False") if features_indexes is None: if features_names is None: raise ValueError("Must specify at least one of features_indexes and features_names") features_indexes = list(range(len(features_names))) length = len(features_indexes) if len(bmaxs) != length: raise ValueError(f"Specifed {length} features but {len(bmaxs)} bmaxs") if len(bmins) != length: raise ValueError(f"Specifed {length} features but {len(bmins)} bmins") if features_names is not None and len(features_names) != length: raise ValueError(f"Specifed {length} features but {len(features_names)} bmaxs") if features_names is not None and any([not isinstance(a, str) for a in features_names]): raise TypeError(f"Names must be strings. You gave {[(f, type(f)) for f in features_names]}") if any([not isinstance(a, Number) for a in bmins]): raise TypeError(f"bmins must be integers or floats. You gave {[(f, type(f)) for f in bmins]}") if any([not isinstance(a, Number) for a in bmaxs]): raise TypeError(f"bmaxs must be integers or floats. You gave {[(f, type(f)) for f in bmaxs]}") super().__init__(features_indexes) if any([a > b for a, b in zip(bmins, bmaxs)]): # If a bmin is above its associated bmax, then the rule is impossible. self.impossible = True self._bmins = bmins self._bmaxs = bmaxs if features_names is not None: self._features_names = features_names else: self._features_names = ["X_" + str(i) for i in self._features_indexes] if len(set(self._features_names)) != len(self._features_names): raise DuplicatedFeatures if sort: self.sort() def __and__(self, other: "HyperrectangleCondition") -> "HyperrectangleCondition": """Logical and (&) or two HyperrectangleCondition objects If the two conditions do not talk about the same features, then the AND is obvious. For common features, the bmin of the feature in the new condition is set to be the greatest of bmins in parent conditions, and the bmax the smallest of bmaxs. This can give impossible conditions : if in condition 1 feature A must be between 0 and 10 and in condition 2 it must be between 20 and 30, then in the new condition it must be between 20 and 10, assumin 20 being the minimum. In that case, the new condition, upon creation, will have self.impossible = True. This does not corrupt the object nor the code : the condition's method "evaluate", which returns the activation vector, will return a vector with only zeros since the condition will never be met. """ self_clone = HyperrectangleCondition( features_indexes=copy(self.features_indexes), bmins=copy(self.bmins), bmaxs=copy(self.bmaxs), features_names=copy(self.features_names), ) common_features = [f for f in self_clone.features_names if f in other.features_names] if len(common_features) > 0: # If the two conditions have features in common, the new conditons will have as range the intersection of # each condition's range for those features. The new condition can possibly never be met. common_features_positions_in_self = [self_clone.features_names.index(f) for f in common_features] common_features_positions_in_other = [other.features_names.index(f) for f in common_features] (common_features_indexes_in_self, common_features_bmins_in_self, common_features_bmaxs_in_self) = list(zip( *[ ( self_clone.features_indexes[i], self_clone.bmins[i], self_clone.bmaxs[i] ) for i in common_features_positions_in_self ] )) (common_features_indexes_in_other, common_features_bmins_in_other, common_features_bmaxs_in_other) = list( zip( *[ ( other.features_indexes[i], other.bmins[i], other.bmaxs[i] ) for i in common_features_positions_in_other ] ) ) if common_features_indexes_in_self != common_features_indexes_in_other: raise IndexError("Some features present in both conditions in __and__ have different indexes : \n " f"{common_features_indexes_in_self}\n " f"{common_features_indexes_in_other}") common_features_bmins = [ max(bmin0, bmin1) for bmin0, bmin1 in zip(common_features_bmins_in_self, common_features_bmins_in_other) ] common_features_bmaxs = [ min(bmax0, bmax1) for bmax0, bmax1 in zip(common_features_bmaxs_in_self, common_features_bmaxs_in_other) ] features_indexes = [ other.features_indexes[i] for i in range(len(other.features_indexes)) if i not in common_features_positions_in_other ] features_names = [f for f in other.features_names if f not in common_features] bmins = [ other.bmins[i] for i in range(len(other.bmins)) if i not in common_features_positions_in_other ] bmaxs = [ other.bmaxs[i] for i in range(len(other.bmaxs)) if i not in common_features_positions_in_other ] other_clone = HyperrectangleCondition( features_indexes=features_indexes, bmins=bmins, bmaxs=bmaxs, features_names=features_names, ) for i, index in enumerate(common_features_positions_in_self): self_clone.bmins[index] = common_features_bmins[i] self_clone.bmaxs[index] = common_features_bmaxs[i] else: other_clone = other args = [i + j for i, j in zip(self_clone.getattr, other_clone.getattr)] if len(set(args[0])) != len(args[0]): raise IndexError("Some features with different names had same index in both conditions in __and__:\n " f"{args}") to_ret = HyperrectangleCondition( features_indexes=args[0], bmins=args[1], bmaxs=args[2], features_names=args[3], empty=False, ) return to_ret @property def getattr(self) -> List[list]: return [self.features_indexes, self.bmins, self.bmaxs, self.features_names] @property def features_names(self) -> List[str]: return self._features_names @property def bmins(self) -> List[Union[int, float]]: return self._bmins @property def bmaxs(self) -> List[Union[int, float]]: return self._bmaxs @features_names.setter def features_names(self, value: Union[List[str], str]): if isinstance(value, str): value = ast.literal_eval(value) if len(set(value)) != len(value): raise DuplicatedFeatures if len(self) > 0 and len(self) != len(value): raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} names") self._features_names = value @bmins.setter def bmins(self, value: Union[List[Union[int, float]], str]): if isinstance(value, str): value = [int(v) for v in ast.literal_eval(value)] if len(self) > 0 and len(self) != len(value): raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} bmins") self._bmins = value if any([a > b for a, b in zip(self.bmins, self.bmaxs)]): # If a bmin is above its associated bmax, then the rule is impossible. self.impossible = True @bmaxs.setter def bmaxs(self, value: Union[List[Union[int, float]], str]): if isinstance(value, str): value = [int(v) for v in ast.literal_eval(value)] if len(self) > 0 and len(self) != len(value): raise IndexError(f"Condition has {len(self)} features but you gave {len(value)} bmaxs") self._bmaxs = value if any([a > b for a, b in zip(self.bmins, self.bmaxs)]): # If a bmin is above its associated bmax, then the rule is impossible. self.impossible = True def __repr__(self): return self.__str__() def __str__(self): if self._features_names is None: return "empty condition" str_output = f"{self._features_names[0]} in [{self._bmins[0]}, {self._bmaxs[0]}]" if len(self) > 1: for i in range(1, len(self)): str_output += " AND " str_output += f"{self._features_names[i]} in [{self._bmins[i]}, {self._bmaxs[i]}]" return str_output def __eq__(self, other): """Two HyperrectangleConditions are equal if they talk about the same features, and if the bmins and bmaxs are the same from one rule to another. Features indexes can be different.""" return self.__hash__() == other.__hash__() @property def to_hash(self) -> Tuple[str]: return ("c",) + tuple( (self._features_names[i], self._bmins[i], self._bmaxs[i]) for i in range(len(self._features_names)) ) def __hash__(self): return hash(frozenset(self.to_hash)) def __getitem__(self, item): return ( self._features_names[item], self._features_indexes[item], self._bmins[item], self._bmaxs[item], ) def __len__(self) -> int: """A HyperrectangleCondition's length is the number of features it talks about""" if self._features_names is not None: return len(self._features_names) if self._features_indexes is not None: return len(self._features_indexes) if self._bmins is not None: return len(self._bmins) if self._bmaxs is not None: return len(self._bmaxs) return 0 def sort(self): if len(self) > 1: if self.__class__.SORT_ACCORDING_TO == "index": self._bmins = [x for _, x in sorted(zip(self._features_indexes, self._bmins))] self._bmaxs = [x for _, x in sorted(zip(self._features_indexes, self._bmaxs))] self._features_names = [x for _, x in sorted(zip(self._features_indexes, self._features_names))] self._features_indexes = sorted(self._features_indexes) elif self.__class__.SORT_ACCORDING_TO == "name": self._bmins = [x for _, x in sorted(zip(self._features_names, self._bmins))] self._bmaxs = [x for _, x in sorted(zip(self._features_names, self._bmaxs))] self._features_indexes = [x for _, x in sorted(zip(self._features_names, self._features_indexes))] self._features_names = sorted(self._features_names) else: raise ValueError( "HyperrectangleCondition's SORT_ACCORDING_TO" f" can be 'index' or 'name', not {self.__class__.SORT_ACCORDING_TO}" ) def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: """ Evaluates where a condition if fullfilled, by returning a vector of the form [0, 1, 0, 0, ...] Parameters ---------- xs: Union[pd.DataFrame, np.ndarray] shape (n, d), n number of line, d number of features. If is a pd.DataFrame, will use self.features_names to select features to use in xs Returns ------- activation: np.ndarray Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not. Examples -------- >>> xs_ = np.array([[1, 3], [3, 4], [2, np.nan]]) >>> c1 = HyperrectangleCondition([0], bmins=[1], bmaxs=[2]) >>> c1.evaluate(xs_) np.array([1, 0, 1]) >>> c2 = HyperrectangleCondition([1], bmins=[3], bmaxs=[5]) >>> c2.evaluate(xs_) np.array([1, 1, 0]) >>> c3 = HyperrectangleCondition([0, 1], bmins=[1, 3], bmaxs=[2, 5]) >>> c3.evaluate(xs_) np.array([1, 0, 0]) """ if self.impossible: return np.zeros(xs.shape[0], dtype=np.ubyte) if isinstance(xs, np.ndarray): if any([i >= xs.shape[1] for i in self.features_indexes]): raise IndexError("Some features indexes in self are greater than the size of the given xs array") geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte) for i, j in enumerate(self._features_indexes): geq_min &= np.greater_equal(xs[:, j], self._bmins[i]) leq_min &= np.less_equal(xs[:, j], self._bmaxs[i]) not_nan &= np.isfinite(xs[:, j]) activation = geq_min & leq_min & not_nan else: if any([i not in xs.columns for i in self.features_names]): raise IndexError("Some features names in self were not in xs DataFrame columns") geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte) for i, n in enumerate(self._features_names): geq_min &= np.greater_equal(xs[n], self._bmins[i]) leq_min &= np.less_equal(xs[n], self._bmaxs[i]) not_nan &= np.isfinite(xs[n]) activation = (geq_min & leq_min & not_nan).values return activation
Ancestors
- Condition
- abc.ABC
Class variables
var SORT_ACCORDING_TO
Instance variables
var bmaxs : List[Union[int, float]]
-
Expand source code
@property def bmaxs(self) -> List[Union[int, float]]: return self._bmaxs
var bmins : List[Union[int, float]]
-
Expand source code
@property def bmins(self) -> List[Union[int, float]]: return self._bmins
var features_names : List[str]
-
Expand source code
@property def features_names(self) -> List[str]: return self._features_names
var getattr : List[list]
-
Expand source code
@property def getattr(self) -> List[list]: return [self.features_indexes, self.bmins, self.bmaxs, self.features_names]
var to_hash : Tuple[str]
-
Expand source code
@property def to_hash(self) -> Tuple[str]: return ("c",) + tuple( (self._features_names[i], self._bmins[i], self._bmaxs[i]) for i in range(len(self._features_names)) )
Methods
def evaluate(self, xs: Union[pandas.core.frame.DataFrame, numpy.ndarray]) ‑> numpy.ndarray
-
Evaluates where a condition if fullfilled, by returning a vector of the form [0, 1, 0, 0, …]
Parameters
xs
:Union[pd.DataFrame, np.ndarray]
- shape (n, d), n number of line, d number of features. If is a pd.DataFrame, will use self.features_names to select features to use in xs
Returns
activation
:np.ndarray
- Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not.
Examples
>>> xs_ = np.array([[1, 3], [3, 4], [2, np.nan]]) >>> c1 = HyperrectangleCondition([0], bmins=[1], bmaxs=[2]) >>> c1.evaluate(xs_) np.array([1, 0, 1]) >>> c2 = HyperrectangleCondition([1], bmins=[3], bmaxs=[5]) >>> c2.evaluate(xs_) np.array([1, 1, 0]) >>> c3 = HyperrectangleCondition([0, 1], bmins=[1, 3], bmaxs=[2, 5]) >>> c3.evaluate(xs_) np.array([1, 0, 0])
Expand source code
def evaluate(self, xs: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: """ Evaluates where a condition if fullfilled, by returning a vector of the form [0, 1, 0, 0, ...] Parameters ---------- xs: Union[pd.DataFrame, np.ndarray] shape (n, d), n number of line, d number of features. If is a pd.DataFrame, will use self.features_names to select features to use in xs Returns ------- activation: np.ndarray Shape (n, 1). The activation vector, filled with 1 where the condition is met and 0 where it is not. Examples -------- >>> xs_ = np.array([[1, 3], [3, 4], [2, np.nan]]) >>> c1 = HyperrectangleCondition([0], bmins=[1], bmaxs=[2]) >>> c1.evaluate(xs_) np.array([1, 0, 1]) >>> c2 = HyperrectangleCondition([1], bmins=[3], bmaxs=[5]) >>> c2.evaluate(xs_) np.array([1, 1, 0]) >>> c3 = HyperrectangleCondition([0, 1], bmins=[1, 3], bmaxs=[2, 5]) >>> c3.evaluate(xs_) np.array([1, 0, 0]) """ if self.impossible: return np.zeros(xs.shape[0], dtype=np.ubyte) if isinstance(xs, np.ndarray): if any([i >= xs.shape[1] for i in self.features_indexes]): raise IndexError("Some features indexes in self are greater than the size of the given xs array") geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte) for i, j in enumerate(self._features_indexes): geq_min &= np.greater_equal(xs[:, j], self._bmins[i]) leq_min &= np.less_equal(xs[:, j], self._bmaxs[i]) not_nan &= np.isfinite(xs[:, j]) activation = geq_min & leq_min & not_nan else: if any([i not in xs.columns for i in self.features_names]): raise IndexError("Some features names in self were not in xs DataFrame columns") geq_min = leq_min = not_nan = np.ones(xs.shape[0], dtype=np.ubyte) for i, n in enumerate(self._features_names): geq_min &= np.greater_equal(xs[n], self._bmins[i]) leq_min &= np.less_equal(xs[n], self._bmaxs[i]) not_nan &= np.isfinite(xs[n]) activation = (geq_min & leq_min & not_nan).values return activation
def sort(self)
-
Expand source code
def sort(self): if len(self) > 1: if self.__class__.SORT_ACCORDING_TO == "index": self._bmins = [x for _, x in sorted(zip(self._features_indexes, self._bmins))] self._bmaxs = [x for _, x in sorted(zip(self._features_indexes, self._bmaxs))] self._features_names = [x for _, x in sorted(zip(self._features_indexes, self._features_names))] self._features_indexes = sorted(self._features_indexes) elif self.__class__.SORT_ACCORDING_TO == "name": self._bmins = [x for _, x in sorted(zip(self._features_names, self._bmins))] self._bmaxs = [x for _, x in sorted(zip(self._features_names, self._bmaxs))] self._features_indexes = [x for _, x in sorted(zip(self._features_names, self._features_indexes))] self._features_names = sorted(self._features_names) else: raise ValueError( "HyperrectangleCondition's SORT_ACCORDING_TO" f" can be 'index' or 'name', not {self.__class__.SORT_ACCORDING_TO}" )
Inherited members