Source code for tablemage._src.ml.predict.regression.linear

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    HuberRegressor,
    RANSACRegressor,
)
from typing import Mapping, Literal, Iterable
from .base import BaseR, HyperparameterSearcher
from ....feature_selection import BaseFSR
from optuna.distributions import (
    FloatDistribution,
    CategoricalDistribution,
    BaseDistribution,
)


[docs] class LinearR(BaseR): """Linear regression. Hyperparameter optimization is performed automatically during training. The hyperparameter search process can be modified by the user. """
[docs] def __init__( self, type: Literal["ols", "l1", "l2", "elasticnet"] = "ols", hyperparam_search_method: Literal["optuna", "grid"] | None = None, hyperparam_search_space: ( Mapping[str, Iterable | BaseDistribution] | None ) = None, feature_selectors: list[BaseFSR] | None = None, max_n_features: int | None = None, model_random_state: int = 42, name: str | None = None, **kwargs, ): """ Initializes a LinearR object. Parameters ---------- type : Literal['ols', 'l1', 'l2', 'elasticnet'] Default: 'ols'. The type of linear regression to be used. hyperparam_search_method : Literal[None, 'grid', 'optuna'] Default: None. If None, a model-specific default hyperparameter search is conducted. hyperparam_search_space : Mapping[str, Iterable | BaseDistribution] Default: None. If None, a model-specific default hyperparameter search is conducted. feature_selectors : list[BaseFSC] Default: None. If not None, specifies the feature selectors for the VotingSelectionReport. max_n_features : int | None Default: None. Only useful if feature_selectors is not None. If None, then all features with at least 50% support are selected. model_random_state : int Default: 42. Random seed for the model. name : str Default: None. Determines how the model shows up in the reports. If None, the name is set to be the class name. **kwargs : dict Key word arguments are passed directly into the intialization of the HyperparameterSearcher class. See below for options. inner_cv : int | BaseCrossValidator Default: 5. Number of inner cross validation folds. Inner cross validation is used for hyperparameter optimization. inner_cv_seed : int Default: 42. Random seed for inner cross validation. n_jobs : int Default: 1. Number of parallel jobs to run. verbose : int Default: 0. Sets the sklearn verbosity level for the sklearn estimator. 2 is the most verbose. n_trials : int Default: 100. Number of trials for hyperparameter optimization. Only used if hyperparam_search_method is 'optuna'. """ super().__init__() self._dropfirst = True # we want to drop first for linear models self._feature_selectors = feature_selectors self._max_n_features = max_n_features self._type = type if name is None: self._name = f"LinearR({self._type})" else: self._name = name if type == "ols": self._best_estimator = LinearRegression() if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = None hyperparam_search_space = None elif type == "l1": self._best_estimator = Lasso( selection="random", random_state=model_random_state ) if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = "optuna" hyperparam_search_space = { "alpha": FloatDistribution(1e-5, 1e1, log=True) } elif type == "l2": self._best_estimator = Ridge(random_state=model_random_state) if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = "optuna" hyperparam_search_space = { "alpha": FloatDistribution(1e-5, 1e1, log=True) } elif type == "elasticnet": self._best_estimator = ElasticNet( selection="random", random_state=model_random_state ) if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = "optuna" hyperparam_search_space = { "alpha": FloatDistribution(1e-5, 1e1, log=True), "l1_ratio": FloatDistribution(0.0, 1.0), } else: raise ValueError(f"Invalid value for type: {type}.") self._hyperparam_searcher = HyperparameterSearcher( estimator=self._best_estimator, method=hyperparam_search_method, hyperparam_grid=hyperparam_search_space, estimator_name=self._name, **kwargs, ) self._validate_inputs()
[docs] class RobustLinearR(BaseR): """Robust linear regressor. Hyperparameter optimization is performed automatically during training. The hyperparameter search process can be modified by the user. """
[docs] def __init__( self, type: Literal["huber", "ransac"] = "huber", hyperparam_search_method: Literal["optuna", "grid"] | None = None, hyperparam_search_space: ( Mapping[str, Iterable | BaseDistribution] | None ) = None, feature_selectors: list[BaseFSR] | None = None, max_n_features: int | None = None, model_random_state: int = 42, name: str | None = None, **kwargs, ): """ Initializes a RobustLinearR object. Parameters ---------- type : Literal['huber', 'ransac'] Default: 'huber'. hyperparam_search_method : Literal[None, 'grid', 'optuna'] Default: None. If None, a model-specific default hyperparameter search is conducted. hyperparam_search_space : Mapping[str, Iterable | BaseDistribution] Default: None. If None, a model-specific default hyperparameter search is conducted. feature_selectors : list[BaseFSC] Default: None. If not None, specifies the feature selectors for the VotingSelectionReport. max_n_features : int | None Default: None. Only useful if feature_selectors is not None. If None, then all features with at least 50% support are selected. model_random_state : int Default: 42. Random seed for the model. name : str Default: None. Determines how the model shows up in the reports. If None, the name is set to be the class name. **kwargs : dict Key word arguments are passed directly into the intialization of the HyperparameterSearcher class. See below for options. inner_cv : int | BaseCrossValidator Default: 5. Number of inner cross validation folds. Inner cross validation is used for hyperparameter optimization. inner_cv_seed : int Default: 42. Random seed for inner cross validation. n_jobs : int Default: 1. Number of parallel jobs to run. verbose : int Default: 0. Sets the sklearn verbosity level for the sklearn estimator. 2 is the most verbose. n_trials : int Default: 100. Number of trials for hyperparameter optimization. Only used if hyperparam_search_method is 'optuna'. """ super().__init__() self._dropfirst = True self._feature_selectors = feature_selectors self._max_n_features = max_n_features self._type = type if name is None: self._name = f"RobustLinearR({type})" else: self._name = name if type == "huber": self._best_estimator = HuberRegressor() if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = "optuna" hyperparam_search_space = { "epsilon": FloatDistribution(1.0, 2.0), "alpha": FloatDistribution(1e-5, 1e1, log=True), } elif type == "ransac": self._best_estimator = RANSACRegressor(random_state=model_random_state) if (hyperparam_search_method is None) or (hyperparam_search_space is None): hyperparam_search_method = "optuna" hyperparam_search_space = { "min_samples": FloatDistribution(0.1, 0.9), "residual_threshold": FloatDistribution(1.0, 10.0), "max_trials": CategoricalDistribution([100, 500, 1000]), } else: raise ValueError(f"Invalid value for type: {type}.") self._hyperparam_searcher = HyperparameterSearcher( estimator=self._best_estimator, method=hyperparam_search_method, hyperparam_grid=hyperparam_search_space, estimator_name=self._name, **kwargs, ) self._validate_inputs()