class MeanNormalScaler(BaseEstimator, TransformerMixin):
    """Custom Scikit-learn transformer for mean normalization.

    Mean normalization subtracts the mean of each feature from the feature
    values and then divides by the range (maximum value minus minimum value)
    of that feature. The transformation is given by:

        X_scaled = (X - X.mean()) / (X.max() - X.min())

    Parameters
    ----------
    copy : bool, optional (default: True)
        Whether ``transform`` and ``inverse_transform`` copy their input
        before operating on it.

    Attributes
    ----------
    mean_ : per-feature mean computed by ``fit`` (``None`` before fitting).
    scale_ : per-feature range (max - min) computed by ``fit``, with zero
        ranges replaced by 1 (``None`` before fitting).
    """

    def __init__(self: object, copy: bool = True):
        self.copy = copy
        self.mean_ = None
        self.scale_ = None

    def fit(self: object, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> object:
        """Compute the mean and range (max - min) for each feature.

        Parameters
        ----------
        X : pd.DataFrame
            The input dataframe where each column represents a feature.

        y : pd.DataFrame, optional (default: None)
            Ignored.

        Returns
        -------
        self : object
            Fitted transformer.
        """
        self.mean_ = np.mean(X, axis=0)
        data_range = np.max(X, axis=0) - np.min(X, axis=0)
        # A constant feature has a zero range, which would turn the division in
        # ``transform`` into 0 / 0. Adding the boolean mask bumps exactly those
        # entries from 0 to 1, so constant features are mapped to 0 instead.
        self.scale_ = data_range + (data_range == 0)
        return self

    def transform(
        self: object,
        X: pd.DataFrame,
        y: Optional[pd.DataFrame] = None,
        copy: Optional[bool] = None,
    ) -> np.ndarray:
        """Apply mean normalization to the data.

        Parameters
        ----------
        X : pd.DataFrame
            The input dataframe where each column represents a feature.

        y : pd.DataFrame, optional (default: None)
            Ignored.

        copy : bool, optional (default: None)
            Copy the input X or not. When None, the ``copy`` value given to
            the constructor is used.

        Returns
        -------
        X_tr : np.ndarray
            The normalized data. NOTE(review): the result is produced by plain
            arithmetic on X, so a DataFrame input presumably yields a
            DataFrame rather than an ndarray - confirm against callers.
        """
        # The per-call argument takes precedence over the constructor setting.
        copy = copy if copy is not None else self.copy
        if copy:
            X = X.copy()
        return (X - self.mean_) / self.scale_

    def inverse_transform(self: object, X: pd.DataFrame) -> np.ndarray:
        """Reverse the mean normalization transformation.

        Parameters
        ----------
        X : pd.DataFrame
            The input dataframe where each column represents a feature.

        Returns
        -------
        X_tr : np.ndarray
            The data mapped back to the original scale. Constant features
            (scaled with a substitute range of 1) are restored to their mean.
        """
        if self.copy:
            X = X.copy()
        return X * self.scale_ + self.mean_
def feature_scaler(X: pd.DataFrame, method: List[str], method_idx: int) -> tuple[dict, np.ndarray]:
    """Apply feature scaling methods.

    Parameters
    ----------
    X : pd.DataFrame
        The dataset.

    method : List[str]
        The names of the available feature scaling methods.

    method_idx : int
        The index into ``method`` of the scaling method to apply.

    Returns
    -------
    feature_scaling_config : dict
        The feature scaling configuration, mapping the scaler class name to
        the parameters reported by its ``get_params()``.

    X_scaled : np.ndarray
        The dataset after scaling.

    Raises
    ------
    ValueError
        If the selected method name is not supported, or if the scaler
        rejects the dataset while fitting/transforming.
    """
    # Classes are stored uninstantiated so only the selected scaler is built.
    scalers = {
        "Min-max Scaling": MinMaxScaler,
        "Standardization": StandardScaler,
        "Mean Normalization": MeanNormalScaler,
    }
    method_name = method[method_idx]
    if method_name not in scalers:
        raise ValueError(
            f"Unsupported feature scaling method: {method_name!r}. " f"Expected one of {list(scalers)}."
        )
    scaler = scalers[method_name]()

    try:
        X_scaled = scaler.fit_transform(X)
    except ValueError:
        print("The selected feature scaling method is not applicable to the dataset!")
        print("Please check the dataset to find the reason.")
        # There is no scaled data to return, so propagate the original error
        # instead of failing later on an unbound ``X_scaled``.
        raise

    feature_scaling_config = {type(scaler).__name__: scaler.get_params()}
    return feature_scaling_config, X_scaled
def feature_selector(
    X: pd.DataFrame,
    y: pd.DataFrame,
    feature_selection_task: int,
    method: List[str],
    method_idx: int,
) -> tuple[dict, pd.DataFrame]:
    """Apply feature selection methods.

    Parameters
    ----------
    X : pd.DataFrame
        The feature dataset.

    y : pd.DataFrame
        The label dataset.

    feature_selection_task : int
        Feature selection for regression (1) or classification (2) tasks.

    method : List[str]
        The names of the available feature selection methods.

    method_idx : int
        The index into ``method`` of the selection method to apply.

    Returns
    -------
    feature_selection_config : dict
        The feature selection configuration, mapping the selector class name
        to the parameters reported by its ``get_params()``.

    X_selected : pd.DataFrame
        The feature dataset after selecting. If the selector raises a
        ValueError on the dataset, a hint is printed and X is returned
        unchanged.

    Raises
    ------
    ValueError
        If ``feature_selection_task`` or the selected method name is not
        supported, or if the number entered at the prompt is not an integer.
    """
    # Validate the configuration before prompting, so a bad call fails
    # immediately instead of after the user has already typed an answer.
    score_funcs = {1: f_regression, 2: f_classif}
    if feature_selection_task not in score_funcs:
        raise ValueError(
            f"Unsupported feature selection task: {feature_selection_task!r}. "
            "Expected 1 (regression) or 2 (classification)."
        )
    score_func = score_funcs[feature_selection_task]

    supported_methods = ("Generic Univariate Select", "Select K Best")
    method_name = method[method_idx]
    if method_name not in supported_methods:
        raise ValueError(
            f"Unsupported feature selection method: {method_name!r}. " f"Expected one of {list(supported_methods)}."
        )

    print("-- Original Features --")
    show_data_columns(X.columns)

    features_num = len(X.columns)
    print(f"The original number of features is {features_num}, and your input must be less than {features_num}.")
    features_retain_num = int(input("Please enter the number of features to retain.\n" "@input: "))

    if method_name == "Generic Univariate Select":
        selector = GenericUnivariateSelect(score_func=score_func, mode="k_best", param=features_retain_num)
    else:
        selector = SelectKBest(score_func=score_func, k=features_retain_num)

    try:
        selector.fit(X, y)
        features_selected = selector.get_feature_names_out()
        X = X[features_selected]
    except ValueError:
        # Best-effort: report the problem and fall through with X untouched.
        print("The selected feature selection method is not applicable to the dataset!")
        print("Please check the dataset to find the reason.")

    feature_selection_config = {type(selector).__name__: selector.get_params()}
    return feature_selection_config, X