DS Lab: Preprocessing
Mon Jul 29 2024 03:55:08 GMT+0000 (Coordinated Universal Time)
Saved by @signup
# %% Label-encode a single categorical column
import pandas as pd
from sklearn.preprocessing import LabelEncoder

data = {
    'Car': ['Toyota', 'Ford', 'BMW', 'Audi', 'Toyota'],
}
df = pd.DataFrame(data)

# Map each distinct car name to an integer code and store it in a new column.
label_encoder = LabelEncoder()
df['Car_Label'] = label_encoder.fit_transform(df['Car'])
print(df)

# %% Student scores data set
import pandas as pd
from sklearn.preprocessing import LabelEncoder

data = {
    'Student': ['Alica', 'Bob', 'Charlie', 'David', 'Eva'],
    'Score': [85, 67, 90, 45, 76],
    'Pass/Fail': ['Pass', 'Pass', 'Pass', 'Fail', 'Pass'],
}
df = pd.DataFrame(data)
print(df)

# %% Label encoding for the Pass/Fail column
label_encoder = LabelEncoder()
df['Pass/Fail_label'] = label_encoder.fit_transform(df['Pass/Fail'])
print('data frame after encoding:')
print(df)

# %% City data set
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

data = {'City': ['New York', 'Los Angels', 'Chicago', 'Houston', 'Phoenix']}
df = pd.DataFrame(data)
print(df)

# %% One-hot encode the City column
# `sparse_output` replaces the `sparse` keyword, which was deprecated in
# scikit-learn 1.2 and later removed; False requests a dense ndarray.
one_hot_encoder = OneHotEncoder(sparse_output=False)
# The encoder expects 2-D input, hence the double brackets.
one_hot_encoded = one_hot_encoder.fit_transform(df[['City']])
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=one_hot_encoder.get_feature_names_out(['City']),
)
df = pd.concat([df, one_hot_df], axis=1)
print(df)

# %% One-hot encode the City column (same steps, alternative variable names)
O_h_e = OneHotEncoder(sparse_output=False)
One_h_e = O_h_e.fit_transform(df[['City']])
o_h_df = pd.DataFrame(One_h_e, columns=O_h_e.get_feature_names_out(['City']))
# Concatenate onto the bare City column only: `df` may already carry the
# one-hot columns from the previous cell, and joining the whole frame would
# duplicate them.  The result is stored in `df`, never in `pd`, so the pandas
# alias stays usable.
df = pd.concat([df[['City']], o_h_df], axis=1)
print(df)

# %% One-hot encode the gender column of a small people data set
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'gender': ['Female', 'Male', 'Male', 'Male', 'Female'],
    'age': [24, 30, 22, 35, 28],
}
df = pd.DataFrame(data)
print(df)

O_h_e = OneHotEncoder(sparse_output=False)
One_h_e = O_h_e.fit_transform(df[['gender']])
o_h_df = pd.DataFrame(One_h_e, columns=O_h_e.get_feature_names_out(['gender']))
# Keep the combined frame in `df`; rebinding `pd` would shadow pandas.
df = pd.concat([df, o_h_df], axis=1)
print(df)
# %% Ordinal-encode grades with an explicit order (C < B < A)
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

data = {
    'Student': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Grade': ['A', 'B', 'A', 'C', 'B'],
}
df = pd.DataFrame(data)
df

# The position in `categories` is the encoded value: C -> 0, B -> 1, A -> 2.
a = OrdinalEncoder(categories=[['C', 'B', 'A']])
# fit_transform returns a 2-D (n_samples, 1) array; flatten it so it is
# assigned as one plain column.
df['Grade_Ordinal'] = a.fit_transform(df[['Grade']]).ravel()
df

# %% Load the data set
# %matplotlib  (IPython magic; only meaningful inside a notebook)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

data = pd.read_csv("/content/Data.csv")
data.head()

# %% Split into features (all but the last column) and target (last column)
data.shape
df = pd.DataFrame(data)
df
# Lowercase `x` / `y` are the names every later cell uses.
x = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(x)

# %% Fill missing values with each column's own mean
df2 = df.copy()
# A scalar fillna would fill every NaN in the frame with that one value, so
# the Age mean would also land in Salary.  A per-column mapping fills each
# column with its own mean.
df2 = df2.fillna({
    "Age": df2["Age"].mean(),
    "Salary": df2["Salary"].mean(),
})
print(df2.isnull().sum())
# Re-extract features and target from the imputed frame so the filled values
# are what the following cells actually use.
x = df2.iloc[:, :-1].values
y = df2.iloc[:, -1].values

# %% One-hot encode the first feature column, pass the others through
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
    # Always return a dense matrix so np.array() below yields a normal 2-D
    # array regardless of how many categories column 0 has.
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))
print(x)

# %% Label-encode the target
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(y)
y

# %% Hold out 20% of the rows as a test set (fixed seed for reproducibility)
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
print(x_train)
Comments