DS Lab: Preprocessing

PHOTO EMBED

Mon Jul 29 2024 03:55:08 GMT+0000 (Coordinated Universal Time)

Saved by @signup

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Toy dataset: one categorical column of car brands ('Toyota' appears twice).
data = {
    "Car": ["Toyota", "Ford", "BMW", "Audi", "Toyota"],
}
df = pd.DataFrame(data=data)

# Fit the encoder on the brand column, then store the integer code of each
# row's brand in a new 'Car_Label' column.
label_encoder = LabelEncoder()
label_encoder.fit(df["Car"])
df["Car_Label"] = label_encoder.transform(df["Car"])
print(df)
***********************************
  import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample records: student name, numeric score and a textual Pass/Fail outcome.
data = {
    "Student": ["Alica", "Bob", "Charlie", "David", "Eva"],
    "Score": [85, 67, 90, 45, 76],
    "Pass/Fail": ["Pass", "Pass", "Pass", "Fail", "Pass"],
}

# One DataFrame column per dictionary key, in insertion order.
df = pd.DataFrame(data=data)
print(df)

********************************
  # Label-encode the 'Pass/Fail' column into a new 'Pass/Fail_label' column.
label_encoder = LabelEncoder()
label_encoder.fit(df["Pass/Fail"])
df["Pass/Fail_label"] = label_encoder.transform(df["Pass/Fail"])

# Heading line followed by the updated frame.
print("data frame after encoding:", df, sep="\n")
*******************************************
  import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# A single categorical column of city names.
data = {
    "City": ["New York", "Los Angels", "Chicago", "Houston", "Phoenix"],
}
df = pd.DataFrame(data=data)
print(df)
*************************************
  # --- one-hot encode the 'City' column and append the indicators to df ---
# The encoder is created with its default (sparse) output and the result is
# densified with .toarray(). Passing `sparse=False` raises TypeError on
# current scikit-learn (the keyword was renamed to `sparse_output` in 1.2 and
# removed in 1.4), whereas this form runs on old and new releases alike.
one_hot_encoder = OneHotEncoder()
one_hot_encoded = one_hot_encoder.fit_transform(df[["City"]]).toarray()

# Reuse df's index so the column-wise concat lines up row-for-row even when
# df does not carry the default 0..n-1 index.
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=one_hot_encoder.get_feature_names_out(["City"]),
    index=df.index,
)
df = pd.concat([df, one_hot_df], axis=1)

print(df)
*****************************************************************
  # --- one-hot encode the 'City' column (short variable names) ---
# Default sparse output + .toarray() instead of `sparse=False`: that keyword
# was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4.
O_h_e = OneHotEncoder()
One_h_e = O_h_e.fit_transform(df[["City"]]).toarray()
o_h_df = pd.DataFrame(
    One_h_e,
    columns=O_h_e.get_feature_names_out(["City"]),
    index=df.index,
)

# Join the indicators to the source column only: if df already carries
# City_* indicator columns, joining onto the whole frame would repeat them
# under duplicate names.
# The result gets its own name; binding it to `pd` would replace the pandas
# module alias and break every later `pd.` call.
df_encoded = pd.concat([df[["City"]], o_h_df], axis=1)
print(df_encoded)
************************************************************
  import pandas as pd
from sklearn.preprocessing import OneHotEncoder
# Sample people records; 'gender' is the categorical column encoded below.
data = {
    "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "gender": ["Female", "Male", "Male", "Male", "Female"],
    "age": [24, 30, 22, 35, 28],
}
df = pd.DataFrame(data)
print(df)

# Default sparse output + .toarray() instead of `sparse=False`: that keyword
# was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4.
O_h_e = OneHotEncoder()
One_h_e = O_h_e.fit_transform(df[["gender"]]).toarray()
o_h_df = pd.DataFrame(
    One_h_e,
    columns=O_h_e.get_feature_names_out(["gender"]),
    index=df.index,  # keep rows aligned with df for the concat
)

# The result gets its own name; binding it to `pd` would replace the pandas
# module alias and break every later `pd.` call.
df_encoded = pd.concat([df, o_h_df], axis=1)
print(df_encoded)
****************************************************
  import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
# Student grades; 'Grade' is the column handed to the ordinal encoder.
data = {
    "Student": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Grade": ["A", "B", "A", "C", "B"],
}
df = pd.DataFrame(data=data)
df

# The category list is given explicitly, in the order C, B, A.
a = OrdinalEncoder(categories=[["C", "B", "A"]])
a.fit(df[["Grade"]])
df["Grade_Ordinal"] = a.transform(df[["Grade"]])
df
************************************************
%matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
# Read the CSV at the hard-coded path into `data`, then preview its first rows.
data = pd.read_csv(filepath_or_buffer="/content/Data.csv")
data.head(n=5)
****************
  # --- inspect the loaded data and slice it into X / Y arrays ---
# Statements start at column 0: a top-level statement with leading spaces is
# an IndentationError when this code runs as a script.
data.shape
df = pd.DataFrame(data)
df

# X: every column except the last, as a NumPy array.
# Y: the last column only, as a NumPy array.
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values
print(X)
********************************************
  # --- fill missing 'Age' / 'Salary' values with each column's own mean ---
# A frame-wide fillna(<Age mean>) writes the Age mean into every missing cell
# of every column, so 'Salary' gaps would receive the Age mean and a later
# Salary fill would find nothing left to do. Passing a dict restricts each
# fill value to its own column.
df2 = df.copy()
df2 = df2.fillna(
    {
        "Age": df2["Age"].mean(),
        "Salary": df2["Salary"].mean(),
    }
)

# Remaining missing-value count per column.
print(df2.isnull().sum())
*****************************************
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode column 0 and pass every other column through unchanged.
# sparse_threshold=0 makes the transformer always return a dense array, so
# the np.array(...) call below never ends up wrapping a sparse matrix.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [0])],
    remainder="passthrough",
    sparse_threshold=0,
)

# The feature matrix is bound to upper-case `X`; reading a lower-case `x`
# here raised NameError because no such name is assigned before this point.
x = np.array(ct.fit_transform(X))
print(x)
*****************************
  from sklearn.preprocessing import LabelEncoder
# Encode the values of `Y` as integer labels.
le = LabelEncoder()

# The last-column array is bound to upper-case `Y`; reading a lower-case `y`
# here raised NameError because no such name is assigned before this point.
y = le.fit_transform(Y)
y
*************************************
  from sklearn.model_selection import train_test_split
# Split x / y into train and test parts with test_size=0.2; random_state=1
# is passed so the split is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)
print(x_train)
content_copyCOPY