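# Snippet: adding columns, a custom index, label encoding, one-hot encoding, and a bar chart
# (original title: adding_column_,index_label_encode_one_hot_encode_bar_chart)
#
# Tue Nov 19 2024 04:47:25 GMT+0000 (Coordinated Universal Time)
#
# Saved by @login123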

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
import matplotlib.pyplot as plt

# Step 1: Assemble the raw records column by column and load them into a DataFrame.
# Every list holds one entry per person (ten in total).
data = {
    'name': [
        'Alice', 'Bob', 'Charlie', 'David', 'Eve',
        'Frank', 'Grace', 'Hannah', 'Ian', 'Jane',
    ],
    'score': [85, 92, 88, 74, 95, 67, 78, 81, 89, 90],
    'sport': [
        'Basketball', 'Soccer', 'Tennis', 'Cricket', 'Baseball',
        'Swimming', 'Soccer', 'Basketball', 'Tennis', 'Cricket',
    ],
    'sex': ['F', 'M', 'M', 'M', 'F', 'M', 'F', 'F', 'M', 'F'],
}
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Step 2: Append two columns in one go:
#   - 'gender' spells out the one-letter 'sex' code ('F' -> 'Female', 'M' -> 'Male')
#   - 'age' is supplied directly, one value per row
gender_by_code = {'F': 'Female', 'M': 'Male'}
df = df.assign(
    gender=df['sex'].map(gender_by_code),
    age=[20, 22, 19, 21, 23, 18, 22, 20, 24, 21],
)
print("\nDataFrame after adding gender and age columns:\n", df)

# Step 3: Replace the default integer index with word labels, one per row.
row_labels = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
df.index = row_labels
print("\nDataFrame with custom index:\n", df)

# Step 4: Label-encode the 'gender' column.
# LabelEncoder learns the distinct labels during fit() and transform() then
# replaces each label with its integer code, stored in 'gender_encoded'.
label_encoder = LabelEncoder()
label_encoder.fit(df['gender'])
df['gender_encoded'] = label_encoder.transform(df['gender'])
print("\nDataFrame after Label Encoding:\n", df)

# Step 5: One-hot encode the 'sport' column with OneHotEncoder.
# sparse_output=False makes the encoder return a dense NumPy array rather than
# a SciPy sparse matrix, so it can be wrapped in a DataFrame directly.
one_hot_encoder = OneHotEncoder(sparse_output=False)

# The encoder expects 2-D input, hence the double brackets (a one-column frame).
sport_column = df[['sport']]
sport_encoded = one_hot_encoder.fit(sport_column).transform(sport_column)

# Wrap the encoded array in a DataFrame: one 'sport_<value>' column per
# category, sharing df's index so the rows line up when combined.
dummy_names = one_hot_encoder.get_feature_names_out(['sport'])
sport_encoded_df = pd.DataFrame(sport_encoded, index=df.index, columns=dummy_names)

# Attach the indicator columns to the right of the existing ones.
df = pd.concat([df, sport_encoded_df], axis=1)
print("\nDataFrame after One Hot Encoding using OneHotEncoder:\n", df)

# Step 6: Bar chart of a categorical column.
# value_counts() tallies how many rows mention each sport.
sport_counts = df['sport'].value_counts()

# Open an 8x6 inch figure; the pandas bar plot draws onto its current axes.
plt.figure(figsize=(8, 6))
sport_counts.plot.bar(color='skyblue')

# Label the chart, then display it.
plt.title('Frequency of Sports Participation')
plt.xlabel('Sport')
plt.ylabel('Frequency')
plt.show()
# End of snippet.