# Demonstration script: build a small DataFrame of students, add derived
# columns, label-encode and one-hot-encode two categorical columns, and
# draw a bar chart of how often each sport occurs.

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer  # imported but not used in this section
import matplotlib.pyplot as plt

# Step 1: Create the DataFrame
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve',
             'Frank', 'Grace', 'Hannah', 'Ian', 'Jane'],
    'score': [85, 92, 88, 74, 95, 67, 78, 81, 89, 90],
    'sport': ['Basketball', 'Soccer', 'Tennis', 'Cricket', 'Baseball',
              'Swimming', 'Soccer', 'Basketball', 'Tennis', 'Cricket'],
    'sex': ['F', 'M', 'M', 'M', 'F', 'M', 'F', 'F', 'M', 'F'],
}
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Step 2: Add extra columns (gender and age)
# Map 'F' to 'Female' and 'M' to 'Male'
df['gender'] = df['sex'].map({'F': 'Female', 'M': 'Male'})
# One age per row, in the same order as the rows above
df['age'] = [20, 22, 19, 21, 23, 18, 22, 20, 24, 21]
print("\nDataFrame after adding gender and age columns:\n", df)

# Step 3: Create custom index
df.index = ["one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten"]
print("\nDataFrame with custom index:\n", df)

# Step 4: Apply Label Encoding on 'gender' column
label_encoder = LabelEncoder()
df['gender_encoded'] = label_encoder.fit_transform(df['gender'])
print("\nDataFrame after Label Encoding:\n", df)

# Step 5: Apply One Hot Encoding using OneHotEncoder on 'sport' column
# sparse_output=False makes the encoder return a dense array rather than a
# sparse matrix, so it can be wrapped directly in a DataFrame.
one_hot_encoder = OneHotEncoder(sparse_output=False)
# The encoder expects 2-D input, hence the double brackets around 'sport'.
sport_encoded = one_hot_encoder.fit_transform(df[['sport']])

# Create a DataFrame for the encoded data; reuse df's custom index so the
# concat below aligns row-for-row instead of producing NaN-filled rows.
sport_encoded_df = pd.DataFrame(
    sport_encoded,
    columns=one_hot_encoder.get_feature_names_out(['sport']),
    index=df.index,
)

# Combine with the original DataFrame
df = pd.concat([df, sport_encoded_df], axis=1)
print("\nDataFrame after One Hot Encoding using OneHotEncoder:\n", df)

# Step 6: Bar Plot for Categorical Data
# Count the occurrences of each sport
sport_counts = df['sport'].value_counts()

# Plot the bar chart
plt.figure(figsize=(8, 6))
sport_counts.plot(kind='bar', color='skyblue')
plt.title('Frequency of Sports Participation')
plt.xlabel('Sport')
plt.ylabel('Frequency')
plt.show()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter