missing_updated

PHOTO EMBED

Mon Nov 18 2024 12:13:41 GMT+0000 (Coordinated Universal Time)

Saved by @wtlab

import pandas as pd

# Demonstration of common pandas strategies for handling missing values.
#
# Every strategy below starts from the same untouched sample frame `df` and
# stores its result in its own variable. `df` is never reassigned, so each
# printed result shows the effect of exactly one technique instead of the
# accumulated effect of all the previous ones.

# Sample DataFrame: each column contains exactly one missing entry.
data = {
    'Name': ['Alice', 'Bob', None, 'David'],
    'Age': [24, None, 32, 28],
    'City': ['New York', 'Los Angeles', 'Chicago', None],
}
df = pd.DataFrame(data)

# Checking for missing values
print(df.isnull())  # True where there are missing values
print(df.isnull().sum())  # Total number of missing values per column

# Drop rows with any missing values
df_dropped_rows = df.dropna()
print("Rows with missing values dropped:\n", df_dropped_rows)

# Drop columns with any missing values
# (every sample column has a gap, so no columns survive here)
df_dropped_cols = df.dropna(axis=1)
print("Columns with missing values dropped:\n", df_dropped_cols)

# Fill with a specific value (e.g., 0)
# Restricted to the numeric 'Age' column: a blanket df.fillna(0) would also
# write the integer 0 into the text columns 'Name' and 'City'.
df_filled_zeros = df.fillna({'Age': 0})
print("Missing values filled with 0:\n", df_filled_zeros)

# Fill with the mean (useful for numeric data)
# assign() returns a new frame, leaving `df` intact for the next strategies.
df_mean_filled = df.assign(Age=df['Age'].fillna(df['Age'].mean()))
print("Age column with missing values filled by mean:\n", df_mean_filled)

# Fill with the median
# Computed from the original 'Age' values, not from a mean-imputed column.
# (For this sample the mean and the median are both 28.)
df_median_filled = df.assign(Age=df['Age'].fillna(df['Age'].median()))
print("Age column with missing values filled by median:\n", df_median_filled)

# Fill with the mode (useful for categorical data)
# mode() can return several values when there is a tie; [0] takes the first.
df_mode_filled = df.assign(City=df['City'].fillna(df['City'].mode()[0]))
print("City column with missing values filled by mode:\n", df_mode_filled)

# Forward fill (fills using the last valid observation)
df_ffill = df.ffill()
print("Forward fill (missing values filled using last valid observation):\n", df_ffill)

# Backward fill (fills using the next valid observation)
# A gap in the last row has no later observation and therefore stays missing.
df_bfill = df.bfill()
print("Backward fill (missing values filled using next valid observation):\n", df_bfill)
content_copyCOPY