# missing_updated
# Mon Nov 18 2024 12:13:41 GMT+0000 (Coordinated Universal Time)
# Saved by @wtlab
import pandas as pd
# Demonstration of common strategies for handling missing values in pandas.
#
# Every strategy below starts from the same untouched ``df`` and stores its
# result under its own name, so each printout shows the effect of exactly one
# technique. (Writing the mean fill back into ``df`` would leave nothing for
# the median fill to do, and would hide most of the effect of the forward and
# backward fills.)

# Sample DataFrame: each column contains exactly one missing entry.
data = {
    'Name': ['Alice', 'Bob', None, 'David'],
    'Age': [24, None, 32, 28],
    'City': ['New York', 'Los Angeles', 'Chicago', None],
}
df = pd.DataFrame(data)

# Checking for missing values
print(df.isnull())  # True where there are missing values
print(df.isnull().sum())  # Total number of missing values per column

# Drop rows with any missing values
df_dropped_rows = df.dropna()
print("Rows with missing values dropped:\n", df_dropped_rows)

# Drop columns with any missing values
df_dropped_cols = df.dropna(axis=1)
print("Columns with missing values dropped:\n", df_dropped_cols)

# Fill with a specific value (e.g., 0).
# Only numeric columns are filled: putting the integer 0 into text columns
# would produce mixed-type columns, so 'Name' and 'City' are left as they are.
numeric_columns = df.select_dtypes(include='number').columns
df_filled_zeros = df.fillna({column: 0 for column in numeric_columns})
print("Missing values filled with 0:\n", df_filled_zeros)

# Fill with the mean (useful for numeric data)
df_mean = df.fillna({'Age': df['Age'].mean()})
print("Age column with missing values filled by mean:\n", df_mean)

# Fill with the median
df_median = df.fillna({'Age': df['Age'].median()})
print("Age column with missing values filled by median:\n", df_median)

# Fill with the mode (useful for categorical data).
# mode() can return several values when there is a tie; take the first one.
df_mode = df.fillna({'City': df['City'].mode().iloc[0]})
print("City column with missing values filled by mode:\n", df_mode)

# Forward fill (fills using the last valid observation)
df_ffill = df.ffill()
print("Forward fill (missing values filled using last valid observation):\n", df_ffill)

# Backward fill (fills using the next valid observation).
# A missing value in the last row has no later observation and stays missing.
df_bfill = df.bfill()
print("Backward fill (missing values filled using next valid observation):\n", df_bfill)
# [Copy]
# Comments