import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Step 1.1: Convert dictionary to DataFrame
data = {
    'Feature1': [1, 2, np.nan, 4, 5],
    'Feature2': [5, np.nan, np.nan, 8, 9],
}
df = pd.DataFrame(data)
# Step 1.2: Fill missing values in 'Feature1' with the column mean.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form is deprecated and does not
# update `df` when pandas Copy-on-Write is active.
df['Feature1'] = df['Feature1'].fillna(df['Feature1'].mean())
# Step 1.3: Fill missing values in 'Feature2' with the column median
df['Feature2'] = df['Feature2'].fillna(df['Feature2'].median())
# Step 1.4: Attach example target labels for classification
df['Target'] = [0, 1, 0, 1, 0]
# Step 2: Separate features from the target and split into train/test sets
X = df.drop(columns=['Target'])  # Features (without 'Target')
y = df['Target']  # Target variable
# NOTE: with only 5 rows, test_size=0.2 leaves a single test sample, so the
# reported accuracy can only be 0% or 100%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 3: Load Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)
# Step 4: Train the model
clf.fit(X_train, y_train)
# Step 5: Make predictions
y_pred = clf.predict(X_test)
# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nDecision Tree Classifier Accuracy: {accuracy * 100:.2f}%")