"""Train and evaluate a Random Forest classifier on the data in Soybean.csv.

The script reads ``Soybean.csv``, holds out 20% of the rows as a test set,
fits a 100-tree Random Forest on the remaining rows, and prints the test
accuracy, a per-class classification report, and the feature importances
sorted from most to least important.
"""

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# The dataset is expected in CSV format; adjust the path if your copy of the
# file lives somewhere other than the current working directory.
data = pd.read_csv('Soybean.csv')

# Display the first few rows of the dataset to understand its structure.
print(data.head())

# Separate features and target variable.
# NOTE(review): every column except 'Class' is handed to the classifier
# unchanged. This presumably relies on the CSV already being numeric and free
# of missing values -- confirm, or add encoding/imputation before fitting.
X = data.drop('Class', axis=1)  # Features
y = data['Class']  # Target variable

# Split the dataset into training and testing sets (80% train, 20% test).
# A fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create a Random Forest classifier (seeded so results are repeatable).
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model on the training data.
rf_classifier.fit(X_train, y_train)

# Predict the labels for the test set.
y_pred = rf_classifier.predict(X_test)

# Print accuracy and classification report.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

# Display feature importances, most important first.
importances = rf_classifier.feature_importances_
feature_importance = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
)
print(feature_importance.sort_values(by='Importance', ascending=False))
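# NOTE: the lines below are not part of the script; they look like UI text
# captured from a code-snippet export page and are kept only as comments.
# Preview:
# Download PNG
# Download JPEG
# Download SVG
# Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
# Click to optimize width for Twitter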