# Feature selection demo: recursive feature elimination (RFE) with a
# linear-kernel SVM on the data in Fish.csv, using the 'Species' column as
# the classification target.
#
# Pipeline: load -> label-encode target -> train/test split -> standardize
# -> RFE to pick 3 features -> retrain the SVM on those features -> report.

import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

# Load the dataset (path is relative to the current working directory).
df = pd.read_csv('Fish.csv')

# Display the first few rows and basic information.
print(df.head())
print(df.describe())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
df.info()

# Encode the target variable as integer class labels.
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

# Separate the features and the target variable.
# NOTE(review): assumes every remaining column is numeric, since the
# features are passed straight to StandardScaler -- confirm against the CSV.
X = df.drop(columns=['Species'])
y = df['Species']

# Split into training (70%) and testing (30%) sets; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the classifier and RFE. A linear kernel is used because RFE
# ranks features by the estimator's coefficients.
svc = SVC(kernel="linear", random_state=42)
rfe = RFE(estimator=svc, n_features_to_select=3)  # 3 features, for demonstration

# Fit RFE on the scaled training data.
rfe.fit(X_train_scaled, y_train)

# Print the ranking of the features and the names of the selected ones.
print("Feature ranking (1 indicates selected features):", rfe.ranking_)
print("Selected features:", X.columns[rfe.support_])

# Keep only the columns RFE selected (rfe.support_ is a boolean mask over the
# feature columns, in the same order as X.columns).
X_train_rfe = X_train_scaled[:, rfe.support_]
X_test_rfe = X_test_scaled[:, rfe.support_]

# Train the classifier on the selected features.
svc.fit(X_train_rfe, y_train)

# Make predictions and evaluate performance on the held-out test set.
y_pred = svc.predict(X_test_rfe)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))