# Click-prediction walkthrough for the Facebook Ads CSV: load the data,
# explore the binary 'Clicked' label, then fit and evaluate a logistic
# regression classifier on the remaining columns.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Load
# ---------------------------------------------------------------------------
training_set = pd.read_csv('/content/Facebook_Ads_2.csv', encoding='ISO-8859-1')
training_set.head()

# ---------------------------------------------------------------------------
# Explore the label distribution
# ---------------------------------------------------------------------------
# Partition the rows by label value (1 = clicked, 0 = did not click).
click = training_set[training_set['Clicked'] == 1]
no_click = training_set[training_set['Clicked'] == 0]
click.head()

print("Total = ", len(training_set))
print("Click = ", len(click))
print("No click = ", len(no_click))
# True division already yields a float in Python 3, so no `1.*` coercion
# is needed to get a percentage.
print("% who not clicked ", len(no_click) / len(training_set) * 100.0, "%")
print("% who clicked ", len(click) / len(training_set) * 100.0, "%")

# Each plot gets its own figure; without this, successive plotting calls
# draw on top of each other on the current axes when run as one script.
plt.figure()
training_set.Clicked.value_counts().plot(kind='bar')

plt.figure()
training_set.Clicked.value_counts().plot(kind='pie')

plt.figure()
sns.scatterplot(x='Time Spent on Site', y='Salary', hue='Clicked', data=training_set)

plt.figure(figsize=(5, 5))
sns.boxplot(x='Clicked', y='Salary', data=training_set)

# ---------------------------------------------------------------------------
# Prepare features and labels
# ---------------------------------------------------------------------------
# These columns are removed before modelling; everything left except
# 'Clicked' is used as a feature.
training_set.drop(['Names', 'emails', 'Country'], axis=1, inplace=True)
training_set.head()

x = training_set.drop('Clicked', axis=1).values
y = training_set['Clicked'].values

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (avoids train/test leakage). random_state=0 keeps the row
# assignment reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Fit the scaler on the training rows only, then apply the same transform
# to the test rows. NOTE: `x` itself is left unscaled.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# ---------------------------------------------------------------------------
# Train
# ---------------------------------------------------------------------------
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

y_predict_train = classifier.predict(x_train)
y_predict_train

y_predict_test = classifier.predict(x_test)
y_predict_test

# ---------------------------------------------------------------------------
# Evaluate on the held-out split
# ---------------------------------------------------------------------------
cm = confusion_matrix(y_test, y_predict_test)

# Separate figure so the heatmap is not drawn over the boxplot above.
plt.figure()
sns.heatmap(cm, annot=True, fmt='d')

print(classification_report(y_test, y_predict_test))