def detect_outliers(df, n, features):
    """Return the index labels of rows that are outliers in more than ``n`` features.

    A value is treated as an outlier for a column when it lies more than
    1.5 * IQR below the 25th percentile or above the 75th percentile of
    that column (Tukey's fences).

    Args:
        df: DataFrame holding the columns named in ``features``.
        n: Threshold; a row is reported only when it is an outlier in
            strictly more than ``n`` of the given columns.
        features: Iterable of column names to inspect.

    Returns:
        A list of index labels from ``df`` (not positions), in order of
        first appearance while scanning ``features``.
    """
    outlier_indices = []

    for col in features:
        # nanpercentile ignores missing values; plain np.percentile returns
        # NaN as soon as the column contains one NaN, which makes both
        # comparisons below False and hides every outlier in that column.
        q1 = np.nanpercentile(df[col], 25)
        q3 = np.nanpercentile(df[col], 75)
        outlier_step = 1.5 * (q3 - q1)

        # Comparisons against NaN are False, so rows with a missing value
        # in this column are never flagged for it.
        is_outlier = (df[col] < q1 - outlier_step) | (df[col] > q3 + outlier_step)
        outlier_indices.extend(df[is_outlier].index)

    # Count in how many columns each row label was flagged.
    flag_counts = Counter(outlier_indices)
    return [label for label, count in flag_counts.items() if count > n]


Outliers_to_drop = detect_outliers(data1, 2, ['Age', 'Parch', 'Fare', 'SibSp'])
# detect_outliers returns index labels, so select by label (.loc) rather than
# by position (.iloc); the two only coincide for a default 0..N-1 index.
data1.loc[Outliers_to_drop]
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter