Detect Outliers - one-dimensional analysis
Wed Mar 16 2022 06:35:20 GMT+0000 (Coordinated Universal Time)
Saved by
@abhin__dev
def detect_outlier(feature, df=None):
    """Print candidate upper caps for one numeric column and how many values exceed each.

    Three caps are reported: the 95th percentile, the mean plus three
    standard deviations, and the 99th percentile. Every count is the number
    of values strictly greater than the cap printed on the same line.

    Args:
        feature: Name of the column to analyse.
        df: DataFrame holding the column. When omitted, the module-level
            ``titanic`` DataFrame is used, as in the original version.

    Returns:
        None. The report is written to standard output.
    """
    frame = titanic if df is None else df
    data = frame[feature]
    mean = np.mean(data)
    std = np.std(data)

    # Compute each cap once; the previous version recomputed the quantile for
    # every element while counting.
    p95_cap = data.quantile(.95)
    sd3_cap = mean + 3 * std
    p99_cap = data.quantile(.99)

    def count_above(cap):
        # Comparisons against NaN are False, so missing values are never counted.
        return int((data > cap).sum())

    # The 3sd count is one-sided on purpose: the line reports an upper cap, so
    # only values above it are counted. Comparing against the cap directly also
    # avoids dividing by a zero standard deviation on constant columns.
    print('\nOutlier caps for {}:'.format(feature))
    print(' --95p: {:.1f} / {} values exceed that'.format(p95_cap, count_above(p95_cap)))
    print(' --3sd: {:.1f} / {} values exceed that'.format(sd3_cap, count_above(sd3_cap)))
    print(' --99p: {:.1f} / {} values exceed that'.format(p99_cap, count_above(p99_cap)))
# Determine what the upper bound should be for continuous features
for feat in ['Age_clean', 'SibSp', 'Parch', 'Fare']:
    detect_outlier(feat)

# Cap Age_clean at its 99th percentile. The clipped column is assigned back
# instead of calling clip(inplace=True) on titanic['Age_clean']: that is a
# chained in-place operation on a column selection, which does not update the
# DataFrame when pandas Copy-on-Write is active.
titanic['Age_clean'] = titanic['Age_clean'].clip(
    upper=titanic['Age_clean'].quantile(0.99)
)
COPY
http://localhost:8888/notebooks/Desktop/Python/EDA/archive/Untitled.ipynb
Comments