2.5 Survey data mining with a gender perspective

PHOTO EMBED

Tue Aug 31 2021 18:43:42 GMT+0000 (Coordinated Universal Time)

Saved by @saraortiz #python

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np

# Read the gender-equality survey CSV. df_gender holds the frame as read,
# df is the working copy that the following steps reshape.
df_gender = pd.read_csv('data/prepared/Survey_on_Gender_Equality_At_Home/Survey_on_Gender_Equality_At_Home_2_cleaned.csv')
df = df_gender.copy()
df.head()

# Keep the five columns used by the analysis and give the two coded
# columns (a1_agree, b7_full) descriptive names.
kept_columns = ['Country', 'Gender', 'Internet_Penetration', 'a1_agree', 'b7_full']
descriptive_names = {'a1_agree': 'Equal_Rights', 'b7_full': 'Access_Money'}
df = df[kept_columns].rename(columns=descriptive_names)

# Show the raw Internet_Penetration values before they are parsed.
print(df['Internet_Penetration'].unique())

# Drop rows whose Internet_Penetration is the '.' placeholder. The explicit
# .copy() makes df an independent frame, so the column assignment below does
# not write into a slice of the previous frame (SettingWithCopyWarning).
df = df[df['Internet_Penetration'] != '.'].copy()

# Convert the text values to floats using their leading run of digits.
# The previous character-slicing approach kept at most two characters, so a
# three-digit value such as "100..." became 10, and a one-character value
# raised IndexError. Values without leading digits become NaN.
df['Internet_Penetration'] = (
    df['Internet_Penetration']
    .str.extract(r'^\s*(\d+)', expand=False)
    .astype(float)
)
                              
# Show the distinct Equal_Rights values present in the data.
print(df['Equal_Rights'].unique())

# The value 19 is treated as an outlier: look up the countries that carry it,
# then keep every row that does not.
is_outlier = df['Equal_Rights'].eq(19)
df.loc[is_outlier, 'Country']

df = df.loc[~is_outlier]

# Three stacked regression plots. The first uses all respondents, the other
# two use only the rows where Gender is 'Female'.
figure, axes = plt.subplots(3, 1, figsize=(10, 15))
df_female = df[df['Gender'] == 'Female']

# One entry per panel, top to bottom: (frame, x column, y column, title).
panels = [
    (df, 'Internet_Penetration', 'Equal_Rights',
     'Internet penetration vs Equal rights perception'),
    (df_female, 'Internet_Penetration', 'Access_Money',
     'Internet penetration vs self access of money'),
    (df_female, 'Equal_Rights', 'Access_Money',
     'Equal rights perception vs Self access of money'),
]
for panel_ax, (frame, x_col, y_col, panel_title) in zip(axes, panels):
    sns.regplot(x=frame[x_col], y=frame[y_col], ax=panel_ax)
    # Axis labels are blanked; the title carries the description.
    panel_ax.set(title=panel_title, xlabel='', ylabel='')
plt.show()

# Annotated correlation heatmap for the female respondents.
fig = plt.figure(figsize=(7, 6))
# df_female still holds the text columns Country and Gender. DataFrame.corr()
# raises on non-numeric columns in pandas >= 2.0, so restrict the frame to
# numeric columns first (this also works on older pandas versions).
female_corr = df_female.select_dtypes(include='number').corr()
sns.heatmap(female_corr, annot=True).set_title('Correlation')
plt.show()

# Keep the cleaned gender frame under df_gender so that df can be reused
# for the business survey.
df_gender = df

# Read the business survey CSV. df_business holds the frame as read,
# df is the working copy.
df_business = pd.read_csv('data/prepared/Survey_on_future_business/Survey_on_future_business_good_clean.csv')
df = df_business.copy()
df.head()

# Re-express 'statistic' as a percentage of 'total_asked', rounded to
# two decimals.
share_of_asked = df['statistic'] / df['total_asked']
df['statistic'] = (share_of_asked * 100).round(2)

# Country lookup table; only the country name and its two-letter code are kept.
# NOTE(review): read_csv parses a bare "NA" cell as missing by default --
# confirm that a two-letter code of "NA" (if present) survives in both files.
df_codes = pd.read_csv('data/prepared/Country_Code.csv')
df_codes.head()

df_codes = df_codes.loc[:, ['Country', 'Alpha-2 code']]

# Attach the country name to each business-survey row by matching the
# lookup's 'Alpha-2 code' against the survey's 'country' column
# (merge default: inner join).
df = pd.merge(df_codes, df, left_on='Alpha-2 code', right_on='country')

# These columns are not used by the steps that follow.
df = df.drop(columns=['country', 'who_was_asked', 'total_asked'])
# Keep only the two survey variables of interest and give them readable
# labels. The steps are chained without inplace=True: mutating the result of
# a boolean filter in place writes into a slice of the previous frame and
# triggers SettingWithCopyWarning.
wanted_variables = ['gen_opn_1_text', 'own_fem_text']
readable_labels = ['Self_perp_equal_rights', 'Owner_female']
df = df[df['variable'].isin(wanted_variables)].replace(wanted_variables, readable_labels)

# Show the distinct answers (printed before rows with missing data are dropped).
print(df['value'].unique())

# Drop rows with any missing field and renumber the index from 0.
df = df.dropna().reset_index(drop=True)
df

# Spread 'statistic' into one column per (variable, answer) pair, named
# "<variable>_<answer>". A row carries its statistic in the matching column
# and NaN everywhere else. Series.where does this in one vectorised step
# instead of a Python-level lookup per row.
for var in df['variable'].unique():
    is_var = df['variable'] == var
    for val in df.loc[is_var, 'value'].unique():
        df[var + '_' + val] = df['statistic'].where(is_var & (df['value'] == val))

# Collapse to one row per country. The sum skips NaN, so a pair with no data
# for a country yields 0. numeric_only=True keeps the leftover text columns
# out of the sum (pandas >= 2.0 would otherwise concatenate their strings).
df = df.groupby('Country').sum(numeric_only=True).drop(columns=['statistic'])
df

# Fold the per-answer columns into four summary columns. Each summary column
# is the left-to-right sum of the source columns listed for it.
summary_spec = {
    'Equal_rights_agree': [
        'Self_perp_equal_rights_Agree',
        'Self_perp_equal_rights_Strongly agree',
    ],
    'Equal_rights_disagree': [
        'Self_perp_equal_rights_Disagree',
        'Self_perp_equal_rights_Strongly disagree',
    ],
    'Female_owner_Half_or_more': [
        'Owner_female_All owners are female',
        'Owner_female_Exactly half',
        'Owner_female_More than half',
    ],
    'Female_owner_less_than_half': [
        'Owner_female_Less than half',
        'Owner_female_None',
    ],
}
for summary_col, (first_part, *other_parts) in summary_spec.items():
    running_total = df[first_part]
    for part in other_parts:
        running_total = running_total + df[part]
    df[summary_col] = running_total

# Keep only the summary columns, in the order declared above.
df = df[list(summary_spec)]
df

# Single regression plot: Equal_rights_agree on the x axis against
# Female_owner_Half_or_more on the y axis.
figure, axes = plt.subplots(1, 1, figsize=(9, 5))
# The freshly created axes is the current one, so passing it explicitly
# draws in the same place as relying on the implicit current axes.
sns.regplot(x=df['Equal_rights_agree'], y=df['Female_owner_Half_or_more'], ax=axes)
axes.set(
    title='Equal rights perception vs proportion of female owner of business',
    xlabel='',
    ylabel='',
)
plt.show()

# Turn the Country index back into a regular column so it can serve as the
# join key, then join the gender frame with the business summary on Country
# (merge default: inner join).
df_business = df.reset_index()
df = pd.merge(df_gender, df_business, on='Country')

# Rows where Gender is 'Female'.
df_female = df.loc[df['Gender'].eq('Female')]
df_female

# Two stacked regression plots over the female rows, both with
# Female_owner_Half_or_more on the y axis.
figure, axes = plt.subplots(2, 1, figsize=(15, 12))

# One entry per panel, top to bottom: (x column, title).
owner_panels = [
    ('Access_Money', 'Self access of money vs Female_owner_Half_or_more'),
    ('Internet_Penetration', 'Internet penetration vs Female_owner_Half_or_more'),
]
for panel_ax, (x_col, panel_title) in zip(axes, owner_panels):
    sns.regplot(x=df_female[x_col], y=df_female['Female_owner_Half_or_more'], ax=panel_ax)
    # Axis labels are blanked; the title carries the description.
    panel_ax.set(title=panel_title, xlabel='', ylabel='')

plt.show()
content_copyCOPY