# Showcase the data for cluster 0: draw one histogram per variable on a grid
# that is `n_cols` plots wide.
cluster_0_df = df_trimmed[df_trimmed['ClustersK'] == 0]

# Plot every column except the cluster label itself.
variable_names = [col for col in cluster_0_df.columns if col != 'ClustersK']
colors = ['#2e2237']

n_variables = len(variable_names)
n_cols = 5
# Ceiling division: just enough rows to hold all variables.
n_rows = (n_variables - 1) // n_cols + 1

# squeeze=False keeps `axes` two-dimensional even when there is only one row,
# so the [row, col] indexing below always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 3 * n_rows), squeeze=False)

for i, variable in enumerate(variable_names):
    # Fill the grid left to right, top to bottom.
    row, col = divmod(i, n_cols)
    ax = axes[row, col]
    cluster_0_df[variable].plot.hist(ax=ax, bins=20, color=colors)
    ax.set_title(f'Distribution of {variable}')
    ax.set_xlabel(variable)
    ax.set_ylabel('Frequency')

# Remove the grid cells that did not receive a histogram.
for unused_ax in axes.flat[n_variables:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter