Building the dashboard
Tue Nov 21 2023 10:09:34 GMT+0000 (Coordinated Universal Time)
Saved by @GlassFlow
import streamlit as st import pandas as pd import plotly.graph_objects as go from streamlit_autorefresh import st_autorefresh import pinotdb def get_funnel_figure(df): trace = go.Funnel( x=df.agg('sum', numeric_only=1).values, y=['home', 'login', 'cart', 'shop', 'help', 'error', 'checkout', 'OLD_CHECKOUT'] ) layout = go.Layout(margin={"l": 180, "r": 0, "t": 30, "b": 0, "pad": 0}, funnelmode="stack", showlegend=False, hovermode='closest', title='', legend=dict(orientation="v", bgcolor='#E2E2E2', xanchor='left', font=dict(size=12))) fig = go.Figure(trace, layout) fig.update_layout(title_text="Funnel", font_size=10) return fig def get_sankey_figure(df): # Process the data to capture transitions all_transitions = [] for path in df['web_page']: steps = path.split(',') transitions = list(zip(steps[:-1], steps[1:])) all_transitions.extend(transitions) transition_df = pd.DataFrame(all_transitions, columns=['source', 'target']) trans_count = (transition_df.groupby(['source', 'target']) .size() .reset_index(name='value') .sort_values('value', ascending=False)) # Create unique labels for the nodes unique_labels = pd.concat([trans_count['source'], trans_count['target']]).unique() # Map the source and target strings to numeric values trans_count['source'] = trans_count['source'].map( {label: idx for idx, label in enumerate(unique_labels)}) trans_count['target'] = trans_count['target'].map( {label: idx for idx, label in enumerate(unique_labels)}) # Create the Sankey diagram fig = go.Figure(go.Sankey( node=dict(pad=15, thickness=15, line=dict(color="black", width=0.5), label=unique_labels), link=dict(arrowlen=15, source=trans_count['source'], target=trans_count['target'], value=trans_count['value']) )) fig.update_layout(title_text="User Flow", font_size=10) return fig def get_connection(): conn = pinotdb.connect(host='localhost', port=9000, path='/sql', scheme='http') return conn def get_funnel_data(conn): query = """SELECT SUM(case when web_page='home' then 1 else 0 end) as home, SUM(case when web_page='login' then 1 else 0 end) as login, SUM(case when web_page='cart' then 1 else 0 end) as cart, SUM(case when web_page='shop' then 1 else 0 end) as shop, SUM(case when web_page='help' then 1 else 0 end) as help, SUM(case when web_page='error' then 1 else 0 end) as error, SUM(case when web_page='checkout' then 1 else 0 end) as checkout, SUM(case when web_page='OLD_CHECKOUT' then 1 else 0 end) as OLD_CHECKOUT, location, user_id FROM clickstream GROUP BY location, user_id LIMIT 200 """ df = pd.read_sql_query(query, conn) return df def get_sankey_data(conn): df = pd.read_sql_query('SELECT * FROM clickstream LIMIT 200', conn) df = (df.groupby(['location', 'user_id'])['web_page'] .apply(lambda x: ','.join(x)) .reset_index()) return df conn = get_connection() # update every 30 seconds st_autorefresh(interval=30 * 1000, key="dataframerefresh") # Funnel Chart funnel_data = get_funnel_data(conn) funnel_fig = get_funnel_figure(funnel_data) st.plotly_chart(funnel_fig, use_container_width=True) # Sankey Chart sankey_data = get_sankey_data(conn) sankey_fig = get_sankey_figure(sankey_data) st.plotly_chart(sankey_fig, use_container_width=True)
Comments