prep event level

PHOTO EMBED

Wed Mar 22 2023 15:25:37 GMT+0000 (Coordinated Universal Time)

Saved by @Matt_Stone #gigkpi

with prep as 
(select  
--key session data
user_pseudo_id,
event_date as date,
device.web_info.hostname AS Hostname,
device.category as device_category,
(select value.string_value from unnest(event_params) where key = 'ga_session_id') as ga_session_id,
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as sessions,
(case when event_name = "first_visit" then user_pseudo_id end) as new_user,

--page data
regexp_replace(regexp_replace((select p.value.string_value from unnest(event_params) as p where p.key = 'page_location'),r'^https?://[^/]+',''),r'[\?].*','') as page_path,
regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
 (select value.string_value from unnest(event_params) where key = 'page_title') as page_title,

--FIrst traffic data
traffic_source.name AS First_Campaign,
traffic_source.medium AS FIRST_medium,
traffic_source.source AS FIRST_source,

--Engagement metrics
max((select value.string_value from unnest(event_params) where key = 'session_engaged')) as session_engaged,
sum((select value.int_value from unnest(event_params) where key = 'engagement_time_msec'))/1000 as engagement_time_seconds,
max((select value.int_value from unnest(event_params) where key = 'engagement_time_msec')) as engagement_time_msec,
max((select value.int_value from unnest(event_params) where key = 'engagement_time_msec'AND user_pseudo_id is not null)) as USER_engagement_time_msec,
max((select value.string_value from unnest(event_params) where key = 'variant_id')) as variant_id,
max((select value.string_value from unnest(event_params) where key = 'experiment_id')) as experiment_id,
max((select item_name from unnest(items) )) as itemName,

  count(distinct case when event_name = "page_view" then 
      concat(user_pseudo_id,event_timestamp) end) as page_views
   
from  `bigquery-341716.analytics_278788286.events_*`
WHERE _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
    and format_date('%Y%m%d',date_sub(current_date(), interval 2 day))

group by
user_pseudo_id,
page_path,
page_location,
page_title,
date,
ga_session_id,
sessions,
new_user,
First_Campaign,
FIRST_medium,
FIRST_source,
Hostname,
device_category)

select 
Hostname,
date,
First_medium as medium,
FIRST_source as source,
First_Campaign as Campaign,
itemName as Campaign_code,
page_path,
page_location,
page_title,

case
        when First_Campaign is null and (First_medium = '(not set)' or First_medium is null) then 'Direct'
        when First_medium = '(none)' then 'Direct'
        when First_medium = 'null' then 'Direct'
        when First_medium = 'organic' then 'Organic Search'
        when regexp_contains(First_medium, r'^(facebook|twitter|youtube|social|social-network|social-media|sm|social network|social media)$') then 'Social'
        when regexp_contains(First_medium, r'^(share|shared)$') then 'Shared'
        when regexp_contains(First_medium, r'^(email|emai|email-|ciwf)$') then 'Email'
        when First_medium = 'affiliate' then 'Affiliates'
        when First_medium = 'referral' then 'Referral'
        when First_medium = 'QR-code' then 'QR-code'
        when regexp_contains(First_medium, r'^(cpc|ppc|paidsearch)$') then 'Lead Gen'
        when regexp_contains(First_medium, r' ^(cpv|cpa|cpp|content-text)$') then 'Other Advertising'
        when regexp_contains(First_medium, r'^(display|cpm|banner)$') then 'Display'
        else '(Other)' end as session_default_medium_grouping,

 count(distinct sessions) as sessions,
  --count(distinct concat(user_pseudo_id,ga_session_id)) as sessions, 
  count(distinct case when session_engaged = '1' then concat(user_pseudo_id)END) as Engaged_Sessions,
  COUNT (DISTINCT user_pseudo_id) AS Users,
  COUNT (DISTINCT new_user) AS near_Users,
  sum(page_views) as page_view,
  --(sum(engagement_time_seconds),count(distinct case when session_engaged = '1' then sessions end)) as engagement_time,
  (safe_divide(sum(USER_engagement_time_msec),count(distinct case when session_engaged = '1' then sessions end)))/1000 as USER_engagement_time

from prep
group by
Hostname,
Campaign,
itemName,
date,
First_medium,
FIRST_source,
session_default_medium_grouping,
device_category,
page_path,
page_location,
page_title
content_copyCOPY