page path back up

PHOTO EMBED

Fri Mar 31 2023 13:32:26 GMT+0000 (Coordinated Universal Time)

Saved by @Matt_Stone #gigkpi

with events as (
  select
  device.web_info.hostname AS Hostname,
      -- traffic_source.name (dimension | name of the marketing campaign that first acquired the user)
 (select value.int_value from unnest(event_params) where key = 'ga_session_id') as session_id,
    traffic_source.medium AS First_medium,
    -- traffic_source.source (dimension | name of the network that first acquired the user)
    traffic_source.source as First_Source,
    device.category as device_category,
    user_pseudo_id,
    event_name,
    event_timestamp,
    traffic_source.name AS First_campaign,
    max((select value.string_value from unnest(event_params) where key = 'session_engaged')) as session_engaged,
    max((select value.int_value from unnest(event_params) where key = 'video_duration')) as Video_views,
    regexp_replace(
  regexp_replace(
    (
      select
        p.value.string_value
      from
        unnest(event_params) as p
      where
        p.key = 'page_location'
    ),
    r'^https?://[^/]+',
    ''
  ),
  r'[\?].*',
  ''
) as page_path,
regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,
event_date as date

FROM 
`bigquery-341716.analytics_278788286.events_*`

where

 _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
    and format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
    
    and user_pseudo_id IS NOT null

group by
Hostname,
session_id,
First_medium,
First_Source,
First_campaign,
device_category,
user_pseudo_id,
event_name,
event_timestamp,
page_path,
page_location,
page_title,
date
    )
 


  select
  date,
  Hostname,
  device_category,
  page_path,
  page_location,
  page_title,
  First_campaign,
  countif(event_name = 'page_view') as PageView,
  countif(event_name = 'action_form_submitted') as action_form_submitted,   
  countif(event_name = 'contact_form_submitted') as contact_form_submitted,
 countif(event_name = 'engaged_3_pageviews') as engaged_3_pageviews,
 countif(event_name = 'file_download') as file_download,
 --COUNTIF(Video_views  >1)  as video_progress,
--countif(event_name = 'video_progress') as test,
 count(distinct case when event_name = 'video_progress' then concat(user_pseudo_id,session_id) end) as video_progress,
count(distinct case when session_engaged = '1' then concat(user_pseudo_id,session_id) end) as engaged_sessions,

  -- definitions of the channel grouping based on the source / medium of every session
    case
        when First_Source is null and (First_medium = '(not set)' or First_medium is null) then 'Direct'
        when First_medium = 'organic' then 'Organic Search'
        when regexp_contains(First_medium, r'^(social|social-network|social-media|sm|social network|social media)$') then 'Social'
        when First_medium = 'email' then 'Email'
        when First_medium = 'affiliate' then 'Affiliates'
        when First_medium = 'referral' then 'Referral'
        when regexp_contains(First_medium, r'^(cpc|ppc|paidsearch)$') then 'Lead Gen'
        when regexp_contains(First_medium, r' ^(cpv|cpa|cpp|content-text)$') then 'Other Advertising'
        when regexp_contains(First_medium, r'^(display|cpm|banner)$') then 'Display'
        else '(Other)' end as session_default_channel_grouping,

  from
  events

  group by
  Hostname,
  device_category,
  session_default_channel_grouping,
  page_path,
  page_location,
  page_title,
  First_campaign,
  date
content_copyCOPY