page audit

PHOTO EMBED

Fri Mar 31 2023 13:07:48 GMT+0000 (Coordinated Universal Time)

Saved by @Matt_Stone #gigkpi

with events as (
  select
  device.web_info.hostname AS Hostname,
      -- traffic_source.name (dimension | name of the marketing campaign that first acquired the user)
    traffic_source.name AS First_campaign,
    -- traffic_source.medium (dimension | name of the medium (paid search, organic search, email, etc.) that first acquired the user)
    traffic_source.medium AS First_medium,
    -- traffic_source.source (dimension | name of the network that first acquired the user)
    traffic_source.source as First_Source,
    device.category as device_category,
    -- device.mobile_brand_name (dimension | the device brand name)
    device.mobile_brand_name as brand,
--page data

regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,

  user_pseudo_id,
   
    concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as unique_session_id,

     -- (case when(
       -- select
        --  value.string_value
        --from
          --unnest(event_params)
        --where
         -- key = 'session_engaged' AND event_name = 'page_view'
      --) ="1" then 1 else 0 end) as engaged_sessions_id,

(case when (select value.string_value from unnest(event_params) where key = 'session_engaged') = '1' then concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) end) as engaged_sessions_id,    
event_name,
    event_timestamp,
    regexp_replace(regexp_replace((select p.value.string_value from unnest(event_params) as p where p.key = 'page_location'), r'^https?://[^/]+',''), r'[\?].*','') as page_path,
event_date as date

FROM 
`bigquery-341716.analytics_278788286.events_*`

where

  _table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
    and format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
    AND user_pseudo_id IS NOT null)
 ,

flow as
(select
  unique_session_id,
  user_pseudo_id,
  engaged_sessions_id,
  page_location,
  page_title,
  device_category,
  brand,
  Hostname,
  event_name,
  page_path,
  date,
  event_timestamp,
    First_Source,
     First_medium,
     First_campaign,
  -- look for the previous page_path
  if(
    event_name = 'page_view',
    coalesce(
      last_value(
        if(event_name = 'page_view', page_path, null) ignore nulls
      ) over(
        partition by unique_session_id
        order by
          event_timestamp asc rows between unbounded preceding
          and 1 preceding
      ),
      '(entrance)'
    ),
    null
  ) as previous_page,
  -- look for the next page_path
  if(
    event_name = 'page_view',
    coalesce(
      first_value(
        if(event_name = 'page_view', page_path, null) ignore nulls
      ) over(
        partition by unique_session_id
        order by
          event_timestamp asc rows between 1 following
          and unbounded following
      ),
      '(exit)'
    ),
    null
  ) as next_page
from
  events
  where
 event_name = 'page_view'
  )


  select
  date,
  Hostname,
  device_category,
  flow.page_path,
  page_location,
  page_title,
  First_campaign,


  -- definitions of the channel grouping based on the source / medium of every session
    case
        when First_Source is null and (First_medium = '(not set)' or First_medium is null) then 'Direct'
        when First_medium = 'organic' then 'Organic Search'
        when regexp_contains(First_medium, r'^(social|social-network|social-media|sm|social network|social media)$') then 'Social'
        when First_medium = 'email' then 'Email'
        when First_medium = 'affiliate' then 'Affiliates'
        when First_medium = 'referral' then 'Referral'
        when regexp_contains(First_medium, r'^(cpc|ppc|paidsearch)$') then 'Lead Gen'
        when regexp_contains(First_medium, r' ^(cpv|cpa|cpp|content-text)$') then 'Other Advertising'
        when regexp_contains(First_medium, r'^(display|cpm|banner)$') then 'Display'
        else '(Other)' end as session_default_channel_grouping,
  --groups page types
 CASE
    WHEN next_page like '%petition/1%' THEN 'petition'
    WHEN next_page like '%petition/2%' THEN 'petition'
    WHEN next_page like '%donate/1%' THEN 'donate'
    WHEN next_page like '%donate/2%' THEN 'donate'
    WHEN next_page like '%wspieram/1%' THEN 'donate'
    WHEN next_page like '%wspieram/2%' THEN 'donate'
    WHEN next_page like '%doneren/1%' THEN 'donate'
    WHEN next_page like '%doneren/2%' THEN 'donate'
    WHEN next_page like '%donazioni/1%' THEN 'donate'
    WHEN next_page like '%donazioni/2%' THEN 'donate'
    WHEN next_page like '%don/1%' THEN 'donate'
    WHEN next_page like '%don/2%' THEN 'donate'
    WHEN next_page like '%daruj/1%' THEN 'donate'
    WHEN next_page like '%daruj/2%' THEN 'donate'
    WHEN next_page like '%donar/1%' THEN 'donate'
    WHEN next_page like '%donar/2%' THEN 'donate'
    WHEN next_page like '%action/1%' THEN 'action'
    WHEN next_page like '%action/2%' THEN 'action'
    WHEN next_page like '%actie/1%' THEN 'action'
    WHEN next_page like '%actie/2%' THEN 'action'
    WHEN next_page like '%akcje/1%' THEN 'action'
    WHEN next_page like '%akcje/2%' THEN 'action'
    WHEN next_page like '%accion/1%' THEN 'action'
    WHEN next_page like '%accion/2%' THEN 'action'
    WHEN next_page like '%akce/1%' THEN 'action'
    WHEN next_page like '%akce/2%' THEN 'action'
    WHEN next_page like '%subscribe/1%' THEN 'subscribe'
    WHEN next_page like '%subscribe/2%' THEN 'subscribe'
    WHEN next_page like '%data/2%' THEN 'data'
    WHEN next_page like '%data/1%' THEN 'data'
    WHEN next_page like '(exit)' THEN 'exit'


    ELSE 'Website'
END AS NextPageType,

--add measures
  
  COUNT (DISTINCT user_pseudo_id) AS Users,
  COUNT (DISTINCT unique_session_id) AS sessions,
  COUNT (DISTINCT engaged_sessions_id) AS engaged_sessions,
  COUNT(CASE WHEN previous_page = '(entrance)' THEN unique_session_id ELSE NULL END) AS Entrance,
  COUNT(CASE WHEN next_page = '(exit)' THEN unique_session_id ELSE NULL END) AS Exit,
  next_page

  from
  flow
  group by
  date,
  Hostname,
  page_location,
  page_title,
  First_campaign,
  device_category,
  page_path,
  session_default_channel_grouping,
  NextPageType,
  next_page

content_copyCOPY