with events as (
select
device.web_info.hostname AS Hostname,
-- traffic_source.name (dimension | name of the marketing campaign that first acquired the user)
traffic_source.name AS First_campaign,
-- traffic_source.medium (dimension | name of the medium (paid search, organic search, email, etc.) that first acquired the user)
traffic_source.medium AS First_medium,
-- traffic_source.source (dimension | name of the network that first acquired the user)
traffic_source.source as First_Source,
device.category as device_category,
-- device.mobile_brand_name (dimension | the device brand name)
device.mobile_brand_name as brand,
--page data
regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,
user_pseudo_id,
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as unique_session_id,
-- (case when(
-- select
-- value.string_value
--from
--unnest(event_params)
--where
-- key = 'session_engaged' AND event_name = 'page_view'
--) ="1" then 1 else 0 end) as engaged_sessions_id,
(case when (select value.string_value from unnest(event_params) where key = 'session_engaged') = '1' then concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) end) as engaged_sessions_id,
event_name,
event_timestamp,
regexp_replace(regexp_replace((select p.value.string_value from unnest(event_params) as p where p.key = 'page_location'), r'^https?://[^/]+',''), r'[\?].*','') as page_path,
event_date as date
FROM
`bigquery-341716.analytics_278788286.events_*`
where
_table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
and format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
AND user_pseudo_id IS NOT null)
,
flow as
(select
unique_session_id,
user_pseudo_id,
engaged_sessions_id,
page_location,
page_title,
device_category,
brand,
Hostname,
event_name,
page_path,
date,
event_timestamp,
First_Source,
First_medium,
First_campaign,
-- look for the previous page_path
if(
event_name = 'page_view',
coalesce(
last_value(
if(event_name = 'page_view', page_path, null) ignore nulls
) over(
partition by unique_session_id
order by
event_timestamp asc rows between unbounded preceding
and 1 preceding
),
'(entrance)'
),
null
) as previous_page,
-- look for the next page_path
if(
event_name = 'page_view',
coalesce(
first_value(
if(event_name = 'page_view', page_path, null) ignore nulls
) over(
partition by unique_session_id
order by
event_timestamp asc rows between 1 following
and unbounded following
),
'(exit)'
),
null
) as next_page
from
events
where
event_name = 'page_view'
)
select
date,
Hostname,
device_category,
flow.page_path,
page_location,
page_title,
First_campaign,
-- definitions of the channel grouping based on the source / medium of every session
case
when First_Source is null and (First_medium = '(not set)' or First_medium is null) then 'Direct'
when First_medium = 'organic' then 'Organic Search'
when regexp_contains(First_medium, r'^(social|social-network|social-media|sm|social network|social media)$') then 'Social'
when First_medium = 'email' then 'Email'
when First_medium = 'affiliate' then 'Affiliates'
when First_medium = 'referral' then 'Referral'
when regexp_contains(First_medium, r'^(cpc|ppc|paidsearch)$') then 'Lead Gen'
when regexp_contains(First_medium, r' ^(cpv|cpa|cpp|content-text)$') then 'Other Advertising'
when regexp_contains(First_medium, r'^(display|cpm|banner)$') then 'Display'
else '(Other)' end as session_default_channel_grouping,
--groups page types
CASE
WHEN next_page like '%petition/1%' THEN 'petition'
WHEN next_page like '%petition/2%' THEN 'petition'
WHEN next_page like '%donate/1%' THEN 'donate'
WHEN next_page like '%donate/2%' THEN 'donate'
WHEN next_page like '%wspieram/1%' THEN 'donate'
WHEN next_page like '%wspieram/2%' THEN 'donate'
WHEN next_page like '%doneren/1%' THEN 'donate'
WHEN next_page like '%doneren/2%' THEN 'donate'
WHEN next_page like '%donazioni/1%' THEN 'donate'
WHEN next_page like '%donazioni/2%' THEN 'donate'
WHEN next_page like '%don/1%' THEN 'donate'
WHEN next_page like '%don/2%' THEN 'donate'
WHEN next_page like '%daruj/1%' THEN 'donate'
WHEN next_page like '%daruj/2%' THEN 'donate'
WHEN next_page like '%donar/1%' THEN 'donate'
WHEN next_page like '%donar/2%' THEN 'donate'
WHEN next_page like '%action/1%' THEN 'action'
WHEN next_page like '%action/2%' THEN 'action'
WHEN next_page like '%actie/1%' THEN 'action'
WHEN next_page like '%actie/2%' THEN 'action'
WHEN next_page like '%akcje/1%' THEN 'action'
WHEN next_page like '%akcje/2%' THEN 'action'
WHEN next_page like '%accion/1%' THEN 'action'
WHEN next_page like '%accion/2%' THEN 'action'
WHEN next_page like '%akce/1%' THEN 'action'
WHEN next_page like '%akce/2%' THEN 'action'
WHEN next_page like '%subscribe/1%' THEN 'subscribe'
WHEN next_page like '%subscribe/2%' THEN 'subscribe'
WHEN next_page like '%data/2%' THEN 'data'
WHEN next_page like '%data/1%' THEN 'data'
WHEN next_page like '(exit)' THEN 'exit'
ELSE 'Website'
END AS NextPageType,
--add measures
COUNT (DISTINCT user_pseudo_id) AS Users,
COUNT (DISTINCT unique_session_id) AS sessions,
COUNT (DISTINCT engaged_sessions_id) AS engaged_sessions,
COUNT(CASE WHEN previous_page = '(entrance)' THEN unique_session_id ELSE NULL END) AS Entrance,
COUNT(CASE WHEN next_page = '(exit)' THEN unique_session_id ELSE NULL END) AS Exit,
next_page
from
flow
group by
date,
Hostname,
page_location,
page_title,
First_campaign,
device_category,
page_path,
session_default_channel_grouping,
NextPageType,
next_page