with events as (
select
device.web_info.hostname AS Hostname,
-- traffic_source.name (dimension | name of the marketing campaign that first acquired the user)
(select value.int_value from unnest(event_params) where key = 'ga_session_id') as session_id,
traffic_source.medium AS First_medium,
-- traffic_source.source (dimension | name of the network that first acquired the user)
traffic_source.source as First_Source,
device.category as device_category,
user_pseudo_id,
event_name,
event_timestamp,
traffic_source.name AS First_campaign,
max((select value.string_value from unnest(event_params) where key = 'session_engaged')) as session_engaged,
max((select value.int_value from unnest(event_params) where key = 'video_duration')) as Video_views,
regexp_replace(
regexp_replace(
(
select
p.value.string_value
from
unnest(event_params) as p
where
p.key = 'page_location'
),
r'^https?://[^/]+',
''
),
r'[\?].*',
''
) as page_path,
regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,
event_date as date
FROM
`bigquery-341716.analytics_278788286.events_*`
where
_table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
and format_date('%Y%m%d',date_sub(current_date(), interval 2 day))
and user_pseudo_id IS NOT null
group by
Hostname,
session_id,
First_medium,
First_Source,
First_campaign,
device_category,
user_pseudo_id,
event_name,
event_timestamp,
page_path,
page_location,
page_title,
date
)
select
date,
Hostname,
device_category,
page_path,
page_location,
page_title,
First_campaign,
countif(event_name = 'page_view') as PageView,
countif(event_name = 'action_form_submitted') as action_form_submitted,
countif(event_name = 'contact_form_submitted') as contact_form_submitted,
countif(event_name = 'engaged_3_pageviews') as engaged_3_pageviews,
countif(event_name = 'file_download') as file_download,
--COUNTIF(Video_views >1) as video_progress,
--countif(event_name = 'video_progress') as test,
count(distinct case when event_name = 'video_progress' then concat(user_pseudo_id,session_id) end) as video_progress,
count(distinct case when session_engaged = '1' then concat(user_pseudo_id,session_id) end) as engaged_sessions,
-- definitions of the channel grouping based on the source / medium of every session
case
when First_Source is null and (First_medium = '(not set)' or First_medium is null) then 'Direct'
when First_medium = 'organic' then 'Organic Search'
when regexp_contains(First_medium, r'^(social|social-network|social-media|sm|social network|social media)$') then 'Social'
when First_medium = 'email' then 'Email'
when First_medium = 'affiliate' then 'Affiliates'
when First_medium = 'referral' then 'Referral'
when regexp_contains(First_medium, r'^(cpc|ppc|paidsearch)$') then 'Lead Gen'
when regexp_contains(First_medium, r' ^(cpv|cpa|cpp|content-text)$') then 'Other Advertising'
when regexp_contains(First_medium, r'^(display|cpm|banner)$') then 'Display'
else '(Other)' end as session_default_channel_grouping,
from
events
group by
Hostname,
device_category,
session_default_channel_grouping,
page_path,
page_location,
page_title,
First_campaign,
date