weekly_data_profile
Fri Jan 20 2023 10:58:54 GMT+0000 (Coordinated Universal Time)
Saved by @shubhangi_burle
%jdbc(hive)
set tez.queue.name=default;
set hive.execution.engine=tez;
-- SELECT * FROM fraud.fra_consumer_category_master LIMIT 20
-- SELECT MIN(updated_date), MAX(updated_date), (MIN(updated_date) - MAX(updated_date)) FROM fraud.fra_consumer_category_master --2022-07-21 to 2023-01-18, 81
select DISTINCT profile.user_id user_id
,profile.reg_dt reg_dt
,profile.full_name full_name
,profile.no_of_upi_banks_lifetime
,profile.no_of_imps_banks_lifetime
,profile.sender_active_days sender_active_days
,profile.receiver_active_days_week
,profile.receiver_suc_txn_week
,profile.receiver_suc_amt_week
,profile.no_of_senders_week
,profile.no_of_sender_vpas_week
,profile.no_of_sender_locations
,profile.off_hr_rcvr_suc_txn
,profile.off_hr_rcvr_suc_amt
,profile.receiver_no_of_suc_banks
,profile.receiver_active_days_suc_txn_week
,profile.sender_suc_txn_week sender_suc_txn_week
,profile.sender_suc_amt_week
,profile.no_of_receivers_week no_of_receivers_week
,profile.no_of_receiver_vpas_week no_of_receiver_vpas_week
,profile.sender_suc_upi_amt_week
,profile.sender_suc_wal_amt_week
,profile.sender_suc_pg_amt_week
--------------------------------------gc amount?
,profile.sender_no_of_suc_cards_week
,profile.sender_user_states_week
,profile.off_hr_sndr_suc_txn
,profile.off_hr_sndr_suc_amt
,profile.kyc_info kyc_info
,profile.user_created user_created
,profile.last_transaction_time last_transaction_time
,profile.sender_txn_week sender_txn_week
,profile.sender_amt_week
,profile.sender_daily_txn_suc_amt
,profile.sender_daily_avg_suc_amt
,profile.sender_no_of_suc_upi_banks_week
,profile.sender_to_suc_imps_banks_week
,profile.sender_int_suc_p2m_txn_week
,profile.sender_int_suc_p2m_amt_week
,profile.sender_int_suc_p2p_txn_week
,profile.sender_int_suc_p2p_amt_week
,profile.sender_ext_suc_p2m_txn_week
,profile.sender_ext_suc_p2m_amt_week
,profile.sender_ext_suc_p2p_txn_week
,profile.sender_ext_suc_p2p_amt_week
,profile.receiver_txn_week
,profile.receiver_amt_week
,profile.receiver_daily_avg_txn_suc_amt
,profile.receiver_daily_avg_suc_amt
,profile.receiver_suc_rev_txn
,profile.receiver_suc_rev_amt
,profile.receiver_int_p2p_suc_txn
,profile.receiver_int_p2p_suc_amt
,profile.receiver_ext_p2p_suc_txn
,profile.receiver_ext_p2p_suc_amt
from
(select COALESCE(sender.user_id, rcvr.user_id) user_id
,ud.full_name full_name
,ud.kyc kyc_info
,ud.created user_created
,ud.registration_date reg_dt
,coalesce(sender.last_transaction_time, rcvr.last_transaction_time) last_transaction_time
,NVL(sender.sender_active_days,0) sender_active_days
,NVL(sender.no_of_receiver_week,0) no_of_receivers_week
,NVL(sender.no_of_receiver_vpas_week,0) no_of_receiver_vpas_week
,NVL(sender.cnt_txn_lifetime,0) sender_txn_week
,NVL(sender.cnt_suc_txn_lifetime,0) sender_suc_txn_week
,NVL(sender.sum_amt_lifetime,0) sender_amt_week
,NVL(sender.sum_suc_amt_lifetime,0) sender_suc_amt_week
,NVL(sender.daily_txn_amt,0) sender_daily_txn_suc_amt
,NVL(sender.sndr_avg_amt_daily,0) sender_daily_avg_suc_amt
,NVL(sender.suc_upi_amt,0) sender_suc_upi_amt_week
,NVL(sender.suc_wal_amt,0) sender_suc_wal_amt_week
,NVL(sender.suc_pg_amt,0) sender_suc_pg_amt_week
,NVL(sender.user_state,0) sender_user_states_week --no.of states user sent money from
,NVL(sender.no_of_cards,0) sender_no_of_suc_cards_week
,NVL(sender.no_of_suc_upi_banks,0) sender_no_of_suc_upi_banks_week
,NVL(sender.sent_cnt_suc_imps_banks,0) sender_to_suc_imps_banks_week
,NVL(sender.no_of_upi_banks,0) no_of_upi_banks_lifetime
,NVL(sender.no_of_imps_banks,0) no_of_imps_banks_lifetime
,NVL(sender.cnt_suc_int_p2m_txn,0) sender_int_suc_p2m_txn_week
,NVL(sender.sum_suc_int_p2m_amt,0) sender_int_suc_p2m_amt_week
,NVL(sender.cnt_suc_int_p2p_txn,0) sender_int_suc_p2p_txn_week
,NVL(sender.sum_suc_int_p2p_amt,0) sender_int_suc_p2p_amt_week
,NVL(sender.cnt_suc_ext_p2m_txn,0) sender_ext_suc_p2m_txn_week
,NVL(sender.sum_suc_ext_p2m_amt,0) sender_ext_suc_p2m_amt_week
,NVL(sender.cnt_suc_ext_p2p_txn,0) sender_ext_suc_p2p_txn_week
,NVL(sender.sum_suc_ext_p2p_amt,0) sender_ext_suc_p2p_amt_week
,NVL(sender.off_hr_sndr_txn,0) off_hr_sndr_suc_txn
,NVL(sender.off_hr_sndr_amt,0) off_hr_sndr_suc_amt
--receiver
,NVL(rcvr.receiver_active_days_overall,0) receiver_active_days_week
,NVL(rcvr.no_of_senders_week,0) no_of_senders_week
,NVL(rcvr.no_of_sender_vpas_week,0) no_of_sender_vpas_week
,NVL(rcvr.no_of_sender_locations,0) no_of_sender_locations
,NVL(rcvr.receiver_active_days_suc,0) receiver_active_days_suc_txn_week
,NVL(rcvr.recv_txn,0) receiver_txn_week
,NVL(rcvr.recv_amt,0) receiver_amt_week
,NVL(rcvr.recv_suc_txn,0) receiver_suc_txn_week
,NVL(rcvr.recv_suc_amt,0) receiver_suc_amt_week
,NVL(rcvr.recv_daily_avg_txn_amt,0) receiver_daily_avg_txn_suc_amt
,NVL(rcvr.recv_avg_amt_daily,0) receiver_daily_avg_suc_amt
,NVL(rcvr.cnt_rev_txn,0) receiver_suc_rev_txn
,NVL(rcvr.cnt_rev_amt,0) receiver_suc_rev_amt
,NVL(rcvr.receiver_banks,0) receiver_no_of_suc_banks
,NVL(rcvr.recv_cnt_suc_int_p2p_txn,0) receiver_int_p2p_suc_txn
,NVL(rcvr.recv_sum_suc_int_p2p_amt,0) receiver_int_p2p_suc_amt
,NVL(rcvr.recv_cnt_suc_ext_p2p_txn,0) receiver_ext_p2p_suc_txn
,NVL(rcvr.recv_sum_suc_ext_p2p_amt,0) receiver_ext_p2p_suc_amt
,NVL(rcvr.off_hr_rcvr_txn,0) off_hr_rcvr_suc_txn
,NVL(rcvr.off_hr_rcvr_amt,0) off_hr_rcvr_suc_amt
from
(
--sender
select ur.senderuserid as user_id
,max(ur.transaction_time) as last_transaction_time
--AML Requested variables
,COALESCE(count(distinct date(ur.updated_date)),0) as sender_active_days
,COALESCE(count(distinct ur.receiveruser),0) as no_of_receiver_week
,COALESCE(count(distinct ur.reciver_vpa),0) as no_of_receiver_vpas_week
-- count of transactions lifetime
,COALESCE(count(distinct ur.transaction_id),0) as cnt_txn_lifetime
,COALESCE(count(distinct ur.transaction_id),0) as cnt_suc_txn_lifetime
-- sum of amount lifetime
,COALESCE(sum(ur.totaltransactionamount),0) as sum_amt_lifetime
,COALESCE(sum(ur.totaltransactionamount),0) as sum_suc_amt_lifetime
--Average daily amount
,COALESCE(sum(ur.totaltransactionamount),0)/COALESCE(count(distinct ur.transaction_id),0) as daily_txn_amt
,COALESCE(sum(ur.totaltransactionamount),0)/COALESCE(count(distinct date(ur.updated_date)),0) as sndr_avg_amt_daily
--exclusive instrument Concentration
,COALESCE(sum(case when ur.upi_flag = 1 and ur.wallet_flag = 0 and ur.card_flag = 0 then ur.totaltransactionamount end),0) as suc_upi_amt
,COALESCE(sum(case when ur.upi_flag = 0 and ur.wallet_flag = 1 and ur.card_flag = 0 then ur.totaltransactionamount end),0) as suc_wal_amt
,COALESCE(sum(case when ur.upi_flag = 0 and ur.wallet_flag = 0 and ur.card_flag = 1 then ur.totaltransactionamount end),0) as suc_pg_amt
--location
,COALESCE(count(distinct edw.state),0) as user_state
--card
,COALESCE(count(distinct case when ur.workflowtype in ('CONSUMER_TO_MERCHANT','CONSUMER_TO_MERCHANT_V2','WALLET_TOPUP') and ur.sendertype = 'INTERNAL_USER' and ur.card_flag = true then ur.senderglobalcardid end),0) no_of_cards
,COALESCE(count(distinct case when ur.upi_flag = true then ur.senderbankaccountsha end),0) no_of_suc_upi_banks
,COALESCE(count(distinct case when ur.receiveruser like 'ifsc.npci%' and ur.workflowtype = 'CONSUMER_TO_EXTERNAL' and ur.receivertype = 'EXTERNAL_USER' then ur.receiverbankaccountsha end),0) sent_cnt_suc_imps_banks
,a.cnt_upi_banks_lifetime no_of_upi_banks --lifetime
,a.cnt_imps_banks_lifetime no_of_imps_banks --lifetime
,COALESCE(count(distinct case when ur.workflowtype in ('CONSUMER_TO_MERCHANT','CONSUMER_TO_MERCHANT_V2') and ur.receivertype = 'INTERNAL_USER' then ur.transaction_id end),0) as cnt_suc_int_p2m_txn
,COALESCE(sum(case when ur.workflowtype in ('CONSUMER_TO_MERCHANT','CONSUMER_TO_MERCHANT_V2') and ur.receivertype = 'INTERNAL_USER' then ur.totaltransactionamount end),0) as sum_suc_int_p2m_amt
,COALESCE(count(distinct case when ur.workflowtype in ('CONSUMER_TO_CONSUMER','CONSUMER_TO_CONSUMER_V2') and ur.receivertype = 'INTERNAL_USER' then ur.transaction_id end),0) as cnt_suc_int_p2p_txn
,COALESCE(sum(case when ur.workflowtype in ('CONSUMER_TO_CONSUMER','CONSUMER_TO_CONSUMER_V2') and ur.receivertype = 'INTERNAL_USER' then ur.totaltransactionamount end),0) as sum_suc_int_p2p_amt
--External flow *
,COALESCE(count(distinct case when ur.workflowtype in ('CONSUMER_TO_MERCHANT','CONSUMER_TO_MERCHANT_V2') and ur.receivertype = 'EXTERNAL_USER' then ur.transaction_id end),0) as cnt_suc_ext_p2m_txn
,COALESCE(sum(case when ur.workflowtype in ('CONSUMER_TO_MERCHANT','CONSUMER_TO_MERCHANT_V2') and ur.receivertype = 'EXTERNAL_USER' then ur.totaltransactionamount end),0) as sum_suc_ext_p2m_amt
,COALESCE(count(distinct case when ur.workflowtype in ('CONSUMER_TO_CONSUMER','CONSUMER_TO_CONSUMER_V2') and ur.receivertype = 'EXTERNAL_USER' then ur.transaction_id end),0) as cnt_suc_ext_p2p_txn
,COALESCE(sum(case when ur.workflowtype in ('CONSUMER_TO_CONSUMER','CONSUMER_TO_CONSUMER_V2') and ur.receivertype = 'EXTERNAL_USER' then ur.totaltransactionamount end),0) as sum_suc_ext_p2p_amt
--Off hour
,COALESCE(count(distinct case when ur.hr in (22,23,00,01,02,03,04,05,06) then ur.transaction_id end),0) as off_hr_sndr_txn
,COALESCE(sum(case when ur.hr in (22,23,00,01,02,03,04,05,06) then ur.totaltransactionamount end),0) as off_hr_sndr_amt
from
(select DISTINCT senderuserid, workflowtype, receivertype, errorcode, transaction_id, totaltransactionamount,upi_flag, wallet_flag, card_flag, transaction_time
, senderphonenumber, senderglobalcardid, senderbankaccountsha, hour(transaction_time) hr, updated_date, receiverbankaccountsha, receiveruser, sendertype, reciver_vpa
from fraud.transaction_details_v3
where date(updated_date) BETWEEN date_sub('{{ next_ds }}',7) and date_sub('{{ next_ds }}',1)
AND senderuserid in (SELECT DISTINCT identifier FROM fraud.aml_freshdesk WHERE run_date = '{{ next_ds }}')) ur
LEFT JOIN
(SELECT user_id, state FROM edw_shared.user_dimension
WHERE user_id IN (SELECT DISTINCT identifier FROM fraud.aml_freshdesk WHERE run_date = '{{ next_ds }}'))edw
on ur.senderuserid = edw.user_id
LEFT JOIN
-- (SELECT * FROM fraud.accounts_temp) a
(select user_id
,count(distinct masked_account_number) as cnt_banks_lifetime
,count(distinct (case when usage_domain= 'UPI' then masked_account_number end)) as cnt_upi_banks_lifetime
,count(distinct (case when usage_domain= 'IMPS' then masked_account_number end)) as cnt_imps_banks_lifetime
from payment.accounts
WHERE user_id IN (SELECT DISTINCT identifier FROM fraud.aml_freshdesk WHERE run_date = '{{ next_ds }}')
group by user_id)a
on ur.senderuserid = a.user_id
group by ur.senderuserid , a.cnt_upi_banks_lifetime, a.cnt_imps_banks_lifetime
)sender
full outer JOIN
(
select b.receiveruser user_id
,max(b.transaction_time) as last_transaction_time
,COALESCE(count(distinct date(b.updated_date)),0) as receiver_active_days_overall
,COALESCE(count(distinct date(b.updated_date)),0) as receiver_active_days_suc
,COALESCE(count(distinct b.senderuserid),0) as no_of_senders_week
,COALESCE(count(distinct b.sender_vpa),0) as no_of_sender_vpas_week
-- count of transactions lifetime
,COALESCE(count(distinct b.transaction_id),0) as recv_txn
,COALESCE(count(distinct b.transaction_id end),0) as recv_suc_txn
--txn amt
,COALESCE(sum(b.totaltransactionamount),0) as recv_amt
,COALESCE(sum(b.totaltransactionamount end),0) as recv_suc_amt
,COALESCE(sum(b.totaltransactionamount end),0)/COALESCE(count(distinct b.transaction_id),0)as recv_daily_avg_txn_amt
,COALESCE(sum(b.totaltransactionamount),0)/COALESCE(count(distinct date(b.updated_date)),0)as recv_avg_amt_daily
--location
--merchant reversals
,COALESCE(count(distinct case when b.workflowtype in ('MERCHANT_REVERSAL_V2') and b.receivertype = 'INTERNAL_USER' then b.transaction_id end),0) as cnt_rev_txn
,COALESCE(sum(case when b.workflowtype in ('MERCHANT_REVERSAL_V2') and b.receivertype = 'INTERNAL_USER' then b.totaltransactionamount end),0) as cnt_rev_amt
--recever daily txn amount
,COALESCE(count(distinct case when b.workflowtype in ('CONSUMER_TO_CONSUMER','EXTERNAL_TO_CONSUMER', 'CONSUMER_TO_CONSUMER_V2') and b.receivertype = 'INTERNAL_USER' and b.receiverbankaccountsha != '' then b.receiverbankaccountsha end),0) receiver_banks
,COALESCE(count(distinct case when b.workflowtype in ('CONSUMER_TO_CONSUMER', 'CONSUMER_TO_CONSUMER_V2') and b.receivertype = 'INTERNAL_USER' then b.transaction_id end),0) as recv_cnt_suc_int_p2p_txn
,COALESCE(sum(case when b.workflowtype in ('CONSUMER_TO_CONSUMER', 'CONSUMER_TO_CONSUMER_V2') and b.receivertype = 'INTERNAL_USER' then b.totaltransactionamount end),0) as recv_sum_suc_int_p2p_amt
,COALESCE(count(distinct case when b.workflowtype in ('EXTERNAL_TO_CONSUMER') and b.receivertype = 'INTERNAL_USER' then b.transaction_id end),0) as recv_cnt_suc_ext_p2p_txn
,COALESCE(sum(case when b.workflowtype in ('EXTERNAL_TO_CONSUMER') and b.receivertype = 'INTERNAL_USER' then b.totaltransactionamount end),0) as recv_sum_suc_ext_p2p_amt
,COALESCE(count(distinct case when hour(b.transaction_time) in (22,23,00,01,02,03,04,05,06) then b.transaction_id end),0) as off_hr_rcvr_txn
,COALESCE(sum(case when hour(b.transaction_time) in (22,23,00,01,02,03,04,05,06) then b.totaltransactionamount end),0) as off_hr_rcvr_amt
,COALESCE(count(distinct c.state)) no_of_sender_locations
FROM
(SELECT receiveruser
, transaction_time
, updated_date
, errorcode
, senderuserid
, sender_vpa
, transaction_id
, totaltransactionamount
, workflowtype
, receivertype
, receiverbankaccountsha
from fraud.transaction_details_v3
WHERE updated_date BETWEEN date_sub('{{ next_ds }}',7) AND date_sub('{{ next_ds }}',1)
AND errorcode = 'SUCCESS'
AND receiveruser in (SELECT DISTINCT identifier FROM fraud.aml_freshdesk WHERE run_date = '{{ next_ds }}')) b
LEFT JOIN
(SELECT user_id, state FROM edw_shared.user_dimension) c
on b.senderuserid = c.user_id
group by b.receiveruser )rcvr
on sender.user_id = rcvr.user_id
-- Run the query by enabling the below variable for offline execution
INNER JOIN
(select user_ext_id, kyc, created, full_name, registration_date, phone_number
from users.users
WHERE user_ext_id IN (SELECT DISTINCT identifier FROM fraud.aml_freshdesk WHERE run_date = '{{ next_ds }}')) ud
on (ud.user_ext_id = sender.user_id) or (ud.user_ext_id = rcvr.user_id)
-- INNER JOIN
-- (select DISTINCT user_ext_id, full_name, kyc, created, registration_date
-- from fraud.users_temp) ud
-- on (ud.user_ext_id = sender.user_id) or (ud.user_ext_id = rcvr.user_id)
)profile
INNER JOIN
(select DISTINCT identifier from fraud.aml_freshdesk where run_date = '{{ next_ds }}')aml
on aml.identifier = profile.user_id;



Comments