tcp-ds数据集sql查询

TPC-DS是与真实场景非常接近的一个测试集,它包含7张事实表,17张纬度表,平均每张表含有18列。用这个数据集能够很好的模拟企业数据查询、报表生成以及数据挖掘等复杂应用。

下列是7个事实表与17个维度表之间的ER图。

1.jpg
2.png
3.png
4.png
5.png
6.png
7.png
-- 1.符合某条件的每各客户单次消费额总额大于900的客户表
-- 条件:
-- 在1999-2001年期间,
-- 每月的1-3或25-28日的,
-- 来自williamson county的客户。

with sales_record(ss_ticket_number,ss_customer_sk,sales)as
(
    select
        ss_ticket_number,
        ss_customer_sk,
        sum(ss_sales_price)as sales
    from
        store_sales ss,
        date_dim dd,
        store s,
        household_demographics hd
    where
        ss.ss_sold_date_sk=dd.d_date_sk
        and ss.ss_store_sk=s.s_store_sk
        and ss.ss_hdemo_sk=hd.hd_demo_sk
        and(dd.d_dom between 1 and 3 or dd.d_dom between 25 and 28)
        and dd.d_year in (1999,2000,2001)
        and s.s_county in('williamson county')
    group by
        ss_ticket_number,
        ss_cutomer_sk
)
select
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales,
    count(*) as cnt
from
    sales_record,
    customer
where
    ss_customer_sk=c_customer_sk
    and sales>900
group by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales 
order by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag desc;


-- 2.4号店利润率最好最差的top10名产品名称


with ascending as
(
    select
        item_sk,
        rank()over(order by avg_net_profit asc)as rn
    from
    (
    select
            ss_item_sk as item_sk,
            avg(ss_net_paid-ss_ext_wholesale_cost)as avg_net_profit
        from
            store_sales
        where
            ss_store_sk=4
        group by
            ss_item_sk
        having
            avg(ss_net_paid-ss_ext_discount_amt)>0.8*
            (
                select
                    avg(ss_net_paid-ss_ext_wholesale_cost)
                from
                    store_sales
                where
                    ss_store_sk=4
                group by
                ss_store_sk
                )
        
        
        )s
),
descending as -- 小于平均水平的0.8
(
    select
        item_sk,
        rank()over(order by avg_net_profit desc)as rn
    from
    (select
        ss_item_sk as item_sk,
        avg(ss_net_paid-ss_ext_wholesale_cost)as avg_net_profit
    from
        store_sales
    where
        ss_store_sk=4
    group by
        ss_item_sk
    having
        avg(ss_net_paid-ss_ext_wholesale_cost)<0.2*
        (select
            avg(ss_net_paid-ss_ext_wholesale_cost)
        from
            store_sales
        where
            ss_store_sk=4
        group by
            ss_store_sk)
    )s
)

select
    ascending.rn,
    i1.i_product_name as best_performing,
    i2.i_product_name as worst_performing
from
    item i1,
    item i2,
    ascending,
    descending
where
    ascending.rn=descending.rn
    and ascending.rn<=10
    and descending.rn<=10
    and i1.i_item_sk=ascending.item_sk
    and i2.i_item_sk=descending.item_sk
order by
    ascending.rn


-- 3.符合某条件的客户的消费总额,按。。排序

select
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales,
    count(*)as cnt
from
    (
    select
        ss_ticket_number,
        ss_customer_sk,
        sum(ss_sales_price)as sales
    from
        store_sales ss,
        date_dim dd,
        store s,
        household_demographics hd
    where
        ss_sold_date_sk=d_date_sk
        and ss_store_sk=s_store_sk
        and ss_hdemo_sk=hd_demo_sk
        and(d_dom between 1 and 3 or d_dom between 25 and 28)
        and hd_buy_potential='>10000'
        and d_year in (1999,2000,2001)
        and s_county in ('williamson county')
    group by
        ss_ticket_number,
        ss_customer_sk)as sales_record(ss_ticket_number,ss_customer_sk,sales),
    customer
where
    ss_customer_sk=c_customer_sk
    and sales>900
group by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales
order by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag desc;
        
        

-- 4.人群画像统计
-- 在2001年4-6月内,在store,没在web、catalog上买的用户a

SELECT 
    cd_gender,
    cd_marital_status,
    cd_education_status,
    cd_purchase_estimate,
    cd_credit_rating,
    COUNT(DISTINCT c_customer_sk) AS cust_num
FROM
    customer c,
    customer_address ca,
    customer_demographics cd
WHERE
    c_current_addr_sk = ca_address_sk
        AND ca_state IN ('nm' , 'ky', 'ga')
        AND cd_demo_sk = c_current_cdemo_sk
        -- 在xx时间内 没在web catalog 在store
        AND EXISTS( SELECT 
            *
        FROM
            store_sales,
            date_dim
        WHERE
            c_customer_sk = ss_customer_sk
                AND ss_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
        AND NOT EXISTS( SELECT 
            *
        FROM
            web_sales,
            date_dim
        WHERE
            c_customer_sk = ws_bill_customer_sk
                AND ws_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
        AND NOT EXISTS( SELECT 
            *
        FROM
            catalog_sales,
            date_dim
        WHERE
            c_customer_sk = cs_ship_customer_sk
                AND cs_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
GROUP BY cd_gender , cd_marital_status  , cd_education_status, cd_purchase_estimate , cd_credit_rating
ORDER BY cd_gender , cd_marital_status , cd_education_status , cd_purchase_estimate , cd_credit_rating;

    

-- 5月销售额与年平均销售额差10%以上

with v1 as
(   select
        i_category,
        i_brand,
        cc_name,
        d_year,
        d_moy,
        sum(cs_sales_price)as current_sum_sales,
        avg(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year)as avg_year_sales,
        sum(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year)as sum_year_sales,
        sum(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year order by d_moy rows 
            between unbounded preceding and current row)as accumulated_sum_year_sales,
        rank()over(partition by i_category,i_brand,cc_name order by d_year,d_moy)as rn
    from
        item,
        catalog_sales,
        date_dim,
        call_center
    where
        cs_item_sk=i_item_sk
        and cs_sold_date_sk=d_date_sk
        and cc_call_center_sk=cs_call_center_sk
        and(d_year=2001 or(d_year=2001-1 and d_moy=12)or (d_year=2001+1 and d_moy=1))
    group by
        i_category,
        i_brand,
        cc_name,
        d_year,
        d_moy
),
v2 as
(
    select 
        v1.i_category,
        v1.i_brand,
        v1.cc_name,
        v1.d_year,
        v1.d_moy,
        v1.avg_year_sales,
        v1_lag.current_sum_sales as previous_sum_sales,
        v1.current_sum_sales,
        v1_lead.current_sum_sales as next_sum_sales,
        v1.sum_year_sales,
        v1.accumulated_sum_year_sales
    from
        v1,
        v1 v1_lag,
        v1 v1_lead
    where
        v1.i_category=v1_lag.i_category
        and v1.i_category=v1_lead.i_category
        and v1.i_brand=v1_lag.i_brand
        and v1.i_brand=v1_lead.i_brand
        and v1.cc_name=v1_lag.cc_name
        and v1.cc_name=v1_lead.cc_name
        and v1.rn=v1_lag.rn+1
        and v1.rn=v1_lead.rn-1
)
select
    *
from
    v2
where
    d_year=2001
    and avg_year_sales>0
    and(case when avg_year_sales>0 then abs(current_sum_sales-avg_year_sales)/avg_year_sales else null end)>0.1
    order by
        current_sum_sales-avg_year_sales

-- 2000年store网中,tn州退货比较大的用户,按金额降序排列
-- 退款金额是store网用户平均退货金额的1.5倍
with
customer_total_return as
(
    select
        sr_customer_sk,
        sr_store_sk,
        sum(sr_fee)as total_return
    from 
        store_returns,
        date_dim
    where
        sr_returned_date_sk=d_date_sk
        and d_year=2000
    group by
        sr_customer_sk,
        sr_store_sk
)

select
    c_customer_id,
    total_return
from
    customer_total_return ctr,
    store s,
    customer c
where
    ctr.total_return>
    (select
        avg(total_return)*1.5
    from
        customer_total_return)
    and s.s_store_sk=ctr.sr_store_sk
    and ctr.sr_customer_sk=c.c_customer_sk
    and s.s_state='tn'
order by
    total_return desc;

--2002年四个季度web和catalog销售同比情况。

with s1 as 
(
    select 
        ws_sold_date_sk as sold_date_sk,
        ws_ext_sales_price as sales_price
    from 
        web_sales
    union all
    select
        cs_sold_date_sk as sold_date_sk,
        cs_ext_sales_price as sales_price
    from
        catalog_sales
),
s2 as
(
    select
        sum(case when (d_year=2001 and d_qoy=1) then sales_price else 0 end) as sales_2001_q1,
        sum(case when (d_year=2001 and d_qoy=2) then sales_price else 0 end) as sales_2001_q2,
        sum(case when (d_year=2001 and d_qoy=3) then sales_price else 0 end) as sales_2001_q3,
        sum(case when (d_year=2001 and d_qoy=4) then sales_price else 0 end) as sales_2001_q4,
        sum(case when (d_year=2002 and d_qoy=1) then sales_price else 0 end) as sales_2002_q1,
        sum(case when (d_year=2002 and d_qoy=2) then sales_price else 0 end) as sales_2002_q2,
        sum(case when (d_year=2002 and d_qoy=3) then sales_price else 0 end) as sales_2002_q3,
        sum(case when (d_year=2002 and d_qoy=4) then sales_price else 0 end) as sales_2002_q4
    from
        s1,
        date_dim
    where
        d_date_sk=sold_date_sk
)
select 
    round((sales_2002_q1-sales_2001_q1)/sales_2001_q1,2)as q1_sales_ratio,
    round((sales_2002_q2-sales_2001_q2)/sales_2001_q1,2)as q2_sales_ratio,
    round((sales_2002_q3-sales_2001_q3)/sales_2001_q1,2)as q3_sales_ratio,
    round((sales_2002_q4-sales_2001_q4)/sales_2001_q1,2)as q4_sales_ratio
from
    s2
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 216,651评论 6 501
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 92,468评论 3 392
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 162,931评论 0 353
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 58,218评论 1 292
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 67,234评论 6 388
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 51,198评论 1 299
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 40,084评论 3 418
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,926评论 0 274
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 45,341评论 1 311
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,563评论 2 333
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,731评论 1 348
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 35,430评论 5 343
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 41,036评论 3 326
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,676评论 0 22
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,829评论 1 269
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,743评论 2 368
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,629评论 2 354

推荐阅读更多精彩内容