[MCOL-134] ColumnStore returns empty set for 8 of the STAR SCHEMA BENCHMARK queries Created: 2016-06-12  Updated: 2016-10-10  Resolved: 2016-10-10

Status: Closed
Project: MariaDB ColumnStore
Component/s: ExeMgr
Affects Version/s: None
Fix Version/s: Icebox

Type: Bug Priority: Blocker
Reporter: Justin Swanhart (Inactive) Assignee: Unassigned
Resolution: Cannot Reproduce Votes: 0
Labels: None

Attachments: File supplier.tbl    

 Description   

Preparing benchmark data

sudo mkdir -p /home/justin/src
sudo chown -R a+wrx /home/justin/src
cd /home/justin/src
https://github.com/greenlion/ssb-dbgen.git
cd ssb-dbgen
make
./dbgen -s 10 -T l
./dbgen -s 10 -T c
./dbgen -s 10 -T p
./dbgen -s 10 -T s
./dbgen -s 10 -T d

create schema

CREATE DATABASE ssb_cs;
USE ssb_cs;

DROP TABLE IF EXISTS customer;
CREATE TABLE IF NOT EXISTS customer
(
C_CustomerKey int ,
C_Name varchar(25),
C_Address varchar(25),
C_City varchar(10),
C_Nation varchar(15),
C_Region varchar(12),
C_Phone varchar(15),
C_MktSegment varchar(10)
);

DROP TABLE IF EXISTS part;
CREATE TABLE IF NOT EXISTS part
(
P_PartKey int ,
P_Name varchar(25),
P_MFGR varchar(10),
P_Category varchar(10),
P_Brand varchar(15),
P_Colour varchar(15),
P_Type varchar(25),
P_Size tinyint,
P_Container char(10)
);

DROP TABLE IF EXISTS supplier;
CREATE TABLE supplier
(
S_SuppKey int ,
S_Name char(25),
S_Address varchar(25),
S_City char(10),
S_Nation char(15),
S_Region char(12),
S_Phone char(15)
);

DROP TABLE IF EXISTS dim_date;
CREATE TABLE IF NOT EXISTS dim_date
(
D_DateKey int ,
D_Date char(18),
D_DayOfWeek char(9),
D_Month char(9),
D_Year smallint,
D_YearMonthNum int,
D_YearMonth char(7),
D_DayNumInWeek tinyint,
D_DayNumInMonth tinyint,
D_DayNumInYear smallint,
D_MonthNumInYear tinyint,
D_WeekNumInYear tinyint,
D_SellingSeason char(12),
D_LastDayInWeekFl tinyint,
D_LastDayInMonthFl tinyint,
D_HolidayFl tinyint,
D_WeekDayFl tinyint
);

DROP TABLE IF EXISTS lineorder;
CREATE TABLE IF NOT EXISTS lineorder
(
LO_OrderKey bigint not null,
LO_LineNumber tinyint not null,
LO_CustKey int not null,
LO_PartKey int not null,
LO_SuppKey int not null,
LO_OrderDateKey int not null,
LO_OrderPriority varchar(15),
LO_ShipPriority char(1),
LO_Quantity tinyint,
LO_ExtendedPrice decimal,
LO_OrdTotalPrice decimal,
LO_Discount decimal,
LO_Revenue decimal,
LO_SupplyCost decimal,
LO_Tax tinyint,
LO_CommitDateKey int not null,
LO_ShipMode varchar(10)
);

Loading CS

load data infile '/home/justin/src/ssb-dbgen/lineorder.tbl' into table lineorder fields terminated by '|';
– Query OK, 59986052 rows affected (6 min 21.08 sec)
– Records: 59986052 Deleted: 0 Skipped: 0 Warnings: 0

load data infile '/home/justin/src/ssb-dbgen/customer.tbl' into table customer fields terminated by '|';
– Query OK, 300000 rows affected (12.94 sec)
– Records: 300000 Deleted: 0 Skipped: 0 Warnings: 0

load data infile '/home/justin/src/ssb-dbgen/supplier.tbl' into table supplier fields terminated by '|';
– Query OK, 20000 rows affected (3.39 sec)
– Records: 20000 Deleted: 0 Skipped: 0 Warnings: 0

load data infile '/home/justin/src/ssb-dbgen/part.tbl' into table part fields terminated by '|';
– Query OK, 800000 rows affected (12.78 sec)
– Records: 800000 Deleted: 0 Skipped: 0 Warnings: 0

load data infile '/home/justin/src/ssb-dbgen/date.tbl' into table dim_date fields terminated by '|';
– Query OK, 2556 rows affected (2.21 sec)
– Records: 2556 Deleted: 0 Skipped: 0 Warnings: 0

Test queries ColumnStore vs Infobright Community Edition

Q1.1 CS:

---------------

revenue

---------------

4472807765583

---------------
1 row in set (8.09 sec)

Q1.1 ICE:

---------------

revenue

---------------

4472807765583

---------------
1 row in set (8.57 sec)

Q1.2 CS:

--------------

revenue

--------------

965049065847

--------------
1 row in set (1.15 sec)

Q1.2 ICE:

--------------

revenue

--------------

965049065847

--------------
1 row in set (3.20 sec)

Q1.3 CS:

--------------

revenue

--------------

261356323969

--------------
1 row in set (1.07 sec)

Q1.3 ICE:

--------------

revenue

--------------

261356323969

--------------
1 row in set (3.29 sec)

Q2.1 CS

EMPTY SET

Q2.1 ICE

...

3942185810 1998 MFGR#128
3361726461 1998 MFGR#129

--------------------------------
280 rows in set (6.48 sec)

Q2.2 CS

EMPTY SET

Q2.1 ICE

...

3889049897 1998 MFGR#2227
4198537254 1998 MFGR#2228

--------------------------------
56 rows in set (2.62 sec)

Q2.3 CS

EMPTY SET

Q2.3 ICE

...

6616808054 1997 MFGR#2239
3737796454 1998 MFGR#2239

--------------------------------
7 rows in set (2.52 sec)

Q3.1 CS

EMPTY SET

Q3.1 ICE

...

INDIA VIETNAM 1997 50374353273
JAPAN VIETNAM 1997 49162418930

-------------------------------------+
150 rows in set (6.04 sec)

Q3.2 CS

EMPTY SET

Q3.2 ICE

...

UNITED ST4 UNITED ST2 1997 334014039
UNITED ST0 UNITED ST2 1997 313110196

-------------------------------------+
600 rows in set (2.98 sec)

Q3.3 CS

UNITED KI1 UNITED KI5 1997 487301847
UNITED KI5 UNITED KI5 1997 487127099

-------------------------------------+
24 rows in set (1.59 sec)

Q3.3 ICE

UNITED KI1 UNITED KI5 1997 487301847
UNITED KI5 UNITED KI5 1997 487127099

-------------------------------------+
24 rows in set (2.04 sec)

Q3.4 CS

------------------------------------+

c_city s_city d_year revenue

------------------------------------+

UNITED KI1 UNITED KI1 1997 63303088
UNITED KI1 UNITED KI5 1997 45388550
UNITED KI5 UNITED KI1 1997 44641976
UNITED KI5 UNITED KI5 1997 32600020

------------------------------------+
4 rows in set (1.42 sec)

Q3.4 ICE

------------------------------------+

c_city s_city d_year revenue

------------------------------------+

UNITED KI1 UNITED KI1 1997 63303088
UNITED KI1 UNITED KI5 1997 45388550
UNITED KI5 UNITED KI1 1997 44641976
UNITED KI5 UNITED KI5 1997 32600020

------------------------------------+
4 rows in set (1.96 sec)

Q4.1 CS

EMPTY SET

Q4.1 ICE

...

1998 PERU 60776071215
1998 UNITED STATES 61345891337

---------------------------------
35 rows in set (7.42 sec)

Q4.2 CS

EMPTY SET

Q4.2 ICE

...

1998 UNITED STATES MFGR#24 6115408091
1998 UNITED STATES MFGR#25 6295963298

------------------------------------------+
100 rows in set (3.98 sec)

Q4.3 CS

EMPTY SET

Q4.3 ICE

...

1998 UNITED ST9 MFGR#148 65976845
1998 UNITED ST9 MFGR#149 89906813

------------------------------------+
800 rows in set (3.31 sec)



 Comments   
Comment by Justin Swanhart (Inactive) [ 2016-06-13 ]

– Q1.1
select sum(lo_extendedprice*lo_discount) as
revenue
from lineorder join dim_date on lo_orderdatekey = d_datekey
where
d_year = 1993
and lo_discount between 1 and 3
and lo_quantity < 25;
– Q1.2
select sum(lo_extendedprice*lo_discount) as revenue
from lineorder
join dim_date on lo_orderdatekey = d_datekey
where d_yearmonth = 'Jan1994' and lo_discount
between 4 and 6 and lo_quantity between 26 and 35;
– Q1.3
select sum(lo_extendedprice*lo_discount) as revenue
from lineorder
join dim_date on lo_orderdatekey = d_datekey
where d_weeknuminyear = 6
and d_year = 1994
and lo_discount between 5 and 7 and lo_quantity between 26 and 35;
– Q2.1
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey join supplier
on lo_suppkey = s_suppkey
where p_category = 'MFGR#12'
and s_region = 'AMERICA'
group by d_year, p_brand
order by d_year, p_brand;
– Q2.2
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey
join supplier
on lo_suppkey = s_suppkey
where p_brand between 'MFGR#2221' and 'MFGR#2228'
and s_region = 'ASIA'
group by d_year, p_brand
order by d_year, p_brand;
– Q2.3
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey
join supplier
on lo_suppkey = s_suppkey
where p_brand= 'MFGR#2239'
and s_region = 'EUROPE'
group by d_year, p_brand
order by d_year, p_brand;
– Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date on lo_orderdatekey = d_datekey
where c_region = 'ASIA'
and s_region = 'ASIA'
and d_year >= 1992 and d_year <= 1997
group by c_nation, s_nation, d_year
order by d_year asc, revenue desc;
– Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date
on lo_orderdatekey = d_datekey
where c_nation = 'UNITED STATES'
and s_nation = 'UNITED STATES'
and d_year >= 1992
and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
– Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier on lo_suppkey = s_suppkey
join dim_date on lo_orderdatekey = d_datekey
where (c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_year >= 1992
and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
– Q3.4
select c_city, s_city, d_year, sum(lo_revenue)
as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date
on lo_orderdatekey = d_datekey
where
(c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_yearmonth = 'Dec1997'
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
– Q4.1
select d_year, c_nation,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
c_region = 'AMERICA'
and s_region = 'AMERICA'
and (p_mfgr = 'MFGR#1'
or p_mfgr = 'MFGR#2')
group by d_year, c_nation
order by d_year, c_nation;
– Q4.2
select d_year, s_nation, p_category,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
c_region = 'AMERICA'
and s_region = 'AMERICA'
and (d_year = 1997 or d_year = 1998)
and (p_mfgr = 'MFGR#1'
or p_mfgr = 'MFGR#2')
group by d_year, s_nation, p_category
order by d_year, s_nation, p_category;
– Q4.3
select d_year, s_city, p_brand,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
s_nation = 'UNITED STATES'
and (d_year = 1997 or d_year = 1998)
and p_category = 'MFGR#14'
group by d_year, s_city, p_brand
order by d_year, s_city, p_brand;

Comment by Justin Swanhart (Inactive) [ 2016-06-14 ]

root cause for 2.1 (and likely other failures) is improper support for CHAR datatype in cpimport.

Comment by David Thompson (Inactive) [ 2016-10-10 ]

Cannot reproduce this anymore, get matching results for each query. It's possible something was resolved since this was raised but not aware of anything.

Generated at Thu Feb 08 02:18:45 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.