The goal of the MADlib pivot function is to provide a data summarization tool that can do basic OLAP type operations on data stored in one table and output the summarized data to a second table.
pivot( source_table, output_table, index, pivot_cols, pivot_values, aggregate_func, fill_value, keep_null, output_col_dictionary )
Arguments
VARCHAR. Name of output table that contains the pivoted data. The output table contains all the columns present in the 'index' column list, plus additional columns for each distinct value in 'pivot_cols'.
VARCHAR. default: 'AVG'. A comma-separated list of aggregates to be applied to values. These can be PostgreSQL built-in aggregates [1] or UDAs. It is possible to assign a set of aggregates per value column. Please refer to the examples 12-14 below for syntax details.
BOOLEAN. default: FALSE. This parameter is used to handle auto-generated column names that exceed the PostgreSQL limit of 63 bytes (could be a common occurrence). If TRUE, column names will be set as numerical IDs and will create a dictionary table called output_table appended with _dictionary. If FALSE, will auto-generate column names in the usual way unless the limit of 63 bytes will be exceeded. In this case, a dictionary output file will be created and a message given to the user.
DROP TABLE IF EXISTS pivset CASCADE; -- View below may depend on table so use CASCADE CREATE TABLE pivset( id INTEGER, piv INTEGER, val FLOAT8 ); INSERT INTO pivset VALUES (0, 10, 1), (0, 10, 2), (0, 20, 3), (1, 20, 4), (1, 30, 5), (1, 30, 6), (1, 10, 7), (NULL, 10, 8), (1, NULL, 9), (1, 10, NULL);
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset', 'pivout', 'id', 'piv', 'val'); SELECT * FROM pivout ORDER BY id;
id | val_avg_piv_10 | val_avg_piv_20 | val_avg_piv_30 ----+----------------+----------------+---------------- 0 | 1.5 | 3 | 1 | 7 | 4 | 5.5 | 8 | |Here NULL is showing as an empty cell in the output.
DROP VIEW IF EXISTS pivset_ext; CREATE VIEW pivset_ext AS SELECT *, COALESCE(id + (val / 3)::integer, 0) AS id2, COALESCE(100*(val / 3)::integer, 0) AS piv2, COALESCE(val + 10, 0) AS val2 FROM pivset; SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext ORDER BY id,id2,piv,piv2,val,val2;
id | id2 | piv | piv2 | val | val2 ----+-----+-----+------+-----+------ 0 | 0 | 10 | 0 | 1 | 11 0 | 1 | 10 | 100 | 2 | 12 0 | 1 | 20 | 100 | 3 | 13 1 | 0 | 10 | 0 | | 0 1 | 2 | 20 | 100 | 4 | 14 1 | 3 | 10 | 200 | 7 | 17 1 | 3 | 30 | 200 | 5 | 15 1 | 3 | 30 | 200 | 6 | 16 1 | 4 | | 300 | 9 | 19 | 0 | 10 | 300 | 8 | 18 (10 rows)
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum'); SELECT * FROM pivout ORDER BY id;
id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30 ----+----------------+----------------+---------------- 0 | 3 | 3 | 1 | 7 | 4 | 11 | 8 | |
DROP FUNCTION IF EXISTS array_add1 (ANYARRAY, ANYELEMENT) CASCADE; CREATE FUNCTION array_add1(ANYARRAY, ANYELEMENT) RETURNS ANYARRAY AS $$ SELECT $1 || $2 $$ LANGUAGE sql STRICT; DROP AGGREGATE IF EXISTS array_accum1 (anyelement); CREATE AGGREGATE array_accum1 (anyelement) ( sfunc = array_add1, stype = anyarray, initcond = '{}' ); DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1'); SELECT * FROM pivout ORDER BY id;
id | val_array_accum1_piv_10 | val_array_accum1_piv_20 | val_array_accum1_piv_30 ----+-------------------------+-------------------------+------------------------- 0 | {1,2} | {3} | {} 1 | {7} | {4} | {5,6} | {8} | {} | {}
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True); SELECT * FROM pivout ORDER BY id;
id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30 | val_sum_piv_null ----+----------------+----------------+----------------+------------------ 0 | 3 | 3 | | 1 | 7 | 4 | 11 | 9 | 8 | | |
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111'); SELECT * FROM pivout ORDER BY id;
id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30 ----+----------------+----------------+---------------- 0 | 3 | 3 | 111 1 | 7 | 4 | 11 | 8 | 111 | 111
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 'val'); SELECT * FROM pivout ORDER BY id,id2;
id | id2 | val_avg_piv_10 | val_avg_piv_20 | val_avg_piv_30 ----+-----+----------------+----------------+---------------- 0 | 0 | 1 | | 0 | 1 | 2 | 3 | 1 | 0 | | | 1 | 2 | | 4 | 1 | 3 | 7 | | 5.5 1 | 4 | | | | 0 | 8 | |
\x on
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val'); SELECT * FROM pivout ORDER BY id;
-[ RECORD 1 ]-----------+---- id | 0 val_avg_piv_10_piv2_0 | 1 val_avg_piv_10_piv2_100 | 2 val_avg_piv_10_piv2_200 | val_avg_piv_10_piv2_300 | val_avg_piv_20_piv2_0 | val_avg_piv_20_piv2_100 | 3 val_avg_piv_20_piv2_200 | val_avg_piv_20_piv2_300 | val_avg_piv_30_piv2_0 | val_avg_piv_30_piv2_100 | val_avg_piv_30_piv2_200 | val_avg_piv_30_piv2_300 | -[ RECORD 2 ]-----------+---- id | 1 val_avg_piv_10_piv2_0 | val_avg_piv_10_piv2_100 | val_avg_piv_10_piv2_200 | 7 val_avg_piv_10_piv2_300 | val_avg_piv_20_piv2_0 | val_avg_piv_20_piv2_100 | 4 val_avg_piv_20_piv2_200 | val_avg_piv_20_piv2_300 | val_avg_piv_30_piv2_0 | val_avg_piv_30_piv2_100 | val_avg_piv_30_piv2_200 | 5.5 val_avg_piv_30_piv2_300 | ...
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2'); SELECT * FROM pivout ORDER BY id;
-[ RECORD 1 ]---+----- id | 0 val_avg_piv_10 | 1.5 val_avg_piv_20 | 3 val_avg_piv_30 | val2_avg_piv_10 | 11.5 val2_avg_piv_20 | 13 val2_avg_piv_30 | -[ RECORD 2 ]---+----- id | 1 val_avg_piv_10 | 7 val_avg_piv_20 | 4 val_avg_piv_30 | 5.5 val2_avg_piv_10 | 8.5 val2_avg_piv_20 | 14 val2_avg_piv_30 | 15.5 ...
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum'); SELECT * FROM pivout ORDER BY id;
-[ RECORD 1 ]--+---- id | 0 val_avg_piv_10 | 1.5 val_avg_piv_20 | 3 val_avg_piv_30 | val_sum_piv_10 | 3 val_sum_piv_20 | 3 val_sum_piv_30 | -[ RECORD 2 ]--+---- id | 1 val_avg_piv_10 | 7 val_avg_piv_20 | 4 val_avg_piv_30 | 5.5 val_sum_piv_10 | 7 val_sum_piv_20 | 4 val_sum_piv_30 | 11 ...
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'val=avg, val2=sum'); SELECT * FROM pivout ORDER BY id;
-[ RECORD 1 ]---+---- id | 0 val_avg_piv_10 | 1.5 val_avg_piv_20 | 3 val_avg_piv_30 | val2_sum_piv_10 | 23 val2_sum_piv_20 | 13 val2_sum_piv_30 | -[ RECORD 2 ]---+---- id | 1 val_avg_piv_10 | 7 val_avg_piv_20 | 4 val_avg_piv_30 | 5.5 val2_sum_piv_10 | 17 val2_sum_piv_20 | 14 val2_sum_piv_30 | 31 ...
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'val=avg, val2=[avg,sum]'); SELECT * FROM pivout ORDER BY id;
-[ RECORD 1 ]---+----- id | 0 val_avg_piv_10 | 1.5 val_avg_piv_20 | 3 val_avg_piv_30 | val2_avg_piv_10 | 11.5 val2_avg_piv_20 | 13 val2_avg_piv_30 | val2_sum_piv_10 | 23 val2_sum_piv_20 | 13 val2_sum_piv_30 | -[ RECORD 2 ]---+----- id | 1 val_avg_piv_10 | 7 val_avg_piv_20 | 4 val_avg_piv_30 | 5.5 val2_avg_piv_10 | 8.5 val2_avg_piv_20 | 14 val2_avg_piv_30 | 15.5 val2_sum_piv_10 | 17 val2_sum_piv_20 | 14 val2_sum_piv_30 | 31 ...
DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', 'val=avg, val2=[avg,sum]', '111', True); SELECT * FROM pivout ORDER BY id,id2;
-[ RECORD 1 ]--------------+----- id | 0 id2 | 0 val_avg_piv_null_piv2_0 | 111 val_avg_piv_null_piv2_100 | 111 val_avg_piv_null_piv2_200 | 111 val_avg_piv_null_piv2_300 | 111 val_avg_piv_10_piv2_0 | 1 val_avg_piv_10_piv2_100 | 111 val_avg_piv_10_piv2_200 | 111 val_avg_piv_10_piv2_300 | 111 val_avg_piv_20_piv2_0 | 111 val_avg_piv_20_piv2_100 | 111 val_avg_piv_20_piv2_200 | 111 val_avg_piv_20_piv2_300 | 111 val_avg_piv_30_piv2_0 | 111 val_avg_piv_30_piv2_100 | 111 val_avg_piv_30_piv2_200 | 111 val_avg_piv_30_piv2_300 | 111 val2_avg_piv_null_piv2_0 | 111 val2_avg_piv_null_piv2_100 | 111 val2_avg_piv_null_piv2_200 | 111 val2_avg_piv_null_piv2_300 | 111 val2_avg_piv_10_piv2_0 | 11 val2_avg_piv_10_piv2_100 | 111 val2_avg_piv_10_piv2_200 | 111 val2_avg_piv_10_piv2_300 | 111 val2_avg_piv_20_piv2_0 | 111 val2_avg_piv_20_piv2_100 | 111 val2_avg_piv_20_piv2_200 | 111 val2_avg_piv_20_piv2_300 | 111 val2_avg_piv_30_piv2_0 | 111 val2_avg_piv_30_piv2_100 | 111 val2_avg_piv_30_piv2_200 | 111 val2_avg_piv_30_piv2_300 | 111 val2_sum_piv_null_piv2_0 | 111 val2_sum_piv_null_piv2_100 | 111 val2_sum_piv_null_piv2_200 | 111 val2_sum_piv_null_piv2_300 | 111 val2_sum_piv_10_piv2_0 | 11 val2_sum_piv_10_piv2_100 | 111 val2_sum_piv_10_piv2_200 | 111 val2_sum_piv_10_piv2_300 | 111 val2_sum_piv_20_piv2_0 | 111 val2_sum_piv_20_piv2_100 | 111 val2_sum_piv_20_piv2_200 | 111 val2_sum_piv_20_piv2_300 | 111 val2_sum_piv_30_piv2_0 | 111 val2_sum_piv_30_piv2_100 | 111 val2_sum_piv_30_piv2_200 | 111 val2_sum_piv_30_piv2_300 | 111 -[ RECORD 2 ]--------------+----- id | 0 id2 | 1 val_avg_piv_null_piv2_0 | 111 val_avg_piv_null_piv2_100 | 111 val_avg_piv_null_piv2_200 | 111 val_avg_piv_null_piv2_300 | 111 val_avg_piv_10_piv2_0 | 111 val_avg_piv_10_piv2_100 | 2 val_avg_piv_10_piv2_200 | 111 val_avg_piv_10_piv2_300 | 111 val_avg_piv_20_piv2_0 | 111 val_avg_piv_20_piv2_100 | 3 val_avg_piv_20_piv2_200 | 111 val_avg_piv_20_piv2_300 | 111 val_avg_piv_30_piv2_0 | 111 val_avg_piv_30_piv2_100 | 111 val_avg_piv_30_piv2_200 | 111 val_avg_piv_30_piv2_300 | 111 val2_avg_piv_null_piv2_0 | 111 val2_avg_piv_null_piv2_100 | 111 val2_avg_piv_null_piv2_200 | 111 val2_avg_piv_null_piv2_300 | 111 val2_avg_piv_10_piv2_0 | 111 val2_avg_piv_10_piv2_100 | 12 val2_avg_piv_10_piv2_200 | 111 val2_avg_piv_10_piv2_300 | 111 val2_avg_piv_20_piv2_0 | 111 val2_avg_piv_20_piv2_100 | 13 val2_avg_piv_20_piv2_200 | 111 val2_avg_piv_20_piv2_300 | 111 val2_avg_piv_30_piv2_0 | 111 val2_avg_piv_30_piv2_100 | 111 val2_avg_piv_30_piv2_200 | 111 val2_avg_piv_30_piv2_300 | 111 val2_sum_piv_null_piv2_0 | 111 val2_sum_piv_null_piv2_100 | 111 val2_sum_piv_null_piv2_200 | 111 val2_sum_piv_null_piv2_300 | 111 val2_sum_piv_10_piv2_0 | 111 val2_sum_piv_10_piv2_100 | 12 val2_sum_piv_10_piv2_200 | 111 val2_sum_piv_10_piv2_300 | 111 val2_sum_piv_20_piv2_0 | 111 val2_sum_piv_20_piv2_100 | 13 val2_sum_piv_20_piv2_200 | 111 val2_sum_piv_20_piv2_300 | 111 val2_sum_piv_30_piv2_0 | 111 val2_sum_piv_30_piv2_100 | 111 val2_sum_piv_30_piv2_200 | 111 val2_sum_piv_30_piv2_300 | 111 ...
DROP TABLE IF EXISTS pivout, pivout_dictionary; SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', 'val=avg, val2=[avg,sum]', '111', True, True); SELECT * FROM pivout_dictionary;
__pivot_cid__ | pval | agg | piv | piv2 | col_name ---------------+------+-----+-----+------+------------------------------ __p_1__ | val | avg | | 100 | "val_avg_piv_null_piv2_100" __p_5__ | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100" __p_9__ | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100" __p_12__ | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0" __p_16__ | val2 | avg | | 0 | "val2_avg_piv_null_piv2_0" __p_23__ | val2 | avg | 10 | 300 | "val2_avg_piv_10_piv2_300" __p_27__ | val2 | avg | 20 | 300 | "val2_avg_piv_20_piv2_300" __p_30__ | val2 | avg | 30 | 200 | "val2_avg_piv_30_piv2_200" __p_34__ | val2 | sum | | 200 | "val2_sum_piv_null_piv2_200" __p_38__ | val2 | sum | 10 | 200 | "val2_sum_piv_10_piv2_200" __p_41__ | val2 | sum | 20 | 100 | "val2_sum_piv_20_piv2_100" __p_45__ | val2 | sum | 30 | 100 | "val2_sum_piv_30_piv2_100" __p_2__ | val | avg | | 200 | "val_avg_piv_null_piv2_200" __p_6__ | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200" __p_11__ | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300" __p_15__ | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300" __p_19__ | val2 | avg | | 300 | "val2_avg_piv_null_piv2_300" __p_20__ | val2 | avg | 10 | 0 | "val2_avg_piv_10_piv2_0" __p_24__ | val2 | avg | 20 | 0 | "val2_avg_piv_20_piv2_0" __p_28__ | val2 | avg | 30 | 0 | "val2_avg_piv_30_piv2_0" __p_33__ | val2 | sum | | 100 | "val2_sum_piv_null_piv2_100" __p_37__ | val2 | sum | 10 | 100 | "val2_sum_piv_10_piv2_100" __p_42__ | val2 | sum | 20 | 200 | "val2_sum_piv_20_piv2_200" __p_46__ | val2 | sum | 30 | 200 | "val2_sum_piv_30_piv2_200" __p_3__ | val | avg | | 300 | "val_avg_piv_null_piv2_300" __p_7__ | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300" __p_10__ | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200" __p_14__ | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200" __p_18__ | val2 | avg | | 200 | "val2_avg_piv_null_piv2_200" __p_21__ | val2 | avg | 10 | 100 | "val2_avg_piv_10_piv2_100" __p_25__ | val2 | avg | 20 | 100 | "val2_avg_piv_20_piv2_100" __p_29__ | val2 | avg | 30 | 100 | "val2_avg_piv_30_piv2_100" __p_32__ | val2 | sum | | 0 | "val2_sum_piv_null_piv2_0" __p_36__ | val2 | sum | 10 | 0 | "val2_sum_piv_10_piv2_0" __p_43__ | val2 | sum | 20 | 300 | "val2_sum_piv_20_piv2_300" __p_47__ | val2 | sum | 30 | 300 | "val2_sum_piv_30_piv2_300" __p_0__ | val | avg | | 0 | "val_avg_piv_null_piv2_0" __p_4__ | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0" __p_8__ | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0" __p_13__ | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100" __p_17__ | val2 | avg | | 100 | "val2_avg_piv_null_piv2_100" __p_22__ | val2 | avg | 10 | 200 | "val2_avg_piv_10_piv2_200" __p_26__ | val2 | avg | 20 | 200 | "val2_avg_piv_20_piv2_200" __p_31__ | val2 | avg | 30 | 300 | "val2_avg_piv_30_piv2_300" __p_35__ | val2 | sum | | 300 | "val2_sum_piv_null_piv2_300" __p_39__ | val2 | sum | 10 | 300 | "val2_sum_piv_10_piv2_300" __p_40__ | val2 | sum | 20 | 0 | "val2_sum_piv_20_piv2_0" __p_44__ | val2 | sum | 30 | 0 | "val2_sum_piv_30_piv2_0" (48 rows)
SELECT * FROM pivout ORDER BY id,id2;
-[ RECORD 1 ]-- id | 0 id2 | 0 __p_0__ | 111 __p_1__ | 111 __p_2__ | 111 __p_3__ | 111 __p_4__ | 1 __p_5__ | 111 __p_6__ | 111 __p_7__ | 111 __p_8__ | 111 __p_9__ | 111 __p_10__ | 111 __p_11__ | 111 __p_12__ | 111 __p_13__ | 111 __p_14__ | 111 __p_15__ | 111 __p_16__ | 111 __p_17__ | 111 __p_18__ | 111 __p_19__ | 111 __p_20__ | 11 __p_21__ | 111 __p_22__ | 111 __p_23__ | 111 __p_24__ | 111 __p_25__ | 111 __p_26__ | 111 __p_27__ | 111 __p_28__ | 111 __p_29__ | 111 __p_30__ | 111 __p_31__ | 111 __p_32__ | 111 __p_33__ | 111 __p_34__ | 111 __p_35__ | 111 __p_36__ | 11 __p_37__ | 111 __p_38__ | 111 __p_39__ | 111 __p_40__ | 111 __p_41__ | 111 __p_42__ | 111 __p_43__ | 111 __p_44__ | 111 __p_45__ | 111 __p_46__ | 111 __p_47__ | 111 -[ RECORD 2 ]-- id | 0 id2 | 1 __p_0__ | 111 __p_1__ | 111 __p_2__ | 111 __p_3__ | 111 __p_4__ | 111 __p_5__ | 2 __p_6__ | 111 __p_7__ | 111 __p_8__ | 111 __p_9__ | 3 __p_10__ | 111 __p_11__ | 111 __p_12__ | 111 __p_13__ | 111 __p_14__ | 111 __p_15__ | 111 __p_16__ | 111 __p_17__ | 111 __p_18__ | 111 __p_19__ | 111 __p_20__ | 111 __p_21__ | 12 __p_22__ | 111 __p_23__ | 111 __p_24__ | 111 __p_25__ | 13 __p_26__ | 111 __p_27__ | 111 __p_28__ | 111 __p_29__ | 111 __p_30__ | 111 __p_31__ | 111 __p_32__ | 111 __p_33__ | 111 __p_34__ | 111 __p_35__ | 111 __p_36__ | 111 __p_37__ | 12 __p_38__ | 111 __p_39__ | 111 __p_40__ | 111 __p_41__ | 13 __p_42__ | 111 __p_43__ | 111 __p_44__ | 111 __p_45__ | 111 __p_46__ | 111 __p_47__ | 111 ...
[1] https://www.postgresql.org/docs/8.2/static/functions-aggregate.html
[2] https://www.postgresql.org/docs/8.2/static/sql-createaggregate.html