web123456

Spark performance tuning case-multi-table join optimization, reducing shuffle

// Read data once from tableA and put it in temporary table t1 DROP TABLE IF EXISTS temp.tmp_xx_$date_1; CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_1 as select xxx from tableA where xxxx; // Read and convert data from temporary table t1, get temporary table t2 DROP TABLE IF EXISTS temp.tmp_xx_$date_2; CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_2 as SELECT device_id ,c2 FROM ( SELECT device_id ,c2 ,ROW_NUMBER() OVER (PARTITION BY device_id ORDER BY time DESC) AS num FROM ( select device_id ,c2 from temp.tmp_xx_$date_1 where !isempty(c2) )t )t WHERE t.num=1; // Read and convert data from temporary table t1, get temporary table t3 DROP TABLE IF EXISTS temp.tmp_xx_$date_3; CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_3 as SELECT device_id ,c3 FROM ( SELECT device_id ,c3 ,ROW_NUMBER() OVER (PARTITION BY device_id ORDER BY time DESC) AS num FROM ( select device_id ,c3 from temp.tmp_xx_$date_1 where !isempty(c3) )t )t WHERE t.num=1; // Read and convert data from temporary table t1, get temporary table t4 DROP TABLE IF EXISTS temp.tmp_xx_$date_4; CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_4 as SELECT device_id ,c4 FROM ( SELECT device_id ,c6 ,ROW_NUMBER() OVER (PARTITION BY device_id ORDER BY time DESC) AS num FROM ( select device_id ,c4 from temp.tmp_xx_$date_1 where !isempty(c4) )t )t WHERE t.num=1 ... // Read and convert data from temporary table t1, get temporary table t7 DROP TABLE IF EXISTS temp.tmp_xx_$date_7; CREATE TABLE IF NOT EXISTS temp.tmp_xxx_$date_7 as SELECT device_id ,c7 FROM ( SELECT device_id ,c7 ,ROW_NUMBER() OVER (PARTITION BY device_id ORDER BY time DESC) AS num FROM ( select device_id ,c7 from temp.tmp_xx_$date_1 where !isempty(c7) )t )t WHERE t.num=1 //Write the result of t2 associated t3-t7 to the final result table INSERT OVERWRITE TABLE biads.xxxx PARTITION (pt_d='$date') select xxx from temp.tmp_xx_$date_2 left join temp.tmp_xx_$date_3 left join temp.tmp_xx_$date_4 ... left join temp.tmp_xx_$date_7