digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
1 [labelType="html" label="<br><b>InMemoryTableScan</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (3)";
3 [labelType="html" label="<br><b>Sort</b><br><br>"];
}
4 [labelType="html" label="<b>Exchange</b><br><br>number of partitions: 12"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 3 ms";
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 12"];
13 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 13"];
}
8 [labelType="html" label="<b>BroadcastExchange</b><br><br>time to broadcast: 1 ms<br>time to build: 1 ms<br>time to collect: 91 ms<br>number of output rows: 12<br>data size: 1024.9 KiB"];
subgraph cluster9 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 11 ms";
10 [labelType="html" label="<br><b>Project</b><br><br>"];
11 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 12"];
}
12 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 12<br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 4.4 KiB"];
14 [labelType="html" label="<b>InMemoryTableScan</b><br><br>number of output rows: 13"];
subgraph cluster15 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 0 ms";
16 [labelType="html" label="<br><b>Project</b><br><br>"];
}
17 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 0"];
1->0;
3->1;
4->3;
6->4;
7->6;
8->7;
10->8;
11->10;
12->11;
13->7;
14->13;
16->14;
17->16;
}
18
CollectLimit 1000000
InMemoryTableScan [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Sort [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
WholeStageCodegen (3)
Exchange rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7533384]
Project [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
BroadcastHashJoin [sector_id#94354208], [sector_id#94160418], Inner, BuildLeft, false
Filter isnotnull(sector_id#94160418)
WholeStageCodegen (2)
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7533377]
Project [CASE WHEN ((sector_id#94354027 = NA) OR (sector_id#94354027 = null)) THEN null ELSE cast(sector_id#94354027 as int) END AS sector_id#94354208, CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(numcos#94354030 as float) END AS numcos#94354279, CASE WHEN ((numdates#94354031 = NA) OR (numdates#94354031 = null)) THEN null ELSE cast(numdates#94354031 as float) END AS numdates#94354348, CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(coverage#94354052 as float) END AS coverage#94354418, round((CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(cast(numcos#94354030 as float) as double) END / CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(cast(coverage#94354052 as float) as double) END), 0) AS universe#94354578]
Filter (NOT coalesce(((sector_id#94354027 = NA) OR (sector_id#94354027 = null)), false) AND isnotnull(cast(sector_id#94354027 as int)))
WholeStageCodegen (1)
FileScan csv [sector_id#94354027,numcos#94354030,numdates#94354031,coverage#94354052] Batched: false, DataFilters: [NOT coalesce(((sector_id#94354027 = NA) OR (sector_id#94354027 = null)), false), isnotnull(cast(..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rank..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
InMemoryTableScan [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
WholeStageCodegen (1)
FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
== Physical Plan ==
CollectLimit (16)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- * Sort (15)
+- Exchange (14)
+- * Project (13)
+- * BroadcastHashJoin Inner BuildLeft (12)
:- BroadcastExchange (6)
: +- * Project (5)
: +- * Filter (4)
: +- Scan csv (3)
+- * Filter (11)
+- InMemoryTableScan (7)
+- InMemoryRelation (8)
+- * Project (10)
+- Scan csv (9)
(1) InMemoryTableScan
Output [7]: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Arguments: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
(2) InMemoryRelation
Arguments: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(3) Sort [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7533384]
+- *(2) Project [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
+- *(2) BroadcastHashJoin [sector_id#94354208], [sector_id#94160418], Inner, BuildLeft, false
:- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7533377]
: +- *(1) Project [CASE WHEN ((sector_id#94354027 = NA) OR (sector_id#94354027 = null)) THEN null ELSE cast(sector_id#94354027 as int) END AS sector_id#94354208, CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(numcos#94354030 as float) END AS numcos#94354279, CASE WHEN ((numdates#94354031 = NA) OR (numdates#94354031 = null)) THEN null ELSE cast(numdates#94354031 as float) END AS numdates#94354348, CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(coverage#94354052 as float) END AS coverage#94354418, round((CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(cast(numcos#94354030 as float) as double) END / CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(cast(coverage#94354052 as float) as double) END), 0) AS universe#94354578]
: +- *(1) Filter (NOT coalesce(((sector_id#94354027 = NA) OR (sector_id#94354027 = null)), false) AND isnotnull(cast(sector_id#94354027 as int)))
: +- FileScan csv [sector_id#94354027,numcos#94354030,numdates#94354031,coverage#94354052] Batched: false, DataFilters: [NOT coalesce(((sector_id#94354027 = NA) OR (sector_id#94354027 = null)), false), isnotnull(cast(..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rank..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
+- *(2) Filter isnotnull(sector_id#94160418)
+- InMemoryTableScan [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
+- InMemoryRelation [sector_id#94160418, sort#94160419, description#94160423, universe#94160424], StorageLevel(disk, memory, deserialized, 1 replicas)
+- *(1) Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
+- FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
,None), [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST]
(3) Scan csv
Output [4]: [sector_id#94354027, numcos#94354030, numdates#94354031, coverage#94354052]
Batched: false
Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rankviews/stats_sector_id.csv]
ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
(4) Filter [codegen id : 1]
Input [4]: [sector_id#94354027, numcos#94354030, numdates#94354031, coverage#94354052]
Condition : (NOT coalesce(((sector_id#94354027 = NA) OR (sector_id#94354027 = null)), false) AND isnotnull(cast(sector_id#94354027 as int)))
(5) Project [codegen id : 1]
Output [5]: [CASE WHEN ((sector_id#94354027 = NA) OR (sector_id#94354027 = null)) THEN null ELSE cast(sector_id#94354027 as int) END AS sector_id#94354208, CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(numcos#94354030 as float) END AS numcos#94354279, CASE WHEN ((numdates#94354031 = NA) OR (numdates#94354031 = null)) THEN null ELSE cast(numdates#94354031 as float) END AS numdates#94354348, CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(coverage#94354052 as float) END AS coverage#94354418, round((CASE WHEN ((numcos#94354030 = NA) OR (numcos#94354030 = null)) THEN null ELSE cast(cast(numcos#94354030 as float) as double) END / CASE WHEN ((coverage#94354052 = NA) OR (coverage#94354052 = null)) THEN null ELSE cast(cast(coverage#94354052 as float) as double) END), 0) AS universe#94354578]
Input [4]: [sector_id#94354027, numcos#94354030, numdates#94354031, coverage#94354052]
(6) BroadcastExchange
Input [5]: [sector_id#94354208, numcos#94354279, numdates#94354348, coverage#94354418, universe#94354578]
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7533377]
(7) InMemoryTableScan
Output [3]: [sector_id#94160418, sort#94160419, description#94160423]
Arguments: [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
(8) InMemoryRelation
Arguments: [sector_id#94160418, sort#94160419, description#94160423, universe#94160424], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
+- FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
,None)
(9) Scan csv
Output [4]: [sector_id#94160398, sort#94160399, description#94160400, universe#94160401]
Batched: false
Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv]
ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
(10) Project [codegen id : 1]
Output [4]: [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
Input [4]: [sector_id#94160398, sort#94160399, description#94160400, universe#94160401]
(11) Filter
Input [3]: [sector_id#94160418, sort#94160419, description#94160423]
Condition : isnotnull(sector_id#94160418)
(12) BroadcastHashJoin [codegen id : 2]
Left keys [1]: [sector_id#94354208]
Right keys [1]: [sector_id#94160418]
Join condition: None
(13) Project [codegen id : 2]
Output [7]: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Input [8]: [sector_id#94354208, numcos#94354279, numdates#94354348, coverage#94354418, universe#94354578, sector_id#94160418, sort#94160419, description#94160423]
(14) Exchange
Input [7]: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Arguments: rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7533384]
(15) Sort [codegen id : 3]
Input [7]: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Arguments: [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
(16) CollectLimit
Input [7]: [sector_id#94354208, numcos#94354279, numdates#94354348, sort#94160419, description#94160423, universe#94354578, coverage#94354418]
Arguments: 1000000