digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
1 [labelType="html" label="<br><b>InMemoryTableScan</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (3)";
3 [labelType="html" label="<br><b>Sort</b><br><br>"];
}
4 [labelType="html" label="<b>Exchange</b><br><br>number of partitions: 13"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 15 ms";
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 13"];
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 13"];
}
10 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 13<br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 6.3 KiB"];
11 [labelType="html" label="<b>BroadcastExchange</b><br><br>time to broadcast: 1 ms<br>time to build: 0 ms<br>time to collect: 26 ms<br>number of output rows: 13<br>data size: 1024.9 KiB"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 0 ms";
13 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 13"];
}
14 [labelType="html" label="<b>InMemoryTableScan</b><br><br>number of output rows: 13"];
subgraph cluster15 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 0 ms";
16 [labelType="html" label="<br><b>Project</b><br><br>"];
}
17 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 0"];
1->0;
3->1;
4->3;
6->4;
7->6;
8->7;
9->8;
10->9;
11->7;
13->11;
14->13;
16->14;
17->16;
}
18
CollectLimit 1000000
InMemoryTableScan [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Sort [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
WholeStageCodegen (3)
Exchange rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7535639]
Project [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
BroadcastHashJoin [sector_id#94382736], [sector_id#94160418], Inner, BuildRight, false
Project [CASE WHEN ((sector_id#94382379 = NA) OR (sector_id#94382379 = null)) THEN null ELSE cast(sector_id#94382379 as int) END AS sector_id#94382736, CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(numcos#94382382 as float) END AS numcos#94382820, CASE WHEN ((numdates#94382383 = NA) OR (numdates#94382383 = null)) THEN null ELSE cast(numdates#94382383 as float) END AS numdates#94382821, CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(coverage#94382404 as float) END AS coverage#94382913, round((CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(cast(numcos#94382382 as float) as double) END / CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(cast(coverage#94382404 as float) as double) END), 0) AS universe#94383074]
Filter (NOT coalesce(((sector_id#94382379 = NA) OR (sector_id#94382379 = null)), false) AND isnotnull(cast(sector_id#94382379 as int)))
WholeStageCodegen (2)
FileScan csv [sector_id#94382379,numcos#94382382,numdates#94382383,coverage#94382404] Batched: false, DataFilters: [NOT coalesce(((sector_id#94382379 = NA) OR (sector_id#94382379 = null)), false), isnotnull(cast(..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7535634]
Filter isnotnull(sector_id#94160418)
WholeStageCodegen (1)
InMemoryTableScan [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
WholeStageCodegen (1)
FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
== Physical Plan ==
CollectLimit (16)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- * Sort (15)
+- Exchange (14)
+- * Project (13)
+- * BroadcastHashJoin Inner BuildRight (12)
:- * Project (5)
: +- * Filter (4)
: +- Scan csv (3)
+- BroadcastExchange (11)
+- * Filter (10)
+- InMemoryTableScan (6)
+- InMemoryRelation (7)
+- * Project (9)
+- Scan csv (8)
(1) InMemoryTableScan
Output [7]: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Arguments: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
(2) InMemoryRelation
Arguments: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(3) Sort [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7535639]
+- *(2) Project [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
+- *(2) BroadcastHashJoin [sector_id#94382736], [sector_id#94160418], Inner, BuildRight, false
:- *(2) Project [CASE WHEN ((sector_id#94382379 = NA) OR (sector_id#94382379 = null)) THEN null ELSE cast(sector_id#94382379 as int) END AS sector_id#94382736, CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(numcos#94382382 as float) END AS numcos#94382820, CASE WHEN ((numdates#94382383 = NA) OR (numdates#94382383 = null)) THEN null ELSE cast(numdates#94382383 as float) END AS numdates#94382821, CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(coverage#94382404 as float) END AS coverage#94382913, round((CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(cast(numcos#94382382 as float) as double) END / CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(cast(coverage#94382404 as float) as double) END), 0) AS universe#94383074]
: +- *(2) Filter (NOT coalesce(((sector_id#94382379 = NA) OR (sector_id#94382379 = null)), false) AND isnotnull(cast(sector_id#94382379 as int)))
: +- FileScan csv [sector_id#94382379,numcos#94382382,numdates#94382383,coverage#94382404] Batched: false, DataFilters: [NOT coalesce(((sector_id#94382379 = NA) OR (sector_id#94382379 = null)), false), isnotnull(cast(..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7535634]
+- *(1) Filter isnotnull(sector_id#94160418)
+- InMemoryTableScan [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
+- InMemoryRelation [sector_id#94160418, sort#94160419, description#94160423, universe#94160424], StorageLevel(disk, memory, deserialized, 1 replicas)
+- *(1) Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
+- FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
,None), [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST]
(3) Scan csv
Output [4]: [sector_id#94382379, numcos#94382382, numdates#94382383, coverage#94382404]
Batched: false
Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/stats_sector_id.csv]
ReadSchema: struct<sector_id:string,numcos:string,numdates:string,coverage:string>
(4) Filter [codegen id : 2]
Input [4]: [sector_id#94382379, numcos#94382382, numdates#94382383, coverage#94382404]
Condition : (NOT coalesce(((sector_id#94382379 = NA) OR (sector_id#94382379 = null)), false) AND isnotnull(cast(sector_id#94382379 as int)))
(5) Project [codegen id : 2]
Output [5]: [CASE WHEN ((sector_id#94382379 = NA) OR (sector_id#94382379 = null)) THEN null ELSE cast(sector_id#94382379 as int) END AS sector_id#94382736, CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(numcos#94382382 as float) END AS numcos#94382820, CASE WHEN ((numdates#94382383 = NA) OR (numdates#94382383 = null)) THEN null ELSE cast(numdates#94382383 as float) END AS numdates#94382821, CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(coverage#94382404 as float) END AS coverage#94382913, round((CASE WHEN ((numcos#94382382 = NA) OR (numcos#94382382 = null)) THEN null ELSE cast(cast(numcos#94382382 as float) as double) END / CASE WHEN ((coverage#94382404 = NA) OR (coverage#94382404 = null)) THEN null ELSE cast(cast(coverage#94382404 as float) as double) END), 0) AS universe#94383074]
Input [4]: [sector_id#94382379, numcos#94382382, numdates#94382383, coverage#94382404]
(6) InMemoryTableScan
Output [3]: [sector_id#94160418, sort#94160419, description#94160423]
Arguments: [sector_id#94160418, sort#94160419, description#94160423], [isnotnull(sector_id#94160418)]
(7) InMemoryRelation
Arguments: [sector_id#94160418, sort#94160419, description#94160423, universe#94160424], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
+- FileScan csv [sector_id#94160398,sort#94160399,description#94160400,universe#94160401] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
,None)
(8) Scan csv
Output [4]: [sector_id#94160398, sort#94160399, description#94160400, universe#94160401]
Batched: false
Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/curate/curate_sector.csv]
ReadSchema: struct<sector_id:string,sort:string,description:string,universe:string>
(9) Project [codegen id : 1]
Output [4]: [CASE WHEN ((sector_id#94160398 = NA) OR (sector_id#94160398 = null)) THEN null ELSE cast(sector_id#94160398 as int) END AS sector_id#94160418, CASE WHEN (sort#94160399 = null) THEN null ELSE sort#94160399 END AS sort#94160419, CASE WHEN (description#94160400 = null) THEN null ELSE description#94160400 END AS description#94160423, CASE WHEN ((universe#94160401 = NA) OR (universe#94160401 = null)) THEN null ELSE cast(universe#94160401 as int) END AS universe#94160424]
Input [4]: [sector_id#94160398, sort#94160399, description#94160400, universe#94160401]
(10) Filter [codegen id : 1]
Input [3]: [sector_id#94160418, sort#94160419, description#94160423]
Condition : isnotnull(sector_id#94160418)
(11) BroadcastExchange
Input [3]: [sector_id#94160418, sort#94160419, description#94160423]
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7535634]
(12) BroadcastHashJoin [codegen id : 2]
Left keys [1]: [sector_id#94382736]
Right keys [1]: [sector_id#94160418]
Join condition: None
(13) Project [codegen id : 2]
Output [7]: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Input [8]: [sector_id#94382736, numcos#94382820, numdates#94382821, coverage#94382913, universe#94383074, sector_id#94160418, sort#94160419, description#94160423]
(14) Exchange
Input [7]: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Arguments: rangepartitioning(sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7535639]
(15) Sort [codegen id : 3]
Input [7]: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Arguments: [sort#94160419 ASC NULLS FIRST, description#94160423 ASC NULLS FIRST], true, 0
(16) CollectLimit
Input [7]: [sector_id#94382736, numcos#94382820, numdates#94382821, sort#94160419, description#94160423, universe#94383074, coverage#94382913]
Arguments: 1000000