digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (1)";
2 [labelType="html" label="<br><b>ColumnarToRow</b><br><br>"];
}
3 [labelType="html" label="<br><b>InMemoryTableScan</b><br><br>"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (2)";
5 [labelType="html" label="<br><b>Sort</b><br><br>"];
}
6 [labelType="html" label="<br><b>Exchange</b><br><br>"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)";
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<br><b>Filter</b><br><br>"];
}
10 [labelType="html" label="<br><b>Scan csv </b><br><br>"];
2->0;
3->2;
5->3;
6->5;
8->6;
9->8;
10->9;
}
11
CollectLimit 1000000
ColumnarToRow
WholeStageCodegen (1)
InMemoryTableScan [year#94257890, turnover#94257940, days_hold#94257972]
Sort [year#94257890 ASC NULLS FIRST], true, 0
WholeStageCodegen (2)
Exchange rangepartitioning(year#94257890 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7525604]
Project [CASE WHEN ((year#94257723 = NA) OR (year#94257723 = null)) THEN null ELSE cast(year#94257723 as float) END AS year#94257890, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE cast(turnover#94257747 as float) END AS turnover#94257940, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE (1.0 / cast(cast(turnover#94257747 as float) as double)) END AS days_hold#94257972]
Filter ((isnotnull(turnover#94257747) AND NOT coalesce(((turnover#94257747 = NA) OR (turnover#94257747 = null)), false)) AND isnotnull(cast(turnover#94257747 as float)))
WholeStageCodegen (1)
FileScan csv [year#94257723,turnover#94257747] Batched: false, DataFilters: [isnotnull(turnover#94257747), NOT coalesce(((turnover#94257747 = NA) OR (turnover#94257747 = nul..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/cam1/cam1_score/stats_..., PartitionFilters: [], PushedFilters: [IsNotNull(turnover)], ReadSchema: struct<year:string,turnover:string>
== Physical Plan ==
CollectLimit (9)
+- * ColumnarToRow (8)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- * Sort (7)
+- Exchange (6)
+- * Project (5)
+- * Filter (4)
+- Scan csv (3)
(1) InMemoryTableScan
Output [3]: [year#94257890, turnover#94257940, days_hold#94257972]
Arguments: [year#94257890, turnover#94257940, days_hold#94257972]
(2) InMemoryRelation
Arguments: [year#94257890, turnover#94257940, days_hold#94257972], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94257890 ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(year#94257890 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7525604]
+- *(1) Project [CASE WHEN ((year#94257723 = NA) OR (year#94257723 = null)) THEN null ELSE cast(year#94257723 as float) END AS year#94257890, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE cast(turnover#94257747 as float) END AS turnover#94257940, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE (1.0 / cast(cast(turnover#94257747 as float) as double)) END AS days_hold#94257972]
+- *(1) Filter ((isnotnull(turnover#94257747) AND NOT coalesce(((turnover#94257747 = NA) OR (turnover#94257747 = null)), false)) AND isnotnull(cast(turnover#94257747 as float)))
+- FileScan csv [year#94257723,turnover#94257747] Batched: false, DataFilters: [isnotnull(turnover#94257747), NOT coalesce(((turnover#94257747 = NA) OR (turnover#94257747 = nul..., Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/cam1/cam1_score/stats_..., PartitionFilters: [], PushedFilters: [IsNotNull(turnover)], ReadSchema: struct<year:string,turnover:string>
,None), [year#94257890 ASC NULLS FIRST]
(3) Scan csv
Output [2]: [year#94257723, turnover#94257747]
Batched: false
Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/cam1/cam1_score/stats_year.csv]
PushedFilters: [IsNotNull(turnover)]
ReadSchema: struct<year:string,turnover:string>
(4) Filter [codegen id : 1]
Input [2]: [year#94257723, turnover#94257747]
Condition : ((isnotnull(turnover#94257747) AND NOT coalesce(((turnover#94257747 = NA) OR (turnover#94257747 = null)), false)) AND isnotnull(cast(turnover#94257747 as float)))
(5) Project [codegen id : 1]
Output [3]: [CASE WHEN ((year#94257723 = NA) OR (year#94257723 = null)) THEN null ELSE cast(year#94257723 as float) END AS year#94257890, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE cast(turnover#94257747 as float) END AS turnover#94257940, CASE WHEN ((turnover#94257747 = NA) OR (turnover#94257747 = null)) THEN null ELSE (1.0 / cast(cast(turnover#94257747 as float) as double)) END AS days_hold#94257972]
Input [2]: [year#94257723, turnover#94257747]
(6) Exchange
Input [3]: [year#94257890, turnover#94257940, days_hold#94257972]
Arguments: rangepartitioning(year#94257890 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7525604]
(7) Sort [codegen id : 2]
Input [3]: [year#94257890, turnover#94257940, days_hold#94257972]
Arguments: [year#94257890 ASC NULLS FIRST], true, 0
(8) ColumnarToRow [codegen id : 1]
Input [3]: [year#94257890, turnover#94257940, days_hold#94257972]
(9) CollectLimit
Input [3]: [year#94257890, turnover#94257940, days_hold#94257972]
Arguments: 1000000