== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94328492, turnover#94328540, days_hold#94328575] Arguments: [year#94328492, turnover#94328540, days_hold#94328575] (2) InMemoryRelation Arguments: [year#94328492, turnover#94328540, days_hold#94328575], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94328492 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94328492 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7531340] +- *(1) Project [year#94328492, turnover#94328540, (1.0 / cast(turnover#94328540 as double)) AS days_hold#94328575] +- *(1) Filter isnotnull(turnover#94328540) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94328540, year#94328492], [isnotnull(turnover#94328540)] +- InMemoryRelation [year#94328492, retIC#94328494, resretIC#94328496, numcos#94328498, numdates#94328500, annual_bmret#94328502, annual_ret#94328504, std_ret#94328506, Sharpe_ret#94328508, PctPos_ret#94328510, TR_ret#94328512, IR_ret#94328514, annual_resret#94328516, std_resret#94328518, Sharpe_resret#94328520, PctPos_resret#94328522, TR_resret#94328524, IR_resret#94328526, annual_retnet#94328528, std_retnet#94328530, Sharpe_retnet#94328532, PctPos_retnet#94328534, TR_retnet#94328536, IR_retnet#94328538, turnover#94328540], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94328260 = NA) OR (year#94328260 = null)) THEN null ELSE cast(year#94328260 as float) END AS year#94328492, CASE WHEN ((retIC#94328262 = NA) OR (retIC#94328262 = null)) THEN null ELSE cast(retIC#94328262 as float) END AS retIC#94328494, CASE WHEN ((resretIC#94328263 = NA) OR (resretIC#94328263 = null)) THEN null ELSE cast(resretIC#94328263 as float) END AS resretIC#94328496, CASE WHEN ((numcos#94328266 = NA) OR (numcos#94328266 = null)) THEN null ELSE cast(numcos#94328266 as float) END AS numcos#94328498, CASE WHEN ((numdates#94328268 = NA) OR (numdates#94328268 = null)) THEN null ELSE cast(numdates#94328268 as int) END AS numdates#94328500, CASE WHEN ((annual_bmret#94328270 = NA) OR (annual_bmret#94328270 = null)) THEN null ELSE cast(annual_bmret#94328270 as float) END AS annual_bmret#94328502, CASE WHEN ((annual_ret#94328271 = NA) OR (annual_ret#94328271 = null)) THEN null ELSE cast(annual_ret#94328271 as float) END AS annual_ret#94328504, CASE WHEN ((std_ret#94328274 = NA) OR (std_ret#94328274 = null)) THEN null ELSE cast(std_ret#94328274 as float) END AS std_ret#94328506, CASE WHEN ((Sharpe_ret#94328276 = NA) OR (Sharpe_ret#94328276 = null)) THEN null ELSE cast(Sharpe_ret#94328276 as float) END AS Sharpe_ret#94328508, CASE WHEN ((PctPos_ret#94328278 = NA) OR (PctPos_ret#94328278 = null)) THEN null ELSE cast(PctPos_ret#94328278 as float) END AS PctPos_ret#94328510, CASE WHEN ((TR_ret#94328280 = NA) OR (TR_ret#94328280 = null)) THEN null ELSE cast(TR_ret#94328280 as float) END AS TR_ret#94328512, CASE WHEN ((IR_ret#94328283 = NA) OR (IR_ret#94328283 = null)) THEN null ELSE cast(IR_ret#94328283 as float) END AS IR_ret#94328514, CASE WHEN ((annual_resret#94328285 = NA) OR (annual_resret#94328285 = null)) THEN null ELSE cast(annual_resret#94328285 as float) END AS annual_resret#94328516, CASE WHEN ((std_resret#94328287 = NA) OR (std_resret#94328287 = null)) THEN null ELSE cast(std_resret#94328287 as float) END AS std_resret#94328518, CASE WHEN ((Sharpe_resret#94328289 = NA) OR (Sharpe_resret#94328289 = null)) THEN null ELSE cast(Sharpe_resret#94328289 as float) END AS Sharpe_resret#94328520, CASE WHEN ((PctPos_resret#94328291 = NA) OR (PctPos_resret#94328291 = null)) THEN null ELSE cast(PctPos_resret#94328291 as float) END AS PctPos_resret#94328522, CASE WHEN ((TR_resret#94328293 = NA) OR (TR_resret#94328293 = null)) THEN null ELSE cast(TR_resret#94328293 as float) END AS TR_resret#94328524, CASE WHEN ((IR_resret#94328295 = NA) OR (IR_resret#94328295 = null)) THEN null ELSE cast(IR_resret#94328295 as float) END AS IR_resret#94328526, CASE WHEN ((annual_retnet#94328297 = NA) OR (annual_retnet#94328297 = null)) THEN null ELSE cast(annual_retnet#94328297 as float) END AS annual_retnet#94328528, CASE WHEN ((std_retnet#94328299 = NA) OR (std_retnet#94328299 = null)) THEN null ELSE cast(std_retnet#94328299 as float) END AS std_retnet#94328530, CASE WHEN ((Sharpe_retnet#94328301 = NA) OR (Sharpe_retnet#94328301 = null)) THEN null ELSE cast(Sharpe_retnet#94328301 as float) END AS Sharpe_retnet#94328532, CASE WHEN ((PctPos_retnet#94328303 = NA) OR (PctPos_retnet#94328303 = null)) THEN null ELSE cast(PctPos_retnet#94328303 as float) END AS PctPos_retnet#94328534, CASE WHEN ((TR_retnet#94328305 = NA) OR (TR_retnet#94328305 = null)) THEN null ELSE cast(TR_retnet#94328305 as float) END AS TR_retnet#94328536, CASE WHEN ((IR_retnet#94328307 = NA) OR (IR_retnet#94328307 = null)) THEN null ELSE cast(IR_retnet#94328307 as float) END AS IR_retnet#94328538, CASE WHEN ((turnover#94328310 = NA) OR (turnover#94328310 = null)) THEN null ELSE cast(turnover#94328310 as float) END AS turnover#94328540] +- FileScan csv [year#94328260,retIC#94328262,resretIC#94328263,numcos#94328266,numdates#94328268,annual_bmret#94328270,annual_ret#94328271,std_ret#94328274,Sharpe_ret#94328276,PctPos_ret#94328278,TR_ret#94328280,IR_ret#94328283,annual_resret#94328285,std_resret#94328287,Sharpe_resret#94328289,PctPos_resret#94328291,TR_resret#94328293,IR_resret#94328295,annual_retnet#94328297,std_retnet#94328299,Sharpe_retnet#94328301,PctPos_retnet#94328303,TR_retnet#94328305,IR_retnet#94328307,turnover#94328310] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94328492 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94328540, year#94328492] Arguments: [turnover#94328540, year#94328492], [isnotnull(turnover#94328540)] (4) InMemoryRelation Arguments: [year#94328492, retIC#94328494, resretIC#94328496, numcos#94328498, numdates#94328500, annual_bmret#94328502, annual_ret#94328504, std_ret#94328506, Sharpe_ret#94328508, PctPos_ret#94328510, TR_ret#94328512, IR_ret#94328514, annual_resret#94328516, std_resret#94328518, Sharpe_resret#94328520, PctPos_resret#94328522, TR_resret#94328524, IR_resret#94328526, annual_retnet#94328528, std_retnet#94328530, Sharpe_retnet#94328532, PctPos_retnet#94328534, TR_retnet#94328536, IR_retnet#94328538, turnover#94328540], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94328260 = NA) OR (year#94328260 = null)) THEN null ELSE cast(year#94328260 as float) END AS year#94328492, CASE WHEN ((retIC#94328262 = NA) OR (retIC#94328262 = null)) THEN null ELSE cast(retIC#94328262 as float) END AS retIC#94328494, CASE WHEN ((resretIC#94328263 = NA) OR (resretIC#94328263 = null)) THEN null ELSE cast(resretIC#94328263 as float) END AS resretIC#94328496, CASE WHEN ((numcos#94328266 = NA) OR (numcos#94328266 = null)) THEN null ELSE cast(numcos#94328266 as float) END AS numcos#94328498, CASE WHEN ((numdates#94328268 = NA) OR (numdates#94328268 = null)) THEN null ELSE cast(numdates#94328268 as int) END AS numdates#94328500, CASE WHEN ((annual_bmret#94328270 = NA) OR (annual_bmret#94328270 = null)) THEN null ELSE cast(annual_bmret#94328270 as float) END AS annual_bmret#94328502, CASE WHEN ((annual_ret#94328271 = NA) OR (annual_ret#94328271 = null)) THEN null ELSE cast(annual_ret#94328271 as float) END AS annual_ret#94328504, CASE WHEN ((std_ret#94328274 = NA) OR (std_ret#94328274 = null)) THEN null ELSE cast(std_ret#94328274 as float) END AS std_ret#94328506, CASE WHEN ((Sharpe_ret#94328276 = NA) OR (Sharpe_ret#94328276 = null)) THEN null ELSE cast(Sharpe_ret#94328276 as float) END AS Sharpe_ret#94328508, CASE WHEN ((PctPos_ret#94328278 = NA) OR (PctPos_ret#94328278 = null)) THEN null ELSE cast(PctPos_ret#94328278 as float) END AS PctPos_ret#94328510, CASE WHEN ((TR_ret#94328280 = NA) OR (TR_ret#94328280 = null)) THEN null ELSE cast(TR_ret#94328280 as float) END AS TR_ret#94328512, CASE WHEN ((IR_ret#94328283 = NA) OR (IR_ret#94328283 = null)) THEN null ELSE cast(IR_ret#94328283 as float) END AS IR_ret#94328514, CASE WHEN ((annual_resret#94328285 = NA) OR (annual_resret#94328285 = null)) THEN null ELSE cast(annual_resret#94328285 as float) END AS annual_resret#94328516, CASE WHEN ((std_resret#94328287 = NA) OR (std_resret#94328287 = null)) THEN null ELSE cast(std_resret#94328287 as float) END AS std_resret#94328518, CASE WHEN ((Sharpe_resret#94328289 = NA) OR (Sharpe_resret#94328289 = null)) THEN null ELSE cast(Sharpe_resret#94328289 as float) END AS Sharpe_resret#94328520, CASE WHEN ((PctPos_resret#94328291 = NA) OR (PctPos_resret#94328291 = null)) THEN null ELSE cast(PctPos_resret#94328291 as float) END AS PctPos_resret#94328522, CASE WHEN ((TR_resret#94328293 = NA) OR (TR_resret#94328293 = null)) THEN null ELSE cast(TR_resret#94328293 as float) END AS TR_resret#94328524, CASE WHEN ((IR_resret#94328295 = NA) OR (IR_resret#94328295 = null)) THEN null ELSE cast(IR_resret#94328295 as float) END AS IR_resret#94328526, CASE WHEN ((annual_retnet#94328297 = NA) OR (annual_retnet#94328297 = null)) THEN null ELSE cast(annual_retnet#94328297 as float) END AS annual_retnet#94328528, CASE WHEN ((std_retnet#94328299 = NA) OR (std_retnet#94328299 = null)) THEN null ELSE cast(std_retnet#94328299 as float) END AS std_retnet#94328530, CASE WHEN ((Sharpe_retnet#94328301 = NA) OR (Sharpe_retnet#94328301 = null)) THEN null ELSE cast(Sharpe_retnet#94328301 as float) END AS Sharpe_retnet#94328532, CASE WHEN ((PctPos_retnet#94328303 = NA) OR (PctPos_retnet#94328303 = null)) THEN null ELSE cast(PctPos_retnet#94328303 as float) END AS PctPos_retnet#94328534, CASE WHEN ((TR_retnet#94328305 = NA) OR (TR_retnet#94328305 = null)) THEN null ELSE cast(TR_retnet#94328305 as float) END AS TR_retnet#94328536, CASE WHEN ((IR_retnet#94328307 = NA) OR (IR_retnet#94328307 = null)) THEN null ELSE cast(IR_retnet#94328307 as float) END AS IR_retnet#94328538, CASE WHEN ((turnover#94328310 = NA) OR (turnover#94328310 = null)) THEN null ELSE cast(turnover#94328310 as float) END AS turnover#94328540] +- FileScan csv [year#94328260,retIC#94328262,resretIC#94328263,numcos#94328266,numdates#94328268,annual_bmret#94328270,annual_ret#94328271,std_ret#94328274,Sharpe_ret#94328276,PctPos_ret#94328278,TR_ret#94328280,IR_ret#94328283,annual_resret#94328285,std_resret#94328287,Sharpe_resret#94328289,PctPos_resret#94328291,TR_resret#94328293,IR_resret#94328295,annual_retnet#94328297,std_retnet#94328299,Sharpe_retnet#94328301,PctPos_retnet#94328303,TR_retnet#94328305,IR_retnet#94328307,turnover#94328310] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94328260, retIC#94328262, resretIC#94328263, numcos#94328266, numdates#94328268, annual_bmret#94328270, annual_ret#94328271, std_ret#94328274, Sharpe_ret#94328276, PctPos_ret#94328278, TR_ret#94328280, IR_ret#94328283, annual_resret#94328285, std_resret#94328287, Sharpe_resret#94328289, PctPos_resret#94328291, TR_resret#94328293, IR_resret#94328295, annual_retnet#94328297, std_retnet#94328299, Sharpe_retnet#94328301, PctPos_retnet#94328303, TR_retnet#94328305, IR_retnet#94328307, turnover#94328310] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/estimize_signal_history/estimizesignal_postearnings/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94328260 = NA) OR (year#94328260 = null)) THEN null ELSE cast(year#94328260 as float) END AS year#94328492, CASE WHEN ((retIC#94328262 = NA) OR (retIC#94328262 = null)) THEN null ELSE cast(retIC#94328262 as float) END AS retIC#94328494, CASE WHEN ((resretIC#94328263 = NA) OR (resretIC#94328263 = null)) THEN null ELSE cast(resretIC#94328263 as float) END AS resretIC#94328496, CASE WHEN ((numcos#94328266 = NA) OR (numcos#94328266 = null)) THEN null ELSE cast(numcos#94328266 as float) END AS numcos#94328498, CASE WHEN ((numdates#94328268 = NA) OR (numdates#94328268 = null)) THEN null ELSE cast(numdates#94328268 as int) END AS numdates#94328500, CASE WHEN ((annual_bmret#94328270 = NA) OR (annual_bmret#94328270 = null)) THEN null ELSE cast(annual_bmret#94328270 as float) END AS annual_bmret#94328502, CASE WHEN ((annual_ret#94328271 = NA) OR (annual_ret#94328271 = null)) THEN null ELSE cast(annual_ret#94328271 as float) END AS annual_ret#94328504, CASE WHEN ((std_ret#94328274 = NA) OR (std_ret#94328274 = null)) THEN null ELSE cast(std_ret#94328274 as float) END AS std_ret#94328506, CASE WHEN ((Sharpe_ret#94328276 = NA) OR (Sharpe_ret#94328276 = null)) THEN null ELSE cast(Sharpe_ret#94328276 as float) END AS Sharpe_ret#94328508, CASE WHEN ((PctPos_ret#94328278 = NA) OR (PctPos_ret#94328278 = null)) THEN null ELSE cast(PctPos_ret#94328278 as float) END AS PctPos_ret#94328510, CASE WHEN ((TR_ret#94328280 = NA) OR (TR_ret#94328280 = null)) THEN null ELSE cast(TR_ret#94328280 as float) END AS TR_ret#94328512, CASE WHEN ((IR_ret#94328283 = NA) OR (IR_ret#94328283 = null)) THEN null ELSE cast(IR_ret#94328283 as float) END AS IR_ret#94328514, CASE WHEN ((annual_resret#94328285 = NA) OR (annual_resret#94328285 = null)) THEN null ELSE cast(annual_resret#94328285 as float) END AS annual_resret#94328516, CASE WHEN ((std_resret#94328287 = NA) OR (std_resret#94328287 = null)) THEN null ELSE cast(std_resret#94328287 as float) END AS std_resret#94328518, CASE WHEN ((Sharpe_resret#94328289 = NA) OR (Sharpe_resret#94328289 = null)) THEN null ELSE cast(Sharpe_resret#94328289 as float) END AS Sharpe_resret#94328520, CASE WHEN ((PctPos_resret#94328291 = NA) OR (PctPos_resret#94328291 = null)) THEN null ELSE cast(PctPos_resret#94328291 as float) END AS PctPos_resret#94328522, CASE WHEN ((TR_resret#94328293 = NA) OR (TR_resret#94328293 = null)) THEN null ELSE cast(TR_resret#94328293 as float) END AS TR_resret#94328524, CASE WHEN ((IR_resret#94328295 = NA) OR (IR_resret#94328295 = null)) THEN null ELSE cast(IR_resret#94328295 as float) END AS IR_resret#94328526, CASE WHEN ((annual_retnet#94328297 = NA) OR (annual_retnet#94328297 = null)) THEN null ELSE cast(annual_retnet#94328297 as float) END AS annual_retnet#94328528, CASE WHEN ((std_retnet#94328299 = NA) OR (std_retnet#94328299 = null)) THEN null ELSE cast(std_retnet#94328299 as float) END AS std_retnet#94328530, CASE WHEN ((Sharpe_retnet#94328301 = NA) OR (Sharpe_retnet#94328301 = null)) THEN null ELSE cast(Sharpe_retnet#94328301 as float) END AS Sharpe_retnet#94328532, CASE WHEN ((PctPos_retnet#94328303 = NA) OR (PctPos_retnet#94328303 = null)) THEN null ELSE cast(PctPos_retnet#94328303 as float) END AS PctPos_retnet#94328534, CASE WHEN ((TR_retnet#94328305 = NA) OR (TR_retnet#94328305 = null)) THEN null ELSE cast(TR_retnet#94328305 as float) END AS TR_retnet#94328536, CASE WHEN ((IR_retnet#94328307 = NA) OR (IR_retnet#94328307 = null)) THEN null ELSE cast(IR_retnet#94328307 as float) END AS IR_retnet#94328538, CASE WHEN ((turnover#94328310 = NA) OR (turnover#94328310 = null)) THEN null ELSE cast(turnover#94328310 as float) END AS turnover#94328540] Input [25]: [year#94328260, retIC#94328262, resretIC#94328263, numcos#94328266, numdates#94328268, annual_bmret#94328270, annual_ret#94328271, std_ret#94328274, Sharpe_ret#94328276, PctPos_ret#94328278, TR_ret#94328280, IR_ret#94328283, annual_resret#94328285, std_resret#94328287, Sharpe_resret#94328289, PctPos_resret#94328291, TR_resret#94328293, IR_resret#94328295, annual_retnet#94328297, std_retnet#94328299, Sharpe_retnet#94328301, PctPos_retnet#94328303, TR_retnet#94328305, IR_retnet#94328307, turnover#94328310] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94328540, year#94328492] (8) Filter [codegen id : 1] Input [2]: [turnover#94328540, year#94328492] Condition : isnotnull(turnover#94328540) (9) Project [codegen id : 1] Output [3]: [year#94328492, turnover#94328540, (1.0 / cast(turnover#94328540 as double)) AS days_hold#94328575] Input [2]: [turnover#94328540, year#94328492] (10) Exchange Input [3]: [year#94328492, turnover#94328540, days_hold#94328575] Arguments: rangepartitioning(year#94328492 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7531340] (11) Sort [codegen id : 2] Input [3]: [year#94328492, turnover#94328540, days_hold#94328575] Arguments: [year#94328492 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94328492, turnover#94328540, days_hold#94328575] (13) CollectLimit Input [3]: [year#94328492, turnover#94328540, days_hold#94328575] Arguments: 1000000