== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94191798, turnover#94191864, days_hold#94191905] Arguments: [year#94191798, turnover#94191864, days_hold#94191905] (2) InMemoryRelation Arguments: [year#94191798, turnover#94191864, days_hold#94191905], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94191798 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94191798 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7520234] +- *(1) Project [year#94191798, turnover#94191864, (1.0 / cast(turnover#94191864 as double)) AS days_hold#94191905] +- *(1) Filter isnotnull(turnover#94191864) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94191864, year#94191798], [isnotnull(turnover#94191864)] +- InMemoryRelation [year#94191798, retIC#94191799, resretIC#94191800, numcos#94191801, numdates#94191835, annual_bmret#94191836, annual_ret#94191837, std_ret#94191838, Sharpe_ret#94191839, PctPos_ret#94191840, TR_ret#94191841, IR_ret#94191842, annual_resret#94191843, std_resret#94191845, Sharpe_resret#94191847, PctPos_resret#94191849, TR_resret#94191850, IR_resret#94191852, annual_retnet#94191854, std_retnet#94191856, Sharpe_retnet#94191857, PctPos_retnet#94191859, TR_retnet#94191861, IR_retnet#94191863, turnover#94191864], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94191598 = NA) OR (year#94191598 = null)) THEN null ELSE cast(year#94191598 as int) END AS year#94191798, CASE WHEN ((retIC#94191599 = NA) OR (retIC#94191599 = null)) THEN null ELSE cast(retIC#94191599 as float) END AS retIC#94191799, CASE WHEN ((resretIC#94191600 = NA) OR (resretIC#94191600 = null)) THEN null ELSE cast(resretIC#94191600 as float) END AS resretIC#94191800, CASE WHEN ((numcos#94191601 = NA) OR (numcos#94191601 = null)) THEN null ELSE cast(numcos#94191601 as float) END AS numcos#94191801, CASE WHEN ((numdates#94191602 = NA) OR (numdates#94191602 = null)) THEN null ELSE cast(numdates#94191602 as int) END AS numdates#94191835, CASE WHEN ((annual_bmret#94191603 = NA) OR (annual_bmret#94191603 = null)) THEN null ELSE cast(annual_bmret#94191603 as float) END AS annual_bmret#94191836, CASE WHEN ((annual_ret#94191604 = NA) OR (annual_ret#94191604 = null)) THEN null ELSE cast(annual_ret#94191604 as float) END AS annual_ret#94191837, CASE WHEN ((std_ret#94191605 = NA) OR (std_ret#94191605 = null)) THEN null ELSE cast(std_ret#94191605 as float) END AS std_ret#94191838, CASE WHEN ((Sharpe_ret#94191606 = NA) OR (Sharpe_ret#94191606 = null)) THEN null ELSE cast(Sharpe_ret#94191606 as float) END AS Sharpe_ret#94191839, CASE WHEN ((PctPos_ret#94191607 = NA) OR (PctPos_ret#94191607 = null)) THEN null ELSE cast(PctPos_ret#94191607 as float) END AS PctPos_ret#94191840, CASE WHEN ((TR_ret#94191608 = NA) OR (TR_ret#94191608 = null)) THEN null ELSE cast(TR_ret#94191608 as float) END AS TR_ret#94191841, CASE WHEN ((IR_ret#94191609 = NA) OR (IR_ret#94191609 = null)) THEN null ELSE cast(IR_ret#94191609 as float) END AS IR_ret#94191842, CASE WHEN ((annual_resret#94191610 = NA) OR (annual_resret#94191610 = null)) THEN null ELSE cast(annual_resret#94191610 as float) END AS annual_resret#94191843, CASE WHEN ((std_resret#94191611 = NA) OR (std_resret#94191611 = null)) THEN null ELSE cast(std_resret#94191611 as float) END AS std_resret#94191845, CASE WHEN ((Sharpe_resret#94191612 = NA) OR (Sharpe_resret#94191612 = null)) THEN null ELSE cast(Sharpe_resret#94191612 as float) END AS Sharpe_resret#94191847, CASE WHEN ((PctPos_resret#94191613 = NA) OR (PctPos_resret#94191613 = null)) THEN null ELSE cast(PctPos_resret#94191613 as float) END AS PctPos_resret#94191849, CASE WHEN ((TR_resret#94191614 = NA) OR (TR_resret#94191614 = null)) THEN null ELSE cast(TR_resret#94191614 as float) END AS TR_resret#94191850, CASE WHEN ((IR_resret#94191615 = NA) OR (IR_resret#94191615 = null)) THEN null ELSE cast(IR_resret#94191615 as float) END AS IR_resret#94191852, CASE WHEN ((annual_retnet#94191616 = NA) OR (annual_retnet#94191616 = null)) THEN null ELSE cast(annual_retnet#94191616 as float) END AS annual_retnet#94191854, CASE WHEN ((std_retnet#94191617 = NA) OR (std_retnet#94191617 = null)) THEN null ELSE cast(std_retnet#94191617 as float) END AS std_retnet#94191856, CASE WHEN ((Sharpe_retnet#94191618 = NA) OR (Sharpe_retnet#94191618 = null)) THEN null ELSE cast(Sharpe_retnet#94191618 as float) END AS Sharpe_retnet#94191857, CASE WHEN ((PctPos_retnet#94191619 = NA) OR (PctPos_retnet#94191619 = null)) THEN null ELSE cast(PctPos_retnet#94191619 as float) END AS PctPos_retnet#94191859, CASE WHEN ((TR_retnet#94191620 = NA) OR (TR_retnet#94191620 = null)) THEN null ELSE cast(TR_retnet#94191620 as float) END AS TR_retnet#94191861, CASE WHEN ((IR_retnet#94191621 = NA) OR (IR_retnet#94191621 = null)) THEN null ELSE cast(IR_retnet#94191621 as float) END AS IR_retnet#94191863, CASE WHEN ((turnover#94191622 = NA) OR (turnover#94191622 = null)) THEN null ELSE cast(turnover#94191622 as float) END AS turnover#94191864] +- FileScan csv [year#94191598,retIC#94191599,resretIC#94191600,numcos#94191601,numdates#94191602,annual_bmret#94191603,annual_ret#94191604,std_ret#94191605,Sharpe_ret#94191606,PctPos_ret#94191607,TR_ret#94191608,IR_ret#94191609,annual_resret#94191610,std_resret#94191611,Sharpe_resret#94191612,PctPos_resret#94191613,TR_resret#94191614,IR_resret#94191615,annual_retnet#94191616,std_retnet#94191617,Sharpe_retnet#94191618,PctPos_retnet#94191619,TR_retnet#94191620,IR_retnet#94191621,turnover#94191622] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/size/stat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94191798 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94191864, year#94191798] Arguments: [turnover#94191864, year#94191798], [isnotnull(turnover#94191864)] (4) InMemoryRelation Arguments: [year#94191798, retIC#94191799, resretIC#94191800, numcos#94191801, numdates#94191835, annual_bmret#94191836, annual_ret#94191837, std_ret#94191838, Sharpe_ret#94191839, PctPos_ret#94191840, TR_ret#94191841, IR_ret#94191842, annual_resret#94191843, std_resret#94191845, Sharpe_resret#94191847, PctPos_resret#94191849, TR_resret#94191850, IR_resret#94191852, annual_retnet#94191854, std_retnet#94191856, Sharpe_retnet#94191857, PctPos_retnet#94191859, TR_retnet#94191861, IR_retnet#94191863, turnover#94191864], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94191598 = NA) OR (year#94191598 = null)) THEN null ELSE cast(year#94191598 as int) END AS year#94191798, CASE WHEN ((retIC#94191599 = NA) OR (retIC#94191599 = null)) THEN null ELSE cast(retIC#94191599 as float) END AS retIC#94191799, CASE WHEN ((resretIC#94191600 = NA) OR (resretIC#94191600 = null)) THEN null ELSE cast(resretIC#94191600 as float) END AS resretIC#94191800, CASE WHEN ((numcos#94191601 = NA) OR (numcos#94191601 = null)) THEN null ELSE cast(numcos#94191601 as float) END AS numcos#94191801, CASE WHEN ((numdates#94191602 = NA) OR (numdates#94191602 = null)) THEN null ELSE cast(numdates#94191602 as int) END AS numdates#94191835, CASE WHEN ((annual_bmret#94191603 = NA) OR (annual_bmret#94191603 = null)) THEN null ELSE cast(annual_bmret#94191603 as float) END AS annual_bmret#94191836, CASE WHEN ((annual_ret#94191604 = NA) OR (annual_ret#94191604 = null)) THEN null ELSE cast(annual_ret#94191604 as float) END AS annual_ret#94191837, CASE WHEN ((std_ret#94191605 = NA) OR (std_ret#94191605 = null)) THEN null ELSE cast(std_ret#94191605 as float) END AS std_ret#94191838, CASE WHEN ((Sharpe_ret#94191606 = NA) OR (Sharpe_ret#94191606 = null)) THEN null ELSE cast(Sharpe_ret#94191606 as float) END AS Sharpe_ret#94191839, CASE WHEN ((PctPos_ret#94191607 = NA) OR (PctPos_ret#94191607 = null)) THEN null ELSE cast(PctPos_ret#94191607 as float) END AS PctPos_ret#94191840, CASE WHEN ((TR_ret#94191608 = NA) OR (TR_ret#94191608 = null)) THEN null ELSE cast(TR_ret#94191608 as float) END AS TR_ret#94191841, CASE WHEN ((IR_ret#94191609 = NA) OR (IR_ret#94191609 = null)) THEN null ELSE cast(IR_ret#94191609 as float) END AS IR_ret#94191842, CASE WHEN ((annual_resret#94191610 = NA) OR (annual_resret#94191610 = null)) THEN null ELSE cast(annual_resret#94191610 as float) END AS annual_resret#94191843, CASE WHEN ((std_resret#94191611 = NA) OR (std_resret#94191611 = null)) THEN null ELSE cast(std_resret#94191611 as float) END AS std_resret#94191845, CASE WHEN ((Sharpe_resret#94191612 = NA) OR (Sharpe_resret#94191612 = null)) THEN null ELSE cast(Sharpe_resret#94191612 as float) END AS Sharpe_resret#94191847, CASE WHEN ((PctPos_resret#94191613 = NA) OR (PctPos_resret#94191613 = null)) THEN null ELSE cast(PctPos_resret#94191613 as float) END AS PctPos_resret#94191849, CASE WHEN ((TR_resret#94191614 = NA) OR (TR_resret#94191614 = null)) THEN null ELSE cast(TR_resret#94191614 as float) END AS TR_resret#94191850, CASE WHEN ((IR_resret#94191615 = NA) OR (IR_resret#94191615 = null)) THEN null ELSE cast(IR_resret#94191615 as float) END AS IR_resret#94191852, CASE WHEN ((annual_retnet#94191616 = NA) OR (annual_retnet#94191616 = null)) THEN null ELSE cast(annual_retnet#94191616 as float) END AS annual_retnet#94191854, CASE WHEN ((std_retnet#94191617 = NA) OR (std_retnet#94191617 = null)) THEN null ELSE cast(std_retnet#94191617 as float) END AS std_retnet#94191856, CASE WHEN ((Sharpe_retnet#94191618 = NA) OR (Sharpe_retnet#94191618 = null)) THEN null ELSE cast(Sharpe_retnet#94191618 as float) END AS Sharpe_retnet#94191857, CASE WHEN ((PctPos_retnet#94191619 = NA) OR (PctPos_retnet#94191619 = null)) THEN null ELSE cast(PctPos_retnet#94191619 as float) END AS PctPos_retnet#94191859, CASE WHEN ((TR_retnet#94191620 = NA) OR (TR_retnet#94191620 = null)) THEN null ELSE cast(TR_retnet#94191620 as float) END AS TR_retnet#94191861, CASE WHEN ((IR_retnet#94191621 = NA) OR (IR_retnet#94191621 = null)) THEN null ELSE cast(IR_retnet#94191621 as float) END AS IR_retnet#94191863, CASE WHEN ((turnover#94191622 = NA) OR (turnover#94191622 = null)) THEN null ELSE cast(turnover#94191622 as float) END AS turnover#94191864] +- FileScan csv [year#94191598,retIC#94191599,resretIC#94191600,numcos#94191601,numdates#94191602,annual_bmret#94191603,annual_ret#94191604,std_ret#94191605,Sharpe_ret#94191606,PctPos_ret#94191607,TR_ret#94191608,IR_ret#94191609,annual_resret#94191610,std_resret#94191611,Sharpe_resret#94191612,PctPos_resret#94191613,TR_resret#94191614,IR_resret#94191615,annual_retnet#94191616,std_retnet#94191617,Sharpe_retnet#94191618,PctPos_retnet#94191619,TR_retnet#94191620,IR_retnet#94191621,turnover#94191622] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/size/stat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94191598, retIC#94191599, resretIC#94191600, numcos#94191601, numdates#94191602, annual_bmret#94191603, annual_ret#94191604, std_ret#94191605, Sharpe_ret#94191606, PctPos_ret#94191607, TR_ret#94191608, IR_ret#94191609, annual_resret#94191610, std_resret#94191611, Sharpe_resret#94191612, PctPos_resret#94191613, TR_resret#94191614, IR_resret#94191615, annual_retnet#94191616, std_retnet#94191617, Sharpe_retnet#94191618, PctPos_retnet#94191619, TR_retnet#94191620, IR_retnet#94191621, turnover#94191622] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/size/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94191598 = NA) OR (year#94191598 = null)) THEN null ELSE cast(year#94191598 as int) END AS year#94191798, CASE WHEN ((retIC#94191599 = NA) OR (retIC#94191599 = null)) THEN null ELSE cast(retIC#94191599 as float) END AS retIC#94191799, CASE WHEN ((resretIC#94191600 = NA) OR (resretIC#94191600 = null)) THEN null ELSE cast(resretIC#94191600 as float) END AS resretIC#94191800, CASE WHEN ((numcos#94191601 = NA) OR (numcos#94191601 = null)) THEN null ELSE cast(numcos#94191601 as float) END AS numcos#94191801, CASE WHEN ((numdates#94191602 = NA) OR (numdates#94191602 = null)) THEN null ELSE cast(numdates#94191602 as int) END AS numdates#94191835, CASE WHEN ((annual_bmret#94191603 = NA) OR (annual_bmret#94191603 = null)) THEN null ELSE cast(annual_bmret#94191603 as float) END AS annual_bmret#94191836, CASE WHEN ((annual_ret#94191604 = NA) OR (annual_ret#94191604 = null)) THEN null ELSE cast(annual_ret#94191604 as float) END AS annual_ret#94191837, CASE WHEN ((std_ret#94191605 = NA) OR (std_ret#94191605 = null)) THEN null ELSE cast(std_ret#94191605 as float) END AS std_ret#94191838, CASE WHEN ((Sharpe_ret#94191606 = NA) OR (Sharpe_ret#94191606 = null)) THEN null ELSE cast(Sharpe_ret#94191606 as float) END AS Sharpe_ret#94191839, CASE WHEN ((PctPos_ret#94191607 = NA) OR (PctPos_ret#94191607 = null)) THEN null ELSE cast(PctPos_ret#94191607 as float) END AS PctPos_ret#94191840, CASE WHEN ((TR_ret#94191608 = NA) OR (TR_ret#94191608 = null)) THEN null ELSE cast(TR_ret#94191608 as float) END AS TR_ret#94191841, CASE WHEN ((IR_ret#94191609 = NA) OR (IR_ret#94191609 = null)) THEN null ELSE cast(IR_ret#94191609 as float) END AS IR_ret#94191842, CASE WHEN ((annual_resret#94191610 = NA) OR (annual_resret#94191610 = null)) THEN null ELSE cast(annual_resret#94191610 as float) END AS annual_resret#94191843, CASE WHEN ((std_resret#94191611 = NA) OR (std_resret#94191611 = null)) THEN null ELSE cast(std_resret#94191611 as float) END AS std_resret#94191845, CASE WHEN ((Sharpe_resret#94191612 = NA) OR (Sharpe_resret#94191612 = null)) THEN null ELSE cast(Sharpe_resret#94191612 as float) END AS Sharpe_resret#94191847, CASE WHEN ((PctPos_resret#94191613 = NA) OR (PctPos_resret#94191613 = null)) THEN null ELSE cast(PctPos_resret#94191613 as float) END AS PctPos_resret#94191849, CASE WHEN ((TR_resret#94191614 = NA) OR (TR_resret#94191614 = null)) THEN null ELSE cast(TR_resret#94191614 as float) END AS TR_resret#94191850, CASE WHEN ((IR_resret#94191615 = NA) OR (IR_resret#94191615 = null)) THEN null ELSE cast(IR_resret#94191615 as float) END AS IR_resret#94191852, CASE WHEN ((annual_retnet#94191616 = NA) OR (annual_retnet#94191616 = null)) THEN null ELSE cast(annual_retnet#94191616 as float) END AS annual_retnet#94191854, CASE WHEN ((std_retnet#94191617 = NA) OR (std_retnet#94191617 = null)) THEN null ELSE cast(std_retnet#94191617 as float) END AS std_retnet#94191856, CASE WHEN ((Sharpe_retnet#94191618 = NA) OR (Sharpe_retnet#94191618 = null)) THEN null ELSE cast(Sharpe_retnet#94191618 as float) END AS Sharpe_retnet#94191857, CASE WHEN ((PctPos_retnet#94191619 = NA) OR (PctPos_retnet#94191619 = null)) THEN null ELSE cast(PctPos_retnet#94191619 as float) END AS PctPos_retnet#94191859, CASE WHEN ((TR_retnet#94191620 = NA) OR (TR_retnet#94191620 = null)) THEN null ELSE cast(TR_retnet#94191620 as float) END AS TR_retnet#94191861, CASE WHEN ((IR_retnet#94191621 = NA) OR (IR_retnet#94191621 = null)) THEN null ELSE cast(IR_retnet#94191621 as float) END AS IR_retnet#94191863, CASE WHEN ((turnover#94191622 = NA) OR (turnover#94191622 = null)) THEN null ELSE cast(turnover#94191622 as float) END AS turnover#94191864] Input [25]: [year#94191598, retIC#94191599, resretIC#94191600, numcos#94191601, numdates#94191602, annual_bmret#94191603, annual_ret#94191604, std_ret#94191605, Sharpe_ret#94191606, PctPos_ret#94191607, TR_ret#94191608, IR_ret#94191609, annual_resret#94191610, std_resret#94191611, Sharpe_resret#94191612, PctPos_resret#94191613, TR_resret#94191614, IR_resret#94191615, annual_retnet#94191616, std_retnet#94191617, Sharpe_retnet#94191618, PctPos_retnet#94191619, TR_retnet#94191620, IR_retnet#94191621, turnover#94191622] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94191864, year#94191798] (8) Filter [codegen id : 1] Input [2]: [turnover#94191864, year#94191798] Condition : isnotnull(turnover#94191864) (9) Project [codegen id : 1] Output [3]: [year#94191798, turnover#94191864, (1.0 / cast(turnover#94191864 as double)) AS days_hold#94191905] Input [2]: [turnover#94191864, year#94191798] (10) Exchange Input [3]: [year#94191798, turnover#94191864, days_hold#94191905] Arguments: rangepartitioning(year#94191798 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7520234] (11) Sort [codegen id : 2] Input [3]: [year#94191798, turnover#94191864, days_hold#94191905] Arguments: [year#94191798 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94191798, turnover#94191864, days_hold#94191905] (13) CollectLimit Input [3]: [year#94191798, turnover#94191864, days_hold#94191905] Arguments: 1000000