== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94199303, turnover#94199406, days_hold#94199452] Arguments: [year#94199303, turnover#94199406, days_hold#94199452] (2) InMemoryRelation Arguments: [year#94199303, turnover#94199406, days_hold#94199452], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94199303 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94199303 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7520875] +- *(1) Project [year#94199303, turnover#94199406, (1.0 / cast(turnover#94199406 as double)) AS days_hold#94199452] +- *(1) Filter isnotnull(turnover#94199406) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94199406, year#94199303], [isnotnull(turnover#94199406)] +- InMemoryRelation [year#94199303, retIC#94199304, resretIC#94199338, numcos#94199339, numdates#94199340, annual_bmret#94199341, annual_ret#94199375, std_ret#94199376, Sharpe_ret#94199377, PctPos_ret#94199378, TR_ret#94199379, IR_ret#94199380, annual_resret#94199382, std_resret#94199384, Sharpe_resret#94199386, PctPos_resret#94199389, TR_resret#94199391, IR_resret#94199393, annual_retnet#94199394, std_retnet#94199396, Sharpe_retnet#94199398, PctPos_retnet#94199400, TR_retnet#94199402, IR_retnet#94199404, turnover#94199406], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94199136 = NA) OR (year#94199136 = null)) THEN null ELSE cast(year#94199136 as int) END AS year#94199303, CASE WHEN ((retIC#94199137 = NA) OR (retIC#94199137 = null)) THEN null ELSE cast(retIC#94199137 as float) END AS retIC#94199304, CASE WHEN ((resretIC#94199138 = NA) OR (resretIC#94199138 = null)) THEN null ELSE cast(resretIC#94199138 as float) END AS resretIC#94199338, CASE WHEN ((numcos#94199139 = NA) OR (numcos#94199139 = null)) THEN null ELSE cast(numcos#94199139 as float) END AS numcos#94199339, CASE WHEN ((numdates#94199140 = NA) OR (numdates#94199140 = null)) THEN null ELSE cast(numdates#94199140 as int) END AS numdates#94199340, CASE WHEN ((annual_bmret#94199141 = NA) OR (annual_bmret#94199141 = null)) THEN null ELSE cast(annual_bmret#94199141 as float) END AS annual_bmret#94199341, CASE WHEN ((annual_ret#94199142 = NA) OR (annual_ret#94199142 = null)) THEN null ELSE cast(annual_ret#94199142 as float) END AS annual_ret#94199375, CASE WHEN ((std_ret#94199143 = NA) OR (std_ret#94199143 = null)) THEN null ELSE cast(std_ret#94199143 as float) END AS std_ret#94199376, CASE WHEN ((Sharpe_ret#94199144 = NA) OR (Sharpe_ret#94199144 = null)) THEN null ELSE cast(Sharpe_ret#94199144 as float) END AS Sharpe_ret#94199377, CASE WHEN ((PctPos_ret#94199145 = NA) OR (PctPos_ret#94199145 = null)) THEN null ELSE cast(PctPos_ret#94199145 as float) END AS PctPos_ret#94199378, CASE WHEN ((TR_ret#94199146 = NA) OR (TR_ret#94199146 = null)) THEN null ELSE cast(TR_ret#94199146 as float) END AS TR_ret#94199379, CASE WHEN ((IR_ret#94199147 = NA) OR (IR_ret#94199147 = null)) THEN null ELSE cast(IR_ret#94199147 as float) END AS IR_ret#94199380, CASE WHEN ((annual_resret#94199148 = NA) OR (annual_resret#94199148 = null)) THEN null ELSE cast(annual_resret#94199148 as float) END AS annual_resret#94199382, CASE WHEN ((std_resret#94199149 = NA) OR (std_resret#94199149 = null)) THEN null ELSE cast(std_resret#94199149 as float) END AS std_resret#94199384, CASE WHEN ((Sharpe_resret#94199150 = NA) OR (Sharpe_resret#94199150 = null)) THEN null ELSE cast(Sharpe_resret#94199150 as float) END AS Sharpe_resret#94199386, CASE WHEN ((PctPos_resret#94199151 = NA) OR (PctPos_resret#94199151 = null)) THEN null ELSE cast(PctPos_resret#94199151 as float) END AS PctPos_resret#94199389, CASE WHEN ((TR_resret#94199152 = NA) OR (TR_resret#94199152 = null)) THEN null ELSE cast(TR_resret#94199152 as float) END AS TR_resret#94199391, CASE WHEN ((IR_resret#94199153 = NA) OR (IR_resret#94199153 = null)) THEN null ELSE cast(IR_resret#94199153 as float) END AS IR_resret#94199393, CASE WHEN ((annual_retnet#94199154 = NA) OR (annual_retnet#94199154 = null)) THEN null ELSE cast(annual_retnet#94199154 as float) END AS annual_retnet#94199394, CASE WHEN ((std_retnet#94199155 = NA) OR (std_retnet#94199155 = null)) THEN null ELSE cast(std_retnet#94199155 as float) END AS std_retnet#94199396, CASE WHEN ((Sharpe_retnet#94199156 = NA) OR (Sharpe_retnet#94199156 = null)) THEN null ELSE cast(Sharpe_retnet#94199156 as float) END AS Sharpe_retnet#94199398, CASE WHEN ((PctPos_retnet#94199157 = NA) OR (PctPos_retnet#94199157 = null)) THEN null ELSE cast(PctPos_retnet#94199157 as float) END AS PctPos_retnet#94199400, CASE WHEN ((TR_retnet#94199158 = NA) OR (TR_retnet#94199158 = null)) THEN null ELSE cast(TR_retnet#94199158 as float) END AS TR_retnet#94199402, CASE WHEN ((IR_retnet#94199159 = NA) OR (IR_retnet#94199159 = null)) THEN null ELSE cast(IR_retnet#94199159 as float) END AS IR_retnet#94199404, CASE WHEN ((turnover#94199160 = NA) OR (turnover#94199160 = null)) THEN null ELSE cast(turnover#94199160 as float) END AS turnover#94199406] +- FileScan csv [year#94199136,retIC#94199137,resretIC#94199138,numcos#94199139,numdates#94199140,annual_bmret#94199141,annual_ret#94199142,std_ret#94199143,Sharpe_ret#94199144,PctPos_ret#94199145,TR_ret#94199146,IR_ret#94199147,annual_resret#94199148,std_resret#94199149,Sharpe_resret#94199150,PctPos_resret#94199151,TR_resret#94199152,IR_resret#94199153,annual_retnet#94199154,std_retnet#94199155,Sharpe_retnet#94199156,PctPos_retnet#94199157,TR_retnet#94199158,IR_retnet#94199159,turnover#94199160] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94199303 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94199406, year#94199303] Arguments: [turnover#94199406, year#94199303], [isnotnull(turnover#94199406)] (4) InMemoryRelation Arguments: [year#94199303, retIC#94199304, resretIC#94199338, numcos#94199339, numdates#94199340, annual_bmret#94199341, annual_ret#94199375, std_ret#94199376, Sharpe_ret#94199377, PctPos_ret#94199378, TR_ret#94199379, IR_ret#94199380, annual_resret#94199382, std_resret#94199384, Sharpe_resret#94199386, PctPos_resret#94199389, TR_resret#94199391, IR_resret#94199393, annual_retnet#94199394, std_retnet#94199396, Sharpe_retnet#94199398, PctPos_retnet#94199400, TR_retnet#94199402, IR_retnet#94199404, turnover#94199406], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94199136 = NA) OR (year#94199136 = null)) THEN null ELSE cast(year#94199136 as int) END AS year#94199303, CASE WHEN ((retIC#94199137 = NA) OR (retIC#94199137 = null)) THEN null ELSE cast(retIC#94199137 as float) END AS retIC#94199304, CASE WHEN ((resretIC#94199138 = NA) OR (resretIC#94199138 = null)) THEN null ELSE cast(resretIC#94199138 as float) END AS resretIC#94199338, CASE WHEN ((numcos#94199139 = NA) OR (numcos#94199139 = null)) THEN null ELSE cast(numcos#94199139 as float) END AS numcos#94199339, CASE WHEN ((numdates#94199140 = NA) OR (numdates#94199140 = null)) THEN null ELSE cast(numdates#94199140 as int) END AS numdates#94199340, CASE WHEN ((annual_bmret#94199141 = NA) OR (annual_bmret#94199141 = null)) THEN null ELSE cast(annual_bmret#94199141 as float) END AS annual_bmret#94199341, CASE WHEN ((annual_ret#94199142 = NA) OR (annual_ret#94199142 = null)) THEN null ELSE cast(annual_ret#94199142 as float) END AS annual_ret#94199375, CASE WHEN ((std_ret#94199143 = NA) OR (std_ret#94199143 = null)) THEN null ELSE cast(std_ret#94199143 as float) END AS std_ret#94199376, CASE WHEN ((Sharpe_ret#94199144 = NA) OR (Sharpe_ret#94199144 = null)) THEN null ELSE cast(Sharpe_ret#94199144 as float) END AS Sharpe_ret#94199377, CASE WHEN ((PctPos_ret#94199145 = NA) OR (PctPos_ret#94199145 = null)) THEN null ELSE cast(PctPos_ret#94199145 as float) END AS PctPos_ret#94199378, CASE WHEN ((TR_ret#94199146 = NA) OR (TR_ret#94199146 = null)) THEN null ELSE cast(TR_ret#94199146 as float) END AS TR_ret#94199379, CASE WHEN ((IR_ret#94199147 = NA) OR (IR_ret#94199147 = null)) THEN null ELSE cast(IR_ret#94199147 as float) END AS IR_ret#94199380, CASE WHEN ((annual_resret#94199148 = NA) OR (annual_resret#94199148 = null)) THEN null ELSE cast(annual_resret#94199148 as float) END AS annual_resret#94199382, CASE WHEN ((std_resret#94199149 = NA) OR (std_resret#94199149 = null)) THEN null ELSE cast(std_resret#94199149 as float) END AS std_resret#94199384, CASE WHEN ((Sharpe_resret#94199150 = NA) OR (Sharpe_resret#94199150 = null)) THEN null ELSE cast(Sharpe_resret#94199150 as float) END AS Sharpe_resret#94199386, CASE WHEN ((PctPos_resret#94199151 = NA) OR (PctPos_resret#94199151 = null)) THEN null ELSE cast(PctPos_resret#94199151 as float) END AS PctPos_resret#94199389, CASE WHEN ((TR_resret#94199152 = NA) OR (TR_resret#94199152 = null)) THEN null ELSE cast(TR_resret#94199152 as float) END AS TR_resret#94199391, CASE WHEN ((IR_resret#94199153 = NA) OR (IR_resret#94199153 = null)) THEN null ELSE cast(IR_resret#94199153 as float) END AS IR_resret#94199393, CASE WHEN ((annual_retnet#94199154 = NA) OR (annual_retnet#94199154 = null)) THEN null ELSE cast(annual_retnet#94199154 as float) END AS annual_retnet#94199394, CASE WHEN ((std_retnet#94199155 = NA) OR (std_retnet#94199155 = null)) THEN null ELSE cast(std_retnet#94199155 as float) END AS std_retnet#94199396, CASE WHEN ((Sharpe_retnet#94199156 = NA) OR (Sharpe_retnet#94199156 = null)) THEN null ELSE cast(Sharpe_retnet#94199156 as float) END AS Sharpe_retnet#94199398, CASE WHEN ((PctPos_retnet#94199157 = NA) OR (PctPos_retnet#94199157 = null)) THEN null ELSE cast(PctPos_retnet#94199157 as float) END AS PctPos_retnet#94199400, CASE WHEN ((TR_retnet#94199158 = NA) OR (TR_retnet#94199158 = null)) THEN null ELSE cast(TR_retnet#94199158 as float) END AS TR_retnet#94199402, CASE WHEN ((IR_retnet#94199159 = NA) OR (IR_retnet#94199159 = null)) THEN null ELSE cast(IR_retnet#94199159 as float) END AS IR_retnet#94199404, CASE WHEN ((turnover#94199160 = NA) OR (turnover#94199160 = null)) THEN null ELSE cast(turnover#94199160 as float) END AS turnover#94199406] +- FileScan csv [year#94199136,retIC#94199137,resretIC#94199138,numcos#94199139,numdates#94199140,annual_bmret#94199141,annual_ret#94199142,std_ret#94199143,Sharpe_ret#94199144,PctPos_ret#94199145,TR_ret#94199146,IR_ret#94199147,annual_resret#94199148,std_resret#94199149,Sharpe_resret#94199150,PctPos_resret#94199151,TR_resret#94199152,IR_resret#94199153,annual_retnet#94199154,std_retnet#94199155,Sharpe_retnet#94199156,PctPos_retnet#94199157,TR_retnet#94199158,IR_retnet#94199159,turnover#94199160] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94199136, retIC#94199137, resretIC#94199138, numcos#94199139, numdates#94199140, annual_bmret#94199141, annual_ret#94199142, std_ret#94199143, Sharpe_ret#94199144, PctPos_ret#94199145, TR_ret#94199146, IR_ret#94199147, annual_resret#94199148, std_resret#94199149, Sharpe_resret#94199150, PctPos_resret#94199151, TR_resret#94199152, IR_resret#94199153, annual_retnet#94199154, std_retnet#94199155, Sharpe_retnet#94199156, PctPos_retnet#94199157, TR_retnet#94199158, IR_retnet#94199159, turnover#94199160] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94199136 = NA) OR (year#94199136 = null)) THEN null ELSE cast(year#94199136 as int) END AS year#94199303, CASE WHEN ((retIC#94199137 = NA) OR (retIC#94199137 = null)) THEN null ELSE cast(retIC#94199137 as float) END AS retIC#94199304, CASE WHEN ((resretIC#94199138 = NA) OR (resretIC#94199138 = null)) THEN null ELSE cast(resretIC#94199138 as float) END AS resretIC#94199338, CASE WHEN ((numcos#94199139 = NA) OR (numcos#94199139 = null)) THEN null ELSE cast(numcos#94199139 as float) END AS numcos#94199339, CASE WHEN ((numdates#94199140 = NA) OR (numdates#94199140 = null)) THEN null ELSE cast(numdates#94199140 as int) END AS numdates#94199340, CASE WHEN ((annual_bmret#94199141 = NA) OR (annual_bmret#94199141 = null)) THEN null ELSE cast(annual_bmret#94199141 as float) END AS annual_bmret#94199341, CASE WHEN ((annual_ret#94199142 = NA) OR (annual_ret#94199142 = null)) THEN null ELSE cast(annual_ret#94199142 as float) END AS annual_ret#94199375, CASE WHEN ((std_ret#94199143 = NA) OR (std_ret#94199143 = null)) THEN null ELSE cast(std_ret#94199143 as float) END AS std_ret#94199376, CASE WHEN ((Sharpe_ret#94199144 = NA) OR (Sharpe_ret#94199144 = null)) THEN null ELSE cast(Sharpe_ret#94199144 as float) END AS Sharpe_ret#94199377, CASE WHEN ((PctPos_ret#94199145 = NA) OR (PctPos_ret#94199145 = null)) THEN null ELSE cast(PctPos_ret#94199145 as float) END AS PctPos_ret#94199378, CASE WHEN ((TR_ret#94199146 = NA) OR (TR_ret#94199146 = null)) THEN null ELSE cast(TR_ret#94199146 as float) END AS TR_ret#94199379, CASE WHEN ((IR_ret#94199147 = NA) OR (IR_ret#94199147 = null)) THEN null ELSE cast(IR_ret#94199147 as float) END AS IR_ret#94199380, CASE WHEN ((annual_resret#94199148 = NA) OR (annual_resret#94199148 = null)) THEN null ELSE cast(annual_resret#94199148 as float) END AS annual_resret#94199382, CASE WHEN ((std_resret#94199149 = NA) OR (std_resret#94199149 = null)) THEN null ELSE cast(std_resret#94199149 as float) END AS std_resret#94199384, CASE WHEN ((Sharpe_resret#94199150 = NA) OR (Sharpe_resret#94199150 = null)) THEN null ELSE cast(Sharpe_resret#94199150 as float) END AS Sharpe_resret#94199386, CASE WHEN ((PctPos_resret#94199151 = NA) OR (PctPos_resret#94199151 = null)) THEN null ELSE cast(PctPos_resret#94199151 as float) END AS PctPos_resret#94199389, CASE WHEN ((TR_resret#94199152 = NA) OR (TR_resret#94199152 = null)) THEN null ELSE cast(TR_resret#94199152 as float) END AS TR_resret#94199391, CASE WHEN ((IR_resret#94199153 = NA) OR (IR_resret#94199153 = null)) THEN null ELSE cast(IR_resret#94199153 as float) END AS IR_resret#94199393, CASE WHEN ((annual_retnet#94199154 = NA) OR (annual_retnet#94199154 = null)) THEN null ELSE cast(annual_retnet#94199154 as float) END AS annual_retnet#94199394, CASE WHEN ((std_retnet#94199155 = NA) OR (std_retnet#94199155 = null)) THEN null ELSE cast(std_retnet#94199155 as float) END AS std_retnet#94199396, CASE WHEN ((Sharpe_retnet#94199156 = NA) OR (Sharpe_retnet#94199156 = null)) THEN null ELSE cast(Sharpe_retnet#94199156 as float) END AS Sharpe_retnet#94199398, CASE WHEN ((PctPos_retnet#94199157 = NA) OR (PctPos_retnet#94199157 = null)) THEN null ELSE cast(PctPos_retnet#94199157 as float) END AS PctPos_retnet#94199400, CASE WHEN ((TR_retnet#94199158 = NA) OR (TR_retnet#94199158 = null)) THEN null ELSE cast(TR_retnet#94199158 as float) END AS TR_retnet#94199402, CASE WHEN ((IR_retnet#94199159 = NA) OR (IR_retnet#94199159 = null)) THEN null ELSE cast(IR_retnet#94199159 as float) END AS IR_retnet#94199404, CASE WHEN ((turnover#94199160 = NA) OR (turnover#94199160 = null)) THEN null ELSE cast(turnover#94199160 as float) END AS turnover#94199406] Input [25]: [year#94199136, retIC#94199137, resretIC#94199138, numcos#94199139, numdates#94199140, annual_bmret#94199141, annual_ret#94199142, std_ret#94199143, Sharpe_ret#94199144, PctPos_ret#94199145, TR_ret#94199146, IR_ret#94199147, annual_resret#94199148, std_resret#94199149, Sharpe_resret#94199150, PctPos_resret#94199151, TR_resret#94199152, IR_resret#94199153, annual_retnet#94199154, std_retnet#94199155, Sharpe_retnet#94199156, PctPos_retnet#94199157, TR_retnet#94199158, IR_retnet#94199159, turnover#94199160] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94199406, year#94199303] (8) Filter [codegen id : 1] Input [2]: [turnover#94199406, year#94199303] Condition : isnotnull(turnover#94199406) (9) Project [codegen id : 1] Output [3]: [year#94199303, turnover#94199406, (1.0 / cast(turnover#94199406 as double)) AS days_hold#94199452] Input [2]: [turnover#94199406, year#94199303] (10) Exchange Input [3]: [year#94199303, turnover#94199406, days_hold#94199452] Arguments: rangepartitioning(year#94199303 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7520875] (11) Sort [codegen id : 2] Input [3]: [year#94199303, turnover#94199406, days_hold#94199452] Arguments: [year#94199303 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94199303, turnover#94199406, days_hold#94199452] (13) CollectLimit Input [3]: [year#94199303, turnover#94199406, days_hold#94199452] Arguments: 1000000