== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94399378, turnover#94399453, days_hold#94399494] Arguments: [year#94399378, turnover#94399453, days_hold#94399494] (2) InMemoryRelation Arguments: [year#94399378, turnover#94399453, days_hold#94399494], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94399378 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94399378 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7537025] +- *(1) Project [year#94399378, turnover#94399453, (1.0 / cast(turnover#94399453 as double)) AS days_hold#94399494] +- *(1) Filter isnotnull(turnover#94399453) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94399453, year#94399378], [isnotnull(turnover#94399453)] +- InMemoryRelation [year#94399378, retIC#94399379, resretIC#94399413, numcos#94399414, numdates#94399415, annual_bmret#94399416, annual_ret#94399418, std_ret#94399420, Sharpe_ret#94399422, PctPos_ret#94399423, TR_ret#94399426, IR_ret#94399427, annual_resret#94399430, std_resret#94399432, Sharpe_resret#94399434, PctPos_resret#94399436, TR_resret#94399437, IR_resret#94399439, annual_retnet#94399442, std_retnet#94399443, Sharpe_retnet#94399446, PctPos_retnet#94399447, TR_retnet#94399450, IR_retnet#94399451, turnover#94399453], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94399178 = NA) OR (year#94399178 = null)) THEN null ELSE cast(year#94399178 as int) END AS year#94399378, CASE WHEN ((retIC#94399179 = NA) OR (retIC#94399179 = null)) THEN null ELSE cast(retIC#94399179 as float) END AS retIC#94399379, CASE WHEN ((resretIC#94399180 = NA) OR (resretIC#94399180 = null)) THEN null ELSE cast(resretIC#94399180 as float) END AS resretIC#94399413, CASE WHEN ((numcos#94399181 = NA) OR (numcos#94399181 = null)) THEN null ELSE cast(numcos#94399181 as float) END AS numcos#94399414, CASE WHEN ((numdates#94399182 = NA) OR (numdates#94399182 = null)) THEN null ELSE cast(numdates#94399182 as int) END AS numdates#94399415, CASE WHEN ((annual_bmret#94399183 = NA) OR (annual_bmret#94399183 = null)) THEN null ELSE cast(annual_bmret#94399183 as float) END AS annual_bmret#94399416, CASE WHEN ((annual_ret#94399184 = NA) OR (annual_ret#94399184 = null)) THEN null ELSE cast(annual_ret#94399184 as float) END AS annual_ret#94399418, CASE WHEN ((std_ret#94399185 = NA) OR (std_ret#94399185 = null)) THEN null ELSE cast(std_ret#94399185 as float) END AS std_ret#94399420, CASE WHEN ((Sharpe_ret#94399186 = NA) OR (Sharpe_ret#94399186 = null)) THEN null ELSE cast(Sharpe_ret#94399186 as float) END AS Sharpe_ret#94399422, CASE WHEN ((PctPos_ret#94399187 = NA) OR (PctPos_ret#94399187 = null)) THEN null ELSE cast(PctPos_ret#94399187 as float) END AS PctPos_ret#94399423, CASE WHEN ((TR_ret#94399188 = NA) OR (TR_ret#94399188 = null)) THEN null ELSE cast(TR_ret#94399188 as float) END AS TR_ret#94399426, CASE WHEN ((IR_ret#94399189 = NA) OR (IR_ret#94399189 = null)) THEN null ELSE cast(IR_ret#94399189 as float) END AS IR_ret#94399427, CASE WHEN ((annual_resret#94399190 = NA) OR (annual_resret#94399190 = null)) THEN null ELSE cast(annual_resret#94399190 as float) END AS annual_resret#94399430, CASE WHEN ((std_resret#94399191 = NA) OR (std_resret#94399191 = null)) THEN null ELSE cast(std_resret#94399191 as float) END AS std_resret#94399432, CASE WHEN ((Sharpe_resret#94399192 = NA) OR (Sharpe_resret#94399192 = null)) THEN null ELSE cast(Sharpe_resret#94399192 as float) END AS Sharpe_resret#94399434, CASE WHEN ((PctPos_resret#94399193 = NA) OR (PctPos_resret#94399193 = null)) THEN null ELSE cast(PctPos_resret#94399193 as float) END AS PctPos_resret#94399436, CASE WHEN ((TR_resret#94399194 = NA) OR (TR_resret#94399194 = null)) THEN null ELSE cast(TR_resret#94399194 as float) END AS TR_resret#94399437, CASE WHEN ((IR_resret#94399195 = NA) OR (IR_resret#94399195 = null)) THEN null ELSE cast(IR_resret#94399195 as float) END AS IR_resret#94399439, CASE WHEN ((annual_retnet#94399196 = NA) OR (annual_retnet#94399196 = null)) THEN null ELSE cast(annual_retnet#94399196 as float) END AS annual_retnet#94399442, CASE WHEN ((std_retnet#94399197 = NA) OR (std_retnet#94399197 = null)) THEN null ELSE cast(std_retnet#94399197 as float) END AS std_retnet#94399443, CASE WHEN ((Sharpe_retnet#94399198 = NA) OR (Sharpe_retnet#94399198 = null)) THEN null ELSE cast(Sharpe_retnet#94399198 as float) END AS Sharpe_retnet#94399446, CASE WHEN ((PctPos_retnet#94399199 = NA) OR (PctPos_retnet#94399199 = null)) THEN null ELSE cast(PctPos_retnet#94399199 as float) END AS PctPos_retnet#94399447, CASE WHEN ((TR_retnet#94399200 = NA) OR (TR_retnet#94399200 = null)) THEN null ELSE cast(TR_retnet#94399200 as float) END AS TR_retnet#94399450, CASE WHEN ((IR_retnet#94399201 = NA) OR (IR_retnet#94399201 = null)) THEN null ELSE cast(IR_retnet#94399201 as float) END AS IR_retnet#94399451, CASE WHEN ((turnover#94399202 = NA) OR (turnover#94399202 = null)) THEN null ELSE cast(turnover#94399202 as float) END AS turnover#94399453] +- FileScan csv [year#94399178,retIC#94399179,resretIC#94399180,numcos#94399181,numdates#94399182,annual_bmret#94399183,annual_ret#94399184,std_ret#94399185,Sharpe_ret#94399186,PctPos_ret#94399187,TR_ret#94399188,IR_ret#94399189,annual_resret#94399190,std_resret#94399191,Sharpe_resret#94399192,PctPos_resret#94399193,TR_resret#94399194,IR_resret#94399195,annual_retnet#94399196,std_retnet#94399197,Sharpe_retnet#94399198,PctPos_retnet#94399199,TR_retnet#94399200,IR_retnet#94399201,turnover#94399202] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94399378 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94399453, year#94399378] Arguments: [turnover#94399453, year#94399378], [isnotnull(turnover#94399453)] (4) InMemoryRelation Arguments: [year#94399378, retIC#94399379, resretIC#94399413, numcos#94399414, numdates#94399415, annual_bmret#94399416, annual_ret#94399418, std_ret#94399420, Sharpe_ret#94399422, PctPos_ret#94399423, TR_ret#94399426, IR_ret#94399427, annual_resret#94399430, std_resret#94399432, Sharpe_resret#94399434, PctPos_resret#94399436, TR_resret#94399437, IR_resret#94399439, annual_retnet#94399442, std_retnet#94399443, Sharpe_retnet#94399446, PctPos_retnet#94399447, TR_retnet#94399450, IR_retnet#94399451, turnover#94399453], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94399178 = NA) OR (year#94399178 = null)) THEN null ELSE cast(year#94399178 as int) END AS year#94399378, CASE WHEN ((retIC#94399179 = NA) OR (retIC#94399179 = null)) THEN null ELSE cast(retIC#94399179 as float) END AS retIC#94399379, CASE WHEN ((resretIC#94399180 = NA) OR (resretIC#94399180 = null)) THEN null ELSE cast(resretIC#94399180 as float) END AS resretIC#94399413, CASE WHEN ((numcos#94399181 = NA) OR (numcos#94399181 = null)) THEN null ELSE cast(numcos#94399181 as float) END AS numcos#94399414, CASE WHEN ((numdates#94399182 = NA) OR (numdates#94399182 = null)) THEN null ELSE cast(numdates#94399182 as int) END AS numdates#94399415, CASE WHEN ((annual_bmret#94399183 = NA) OR (annual_bmret#94399183 = null)) THEN null ELSE cast(annual_bmret#94399183 as float) END AS annual_bmret#94399416, CASE WHEN ((annual_ret#94399184 = NA) OR (annual_ret#94399184 = null)) THEN null ELSE cast(annual_ret#94399184 as float) END AS annual_ret#94399418, CASE WHEN ((std_ret#94399185 = NA) OR (std_ret#94399185 = null)) THEN null ELSE cast(std_ret#94399185 as float) END AS std_ret#94399420, CASE WHEN ((Sharpe_ret#94399186 = NA) OR (Sharpe_ret#94399186 = null)) THEN null ELSE cast(Sharpe_ret#94399186 as float) END AS Sharpe_ret#94399422, CASE WHEN ((PctPos_ret#94399187 = NA) OR (PctPos_ret#94399187 = null)) THEN null ELSE cast(PctPos_ret#94399187 as float) END AS PctPos_ret#94399423, CASE WHEN ((TR_ret#94399188 = NA) OR (TR_ret#94399188 = null)) THEN null ELSE cast(TR_ret#94399188 as float) END AS TR_ret#94399426, CASE WHEN ((IR_ret#94399189 = NA) OR (IR_ret#94399189 = null)) THEN null ELSE cast(IR_ret#94399189 as float) END AS IR_ret#94399427, CASE WHEN ((annual_resret#94399190 = NA) OR (annual_resret#94399190 = null)) THEN null ELSE cast(annual_resret#94399190 as float) END AS annual_resret#94399430, CASE WHEN ((std_resret#94399191 = NA) OR (std_resret#94399191 = null)) THEN null ELSE cast(std_resret#94399191 as float) END AS std_resret#94399432, CASE WHEN ((Sharpe_resret#94399192 = NA) OR (Sharpe_resret#94399192 = null)) THEN null ELSE cast(Sharpe_resret#94399192 as float) END AS Sharpe_resret#94399434, CASE WHEN ((PctPos_resret#94399193 = NA) OR (PctPos_resret#94399193 = null)) THEN null ELSE cast(PctPos_resret#94399193 as float) END AS PctPos_resret#94399436, CASE WHEN ((TR_resret#94399194 = NA) OR (TR_resret#94399194 = null)) THEN null ELSE cast(TR_resret#94399194 as float) END AS TR_resret#94399437, CASE WHEN ((IR_resret#94399195 = NA) OR (IR_resret#94399195 = null)) THEN null ELSE cast(IR_resret#94399195 as float) END AS IR_resret#94399439, CASE WHEN ((annual_retnet#94399196 = NA) OR (annual_retnet#94399196 = null)) THEN null ELSE cast(annual_retnet#94399196 as float) END AS annual_retnet#94399442, CASE WHEN ((std_retnet#94399197 = NA) OR (std_retnet#94399197 = null)) THEN null ELSE cast(std_retnet#94399197 as float) END AS std_retnet#94399443, CASE WHEN ((Sharpe_retnet#94399198 = NA) OR (Sharpe_retnet#94399198 = null)) THEN null ELSE cast(Sharpe_retnet#94399198 as float) END AS Sharpe_retnet#94399446, CASE WHEN ((PctPos_retnet#94399199 = NA) OR (PctPos_retnet#94399199 = null)) THEN null ELSE cast(PctPos_retnet#94399199 as float) END AS PctPos_retnet#94399447, CASE WHEN ((TR_retnet#94399200 = NA) OR (TR_retnet#94399200 = null)) THEN null ELSE cast(TR_retnet#94399200 as float) END AS TR_retnet#94399450, CASE WHEN ((IR_retnet#94399201 = NA) OR (IR_retnet#94399201 = null)) THEN null ELSE cast(IR_retnet#94399201 as float) END AS IR_retnet#94399451, CASE WHEN ((turnover#94399202 = NA) OR (turnover#94399202 = null)) THEN null ELSE cast(turnover#94399202 as float) END AS turnover#94399453] +- FileScan csv [year#94399178,retIC#94399179,resretIC#94399180,numcos#94399181,numdates#94399182,annual_bmret#94399183,annual_ret#94399184,std_ret#94399185,Sharpe_ret#94399186,PctPos_ret#94399187,TR_ret#94399188,IR_ret#94399189,annual_resret#94399190,std_resret#94399191,Sharpe_resret#94399192,PctPos_resret#94399193,TR_resret#94399194,IR_resret#94399195,annual_retnet#94399196,std_retnet#94399197,Sharpe_retnet#94399198,PctPos_retnet#94399199,TR_retnet#94399200,IR_retnet#94399201,turnover#94399202] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94399178, retIC#94399179, resretIC#94399180, numcos#94399181, numdates#94399182, annual_bmret#94399183, annual_ret#94399184, std_ret#94399185, Sharpe_ret#94399186, PctPos_ret#94399187, TR_ret#94399188, IR_ret#94399189, annual_resret#94399190, std_resret#94399191, Sharpe_resret#94399192, PctPos_resret#94399193, TR_resret#94399194, IR_resret#94399195, annual_retnet#94399196, std_retnet#94399197, Sharpe_retnet#94399198, PctPos_retnet#94399199, TR_retnet#94399200, IR_retnet#94399201, turnover#94399202] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94399178 = NA) OR (year#94399178 = null)) THEN null ELSE cast(year#94399178 as int) END AS year#94399378, CASE WHEN ((retIC#94399179 = NA) OR (retIC#94399179 = null)) THEN null ELSE cast(retIC#94399179 as float) END AS retIC#94399379, CASE WHEN ((resretIC#94399180 = NA) OR (resretIC#94399180 = null)) THEN null ELSE cast(resretIC#94399180 as float) END AS resretIC#94399413, CASE WHEN ((numcos#94399181 = NA) OR (numcos#94399181 = null)) THEN null ELSE cast(numcos#94399181 as float) END AS numcos#94399414, CASE WHEN ((numdates#94399182 = NA) OR (numdates#94399182 = null)) THEN null ELSE cast(numdates#94399182 as int) END AS numdates#94399415, CASE WHEN ((annual_bmret#94399183 = NA) OR (annual_bmret#94399183 = null)) THEN null ELSE cast(annual_bmret#94399183 as float) END AS annual_bmret#94399416, CASE WHEN ((annual_ret#94399184 = NA) OR (annual_ret#94399184 = null)) THEN null ELSE cast(annual_ret#94399184 as float) END AS annual_ret#94399418, CASE WHEN ((std_ret#94399185 = NA) OR (std_ret#94399185 = null)) THEN null ELSE cast(std_ret#94399185 as float) END AS std_ret#94399420, CASE WHEN ((Sharpe_ret#94399186 = NA) OR (Sharpe_ret#94399186 = null)) THEN null ELSE cast(Sharpe_ret#94399186 as float) END AS Sharpe_ret#94399422, CASE WHEN ((PctPos_ret#94399187 = NA) OR (PctPos_ret#94399187 = null)) THEN null ELSE cast(PctPos_ret#94399187 as float) END AS PctPos_ret#94399423, CASE WHEN ((TR_ret#94399188 = NA) OR (TR_ret#94399188 = null)) THEN null ELSE cast(TR_ret#94399188 as float) END AS TR_ret#94399426, CASE WHEN ((IR_ret#94399189 = NA) OR (IR_ret#94399189 = null)) THEN null ELSE cast(IR_ret#94399189 as float) END AS IR_ret#94399427, CASE WHEN ((annual_resret#94399190 = NA) OR (annual_resret#94399190 = null)) THEN null ELSE cast(annual_resret#94399190 as float) END AS annual_resret#94399430, CASE WHEN ((std_resret#94399191 = NA) OR (std_resret#94399191 = null)) THEN null ELSE cast(std_resret#94399191 as float) END AS std_resret#94399432, CASE WHEN ((Sharpe_resret#94399192 = NA) OR (Sharpe_resret#94399192 = null)) THEN null ELSE cast(Sharpe_resret#94399192 as float) END AS Sharpe_resret#94399434, CASE WHEN ((PctPos_resret#94399193 = NA) OR (PctPos_resret#94399193 = null)) THEN null ELSE cast(PctPos_resret#94399193 as float) END AS PctPos_resret#94399436, CASE WHEN ((TR_resret#94399194 = NA) OR (TR_resret#94399194 = null)) THEN null ELSE cast(TR_resret#94399194 as float) END AS TR_resret#94399437, CASE WHEN ((IR_resret#94399195 = NA) OR (IR_resret#94399195 = null)) THEN null ELSE cast(IR_resret#94399195 as float) END AS IR_resret#94399439, CASE WHEN ((annual_retnet#94399196 = NA) OR (annual_retnet#94399196 = null)) THEN null ELSE cast(annual_retnet#94399196 as float) END AS annual_retnet#94399442, CASE WHEN ((std_retnet#94399197 = NA) OR (std_retnet#94399197 = null)) THEN null ELSE cast(std_retnet#94399197 as float) END AS std_retnet#94399443, CASE WHEN ((Sharpe_retnet#94399198 = NA) OR (Sharpe_retnet#94399198 = null)) THEN null ELSE cast(Sharpe_retnet#94399198 as float) END AS Sharpe_retnet#94399446, CASE WHEN ((PctPos_retnet#94399199 = NA) OR (PctPos_retnet#94399199 = null)) THEN null ELSE cast(PctPos_retnet#94399199 as float) END AS PctPos_retnet#94399447, CASE WHEN ((TR_retnet#94399200 = NA) OR (TR_retnet#94399200 = null)) THEN null ELSE cast(TR_retnet#94399200 as float) END AS TR_retnet#94399450, CASE WHEN ((IR_retnet#94399201 = NA) OR (IR_retnet#94399201 = null)) THEN null ELSE cast(IR_retnet#94399201 as float) END AS IR_retnet#94399451, CASE WHEN ((turnover#94399202 = NA) OR (turnover#94399202 = null)) THEN null ELSE cast(turnover#94399202 as float) END AS turnover#94399453] Input [25]: [year#94399178, retIC#94399179, resretIC#94399180, numcos#94399181, numdates#94399182, annual_bmret#94399183, annual_ret#94399184, std_ret#94399185, Sharpe_ret#94399186, PctPos_ret#94399187, TR_ret#94399188, IR_ret#94399189, annual_resret#94399190, std_resret#94399191, Sharpe_resret#94399192, PctPos_resret#94399193, TR_resret#94399194, IR_resret#94399195, annual_retnet#94399196, std_retnet#94399197, Sharpe_retnet#94399198, PctPos_retnet#94399199, TR_retnet#94399200, IR_retnet#94399201, turnover#94399202] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94399453, year#94399378] (8) Filter [codegen id : 1] Input [2]: [turnover#94399453, year#94399378] Condition : isnotnull(turnover#94399453) (9) Project [codegen id : 1] Output [3]: [year#94399378, turnover#94399453, (1.0 / cast(turnover#94399453 as double)) AS days_hold#94399494] Input [2]: [turnover#94399453, year#94399378] (10) Exchange Input [3]: [year#94399378, turnover#94399453, days_hold#94399494] Arguments: rangepartitioning(year#94399378 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7537025] (11) Sort [codegen id : 2] Input [3]: [year#94399378, turnover#94399453, days_hold#94399494] Arguments: [year#94399378 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94399378, turnover#94399453, days_hold#94399494] (13) CollectLimit Input [3]: [year#94399378, turnover#94399453, days_hold#94399494] Arguments: 10000