== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94071392, turnover#94071440, days_hold#94071475] Arguments: [year#94071392, turnover#94071440, days_hold#94071475] (2) InMemoryRelation Arguments: [year#94071392, turnover#94071440, days_hold#94071475], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94071392 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94071392 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7510806] +- *(1) Project [year#94071392, turnover#94071440, (1.0 / cast(turnover#94071440 as double)) AS days_hold#94071475] +- *(1) Filter isnotnull(turnover#94071440) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94071440, year#94071392], [isnotnull(turnover#94071440)] +- InMemoryRelation [year#94071392, retIC#94071394, resretIC#94071396, numcos#94071398, numdates#94071400, annual_bmret#94071402, annual_ret#94071404, std_ret#94071406, Sharpe_ret#94071408, PctPos_ret#94071410, TR_ret#94071412, IR_ret#94071414, annual_resret#94071416, std_resret#94071418, Sharpe_resret#94071420, PctPos_resret#94071422, TR_resret#94071424, IR_resret#94071426, annual_retnet#94071428, std_retnet#94071430, Sharpe_retnet#94071432, PctPos_retnet#94071434, TR_retnet#94071436, IR_retnet#94071438, turnover#94071440], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94071193 = NA) OR (year#94071193 = null)) THEN null ELSE cast(year#94071193 as int) END AS year#94071392, CASE WHEN ((retIC#94071194 = NA) OR (retIC#94071194 = null)) THEN null ELSE cast(retIC#94071194 as float) END AS retIC#94071394, CASE WHEN ((resretIC#94071195 = NA) OR (resretIC#94071195 = null)) THEN null ELSE cast(resretIC#94071195 as float) END AS resretIC#94071396, CASE WHEN ((numcos#94071196 = NA) OR (numcos#94071196 = null)) THEN null ELSE cast(numcos#94071196 as float) END AS numcos#94071398, CASE WHEN ((numdates#94071197 = NA) OR (numdates#94071197 = null)) THEN null ELSE cast(numdates#94071197 as int) END AS numdates#94071400, CASE WHEN ((annual_bmret#94071198 = NA) OR (annual_bmret#94071198 = null)) THEN null ELSE cast(annual_bmret#94071198 as float) END AS annual_bmret#94071402, CASE WHEN ((annual_ret#94071199 = NA) OR (annual_ret#94071199 = null)) THEN null ELSE cast(annual_ret#94071199 as float) END AS annual_ret#94071404, CASE WHEN ((std_ret#94071200 = NA) OR (std_ret#94071200 = null)) THEN null ELSE cast(std_ret#94071200 as float) END AS std_ret#94071406, CASE WHEN ((Sharpe_ret#94071201 = NA) OR (Sharpe_ret#94071201 = null)) THEN null ELSE cast(Sharpe_ret#94071201 as float) END AS Sharpe_ret#94071408, CASE WHEN ((PctPos_ret#94071202 = NA) OR (PctPos_ret#94071202 = null)) THEN null ELSE cast(PctPos_ret#94071202 as float) END AS PctPos_ret#94071410, CASE WHEN ((TR_ret#94071203 = NA) OR (TR_ret#94071203 = null)) THEN null ELSE cast(TR_ret#94071203 as float) END AS TR_ret#94071412, CASE WHEN ((IR_ret#94071204 = NA) OR (IR_ret#94071204 = null)) THEN null ELSE cast(IR_ret#94071204 as float) END AS IR_ret#94071414, CASE WHEN ((annual_resret#94071205 = NA) OR (annual_resret#94071205 = null)) THEN null ELSE cast(annual_resret#94071205 as float) END AS annual_resret#94071416, CASE WHEN ((std_resret#94071206 = NA) OR (std_resret#94071206 = null)) THEN null ELSE cast(std_resret#94071206 as float) END AS std_resret#94071418, CASE WHEN ((Sharpe_resret#94071207 = NA) OR (Sharpe_resret#94071207 = null)) THEN null ELSE cast(Sharpe_resret#94071207 as float) END AS Sharpe_resret#94071420, CASE WHEN ((PctPos_resret#94071208 = NA) OR (PctPos_resret#94071208 = null)) THEN null ELSE cast(PctPos_resret#94071208 as float) END AS PctPos_resret#94071422, CASE WHEN ((TR_resret#94071209 = NA) OR (TR_resret#94071209 = null)) THEN null ELSE cast(TR_resret#94071209 as float) END AS TR_resret#94071424, CASE WHEN ((IR_resret#94071210 = NA) OR (IR_resret#94071210 = null)) THEN null ELSE cast(IR_resret#94071210 as float) END AS IR_resret#94071426, CASE WHEN ((annual_retnet#94071211 = NA) OR (annual_retnet#94071211 = null)) THEN null ELSE cast(annual_retnet#94071211 as float) END AS annual_retnet#94071428, CASE WHEN ((std_retnet#94071212 = NA) OR (std_retnet#94071212 = null)) THEN null ELSE cast(std_retnet#94071212 as float) END AS std_retnet#94071430, CASE WHEN ((Sharpe_retnet#94071213 = NA) OR (Sharpe_retnet#94071213 = null)) THEN null ELSE cast(Sharpe_retnet#94071213 as float) END AS Sharpe_retnet#94071432, CASE WHEN ((PctPos_retnet#94071214 = NA) OR (PctPos_retnet#94071214 = null)) THEN null ELSE cast(PctPos_retnet#94071214 as float) END AS PctPos_retnet#94071434, CASE WHEN ((TR_retnet#94071215 = NA) OR (TR_retnet#94071215 = null)) THEN null ELSE cast(TR_retnet#94071215 as float) END AS TR_retnet#94071436, CASE WHEN ((IR_retnet#94071216 = NA) OR (IR_retnet#94071216 = null)) THEN null ELSE cast(IR_retnet#94071216 as float) END AS IR_retnet#94071438, CASE WHEN ((turnover#94071217 = NA) OR (turnover#94071217 = null)) THEN null ELSE cast(turnover#94071217 as float) END AS turnover#94071440] +- FileScan csv [year#94071193,retIC#94071194,resretIC#94071195,numcos#94071196,numdates#94071197,annual_bmret#94071198,annual_ret#94071199,std_ret#94071200,Sharpe_ret#94071201,PctPos_ret#94071202,TR_ret#94071203,IR_ret#94071204,annual_resret#94071205,std_resret#94071206,Sharpe_resret#94071207,PctPos_resret#94071208,TR_resret#94071209,IR_resret#94071210,annual_retnet#94071211,std_retnet#94071212,Sharpe_retnet#94071213,PctPos_retnet#94071214,TR_retnet#94071215,IR_retnet#94071216,turnover#94071217] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94071392 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94071440, year#94071392] Arguments: [turnover#94071440, year#94071392], [isnotnull(turnover#94071440)] (4) InMemoryRelation Arguments: [year#94071392, retIC#94071394, resretIC#94071396, numcos#94071398, numdates#94071400, annual_bmret#94071402, annual_ret#94071404, std_ret#94071406, Sharpe_ret#94071408, PctPos_ret#94071410, TR_ret#94071412, IR_ret#94071414, annual_resret#94071416, std_resret#94071418, Sharpe_resret#94071420, PctPos_resret#94071422, TR_resret#94071424, IR_resret#94071426, annual_retnet#94071428, std_retnet#94071430, Sharpe_retnet#94071432, PctPos_retnet#94071434, TR_retnet#94071436, IR_retnet#94071438, turnover#94071440], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94071193 = NA) OR (year#94071193 = null)) THEN null ELSE cast(year#94071193 as int) END AS year#94071392, CASE WHEN ((retIC#94071194 = NA) OR (retIC#94071194 = null)) THEN null ELSE cast(retIC#94071194 as float) END AS retIC#94071394, CASE WHEN ((resretIC#94071195 = NA) OR (resretIC#94071195 = null)) THEN null ELSE cast(resretIC#94071195 as float) END AS resretIC#94071396, CASE WHEN ((numcos#94071196 = NA) OR (numcos#94071196 = null)) THEN null ELSE cast(numcos#94071196 as float) END AS numcos#94071398, CASE WHEN ((numdates#94071197 = NA) OR (numdates#94071197 = null)) THEN null ELSE cast(numdates#94071197 as int) END AS numdates#94071400, CASE WHEN ((annual_bmret#94071198 = NA) OR (annual_bmret#94071198 = null)) THEN null ELSE cast(annual_bmret#94071198 as float) END AS annual_bmret#94071402, CASE WHEN ((annual_ret#94071199 = NA) OR (annual_ret#94071199 = null)) THEN null ELSE cast(annual_ret#94071199 as float) END AS annual_ret#94071404, CASE WHEN ((std_ret#94071200 = NA) OR (std_ret#94071200 = null)) THEN null ELSE cast(std_ret#94071200 as float) END AS std_ret#94071406, CASE WHEN ((Sharpe_ret#94071201 = NA) OR (Sharpe_ret#94071201 = null)) THEN null ELSE cast(Sharpe_ret#94071201 as float) END AS Sharpe_ret#94071408, CASE WHEN ((PctPos_ret#94071202 = NA) OR (PctPos_ret#94071202 = null)) THEN null ELSE cast(PctPos_ret#94071202 as float) END AS PctPos_ret#94071410, CASE WHEN ((TR_ret#94071203 = NA) OR (TR_ret#94071203 = null)) THEN null ELSE cast(TR_ret#94071203 as float) END AS TR_ret#94071412, CASE WHEN ((IR_ret#94071204 = NA) OR (IR_ret#94071204 = null)) THEN null ELSE cast(IR_ret#94071204 as float) END AS IR_ret#94071414, CASE WHEN ((annual_resret#94071205 = NA) OR (annual_resret#94071205 = null)) THEN null ELSE cast(annual_resret#94071205 as float) END AS annual_resret#94071416, CASE WHEN ((std_resret#94071206 = NA) OR (std_resret#94071206 = null)) THEN null ELSE cast(std_resret#94071206 as float) END AS std_resret#94071418, CASE WHEN ((Sharpe_resret#94071207 = NA) OR (Sharpe_resret#94071207 = null)) THEN null ELSE cast(Sharpe_resret#94071207 as float) END AS Sharpe_resret#94071420, CASE WHEN ((PctPos_resret#94071208 = NA) OR (PctPos_resret#94071208 = null)) THEN null ELSE cast(PctPos_resret#94071208 as float) END AS PctPos_resret#94071422, CASE WHEN ((TR_resret#94071209 = NA) OR (TR_resret#94071209 = null)) THEN null ELSE cast(TR_resret#94071209 as float) END AS TR_resret#94071424, CASE WHEN ((IR_resret#94071210 = NA) OR (IR_resret#94071210 = null)) THEN null ELSE cast(IR_resret#94071210 as float) END AS IR_resret#94071426, CASE WHEN ((annual_retnet#94071211 = NA) OR (annual_retnet#94071211 = null)) THEN null ELSE cast(annual_retnet#94071211 as float) END AS annual_retnet#94071428, CASE WHEN ((std_retnet#94071212 = NA) OR (std_retnet#94071212 = null)) THEN null ELSE cast(std_retnet#94071212 as float) END AS std_retnet#94071430, CASE WHEN ((Sharpe_retnet#94071213 = NA) OR (Sharpe_retnet#94071213 = null)) THEN null ELSE cast(Sharpe_retnet#94071213 as float) END AS Sharpe_retnet#94071432, CASE WHEN ((PctPos_retnet#94071214 = NA) OR (PctPos_retnet#94071214 = null)) THEN null ELSE cast(PctPos_retnet#94071214 as float) END AS PctPos_retnet#94071434, CASE WHEN ((TR_retnet#94071215 = NA) OR (TR_retnet#94071215 = null)) THEN null ELSE cast(TR_retnet#94071215 as float) END AS TR_retnet#94071436, CASE WHEN ((IR_retnet#94071216 = NA) OR (IR_retnet#94071216 = null)) THEN null ELSE cast(IR_retnet#94071216 as float) END AS IR_retnet#94071438, CASE WHEN ((turnover#94071217 = NA) OR (turnover#94071217 = null)) THEN null ELSE cast(turnover#94071217 as float) END AS turnover#94071440] +- FileScan csv [year#94071193,retIC#94071194,resretIC#94071195,numcos#94071196,numdates#94071197,annual_bmret#94071198,annual_ret#94071199,std_ret#94071200,Sharpe_ret#94071201,PctPos_ret#94071202,TR_ret#94071203,IR_ret#94071204,annual_resret#94071205,std_resret#94071206,Sharpe_resret#94071207,PctPos_resret#94071208,TR_resret#94071209,IR_resret#94071210,annual_retnet#94071211,std_retnet#94071212,Sharpe_retnet#94071213,PctPos_retnet#94071214,TR_retnet#94071215,IR_retnet#94071216,turnover#94071217] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94071193, retIC#94071194, resretIC#94071195, numcos#94071196, numdates#94071197, annual_bmret#94071198, annual_ret#94071199, std_ret#94071200, Sharpe_ret#94071201, PctPos_ret#94071202, TR_ret#94071203, IR_ret#94071204, annual_resret#94071205, std_resret#94071206, Sharpe_resret#94071207, PctPos_resret#94071208, TR_resret#94071209, IR_resret#94071210, annual_retnet#94071211, std_retnet#94071212, Sharpe_retnet#94071213, PctPos_retnet#94071214, TR_retnet#94071215, IR_retnet#94071216, turnover#94071217] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94071193 = NA) OR (year#94071193 = null)) THEN null ELSE cast(year#94071193 as int) END AS year#94071392, CASE WHEN ((retIC#94071194 = NA) OR (retIC#94071194 = null)) THEN null ELSE cast(retIC#94071194 as float) END AS retIC#94071394, CASE WHEN ((resretIC#94071195 = NA) OR (resretIC#94071195 = null)) THEN null ELSE cast(resretIC#94071195 as float) END AS resretIC#94071396, CASE WHEN ((numcos#94071196 = NA) OR (numcos#94071196 = null)) THEN null ELSE cast(numcos#94071196 as float) END AS numcos#94071398, CASE WHEN ((numdates#94071197 = NA) OR (numdates#94071197 = null)) THEN null ELSE cast(numdates#94071197 as int) END AS numdates#94071400, CASE WHEN ((annual_bmret#94071198 = NA) OR (annual_bmret#94071198 = null)) THEN null ELSE cast(annual_bmret#94071198 as float) END AS annual_bmret#94071402, CASE WHEN ((annual_ret#94071199 = NA) OR (annual_ret#94071199 = null)) THEN null ELSE cast(annual_ret#94071199 as float) END AS annual_ret#94071404, CASE WHEN ((std_ret#94071200 = NA) OR (std_ret#94071200 = null)) THEN null ELSE cast(std_ret#94071200 as float) END AS std_ret#94071406, CASE WHEN ((Sharpe_ret#94071201 = NA) OR (Sharpe_ret#94071201 = null)) THEN null ELSE cast(Sharpe_ret#94071201 as float) END AS Sharpe_ret#94071408, CASE WHEN ((PctPos_ret#94071202 = NA) OR (PctPos_ret#94071202 = null)) THEN null ELSE cast(PctPos_ret#94071202 as float) END AS PctPos_ret#94071410, CASE WHEN ((TR_ret#94071203 = NA) OR (TR_ret#94071203 = null)) THEN null ELSE cast(TR_ret#94071203 as float) END AS TR_ret#94071412, CASE WHEN ((IR_ret#94071204 = NA) OR (IR_ret#94071204 = null)) THEN null ELSE cast(IR_ret#94071204 as float) END AS IR_ret#94071414, CASE WHEN ((annual_resret#94071205 = NA) OR (annual_resret#94071205 = null)) THEN null ELSE cast(annual_resret#94071205 as float) END AS annual_resret#94071416, CASE WHEN ((std_resret#94071206 = NA) OR (std_resret#94071206 = null)) THEN null ELSE cast(std_resret#94071206 as float) END AS std_resret#94071418, CASE WHEN ((Sharpe_resret#94071207 = NA) OR (Sharpe_resret#94071207 = null)) THEN null ELSE cast(Sharpe_resret#94071207 as float) END AS Sharpe_resret#94071420, CASE WHEN ((PctPos_resret#94071208 = NA) OR (PctPos_resret#94071208 = null)) THEN null ELSE cast(PctPos_resret#94071208 as float) END AS PctPos_resret#94071422, CASE WHEN ((TR_resret#94071209 = NA) OR (TR_resret#94071209 = null)) THEN null ELSE cast(TR_resret#94071209 as float) END AS TR_resret#94071424, CASE WHEN ((IR_resret#94071210 = NA) OR (IR_resret#94071210 = null)) THEN null ELSE cast(IR_resret#94071210 as float) END AS IR_resret#94071426, CASE WHEN ((annual_retnet#94071211 = NA) OR (annual_retnet#94071211 = null)) THEN null ELSE cast(annual_retnet#94071211 as float) END AS annual_retnet#94071428, CASE WHEN ((std_retnet#94071212 = NA) OR (std_retnet#94071212 = null)) THEN null ELSE cast(std_retnet#94071212 as float) END AS std_retnet#94071430, CASE WHEN ((Sharpe_retnet#94071213 = NA) OR (Sharpe_retnet#94071213 = null)) THEN null ELSE cast(Sharpe_retnet#94071213 as float) END AS Sharpe_retnet#94071432, CASE WHEN ((PctPos_retnet#94071214 = NA) OR (PctPos_retnet#94071214 = null)) THEN null ELSE cast(PctPos_retnet#94071214 as float) END AS PctPos_retnet#94071434, CASE WHEN ((TR_retnet#94071215 = NA) OR (TR_retnet#94071215 = null)) THEN null ELSE cast(TR_retnet#94071215 as float) END AS TR_retnet#94071436, CASE WHEN ((IR_retnet#94071216 = NA) OR (IR_retnet#94071216 = null)) THEN null ELSE cast(IR_retnet#94071216 as float) END AS IR_retnet#94071438, CASE WHEN ((turnover#94071217 = NA) OR (turnover#94071217 = null)) THEN null ELSE cast(turnover#94071217 as float) END AS turnover#94071440] Input [25]: [year#94071193, retIC#94071194, resretIC#94071195, numcos#94071196, numdates#94071197, annual_bmret#94071198, annual_ret#94071199, std_ret#94071200, Sharpe_ret#94071201, PctPos_ret#94071202, TR_ret#94071203, IR_ret#94071204, annual_resret#94071205, std_resret#94071206, Sharpe_resret#94071207, PctPos_resret#94071208, TR_resret#94071209, IR_resret#94071210, annual_retnet#94071211, std_retnet#94071212, Sharpe_retnet#94071213, PctPos_retnet#94071214, TR_retnet#94071215, IR_retnet#94071216, turnover#94071217] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94071440, year#94071392] (8) Filter [codegen id : 1] Input [2]: [turnover#94071440, year#94071392] Condition : isnotnull(turnover#94071440) (9) Project [codegen id : 1] Output [3]: [year#94071392, turnover#94071440, (1.0 / cast(turnover#94071440 as double)) AS days_hold#94071475] Input [2]: [turnover#94071440, year#94071392] (10) Exchange Input [3]: [year#94071392, turnover#94071440, days_hold#94071475] Arguments: rangepartitioning(year#94071392 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7510806] (11) Sort [codegen id : 2] Input [3]: [year#94071392, turnover#94071440, days_hold#94071475] Arguments: [year#94071392 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94071392, turnover#94071440, days_hold#94071475] (13) CollectLimit Input [3]: [year#94071392, turnover#94071440, days_hold#94071475] Arguments: 1000000