== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94163653, turnover#94163698, days_hold#94163736] Arguments: [year#94163653, turnover#94163698, days_hold#94163736] (2) InMemoryRelation Arguments: [year#94163653, turnover#94163698, days_hold#94163736], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94163653 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94163653 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7517943] +- *(1) Project [year#94163653, turnover#94163698, (1.0 / cast(turnover#94163698 as double)) AS days_hold#94163736] +- *(1) Filter isnotnull(turnover#94163698) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94163698, year#94163653], [isnotnull(turnover#94163698)] +- InMemoryRelation [year#94163653, retIC#94163654, resretIC#94163655, numcos#94163656, numdates#94163657, annual_bmret#94163658, annual_ret#94163661, std_ret#94163663, Sharpe_ret#94163665, PctPos_ret#94163667, TR_ret#94163669, IR_ret#94163671, annual_resret#94163673, std_resret#94163675, Sharpe_resret#94163677, PctPos_resret#94163679, TR_resret#94163681, IR_resret#94163683, annual_retnet#94163685, std_retnet#94163687, Sharpe_retnet#94163689, PctPos_retnet#94163692, TR_retnet#94163694, IR_retnet#94163696, turnover#94163698], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94163420 = NA) OR (year#94163420 = null)) THEN null ELSE cast(year#94163420 as int) END AS year#94163653, CASE WHEN ((retIC#94163421 = NA) OR (retIC#94163421 = null)) THEN null ELSE cast(retIC#94163421 as float) END AS retIC#94163654, CASE WHEN ((resretIC#94163422 = NA) OR (resretIC#94163422 = null)) THEN null ELSE cast(resretIC#94163422 as float) END AS resretIC#94163655, CASE WHEN ((numcos#94163423 = NA) OR (numcos#94163423 = null)) THEN null ELSE cast(numcos#94163423 as float) END AS numcos#94163656, CASE WHEN ((numdates#94163424 = NA) OR (numdates#94163424 = null)) THEN null ELSE cast(numdates#94163424 as int) END AS numdates#94163657, CASE WHEN ((annual_bmret#94163425 = NA) OR (annual_bmret#94163425 = null)) THEN null ELSE cast(annual_bmret#94163425 as float) END AS annual_bmret#94163658, CASE WHEN ((annual_ret#94163426 = NA) OR (annual_ret#94163426 = null)) THEN null ELSE cast(annual_ret#94163426 as float) END AS annual_ret#94163661, CASE WHEN ((std_ret#94163427 = NA) OR (std_ret#94163427 = null)) THEN null ELSE cast(std_ret#94163427 as float) END AS std_ret#94163663, CASE WHEN ((Sharpe_ret#94163428 = NA) OR (Sharpe_ret#94163428 = null)) THEN null ELSE cast(Sharpe_ret#94163428 as float) END AS Sharpe_ret#94163665, CASE WHEN ((PctPos_ret#94163429 = NA) OR (PctPos_ret#94163429 = null)) THEN null ELSE cast(PctPos_ret#94163429 as float) END AS PctPos_ret#94163667, CASE WHEN ((TR_ret#94163430 = NA) OR (TR_ret#94163430 = null)) THEN null ELSE cast(TR_ret#94163430 as float) END AS TR_ret#94163669, CASE WHEN ((IR_ret#94163431 = NA) OR (IR_ret#94163431 = null)) THEN null ELSE cast(IR_ret#94163431 as float) END AS IR_ret#94163671, CASE WHEN ((annual_resret#94163432 = NA) OR (annual_resret#94163432 = null)) THEN null ELSE cast(annual_resret#94163432 as float) END AS annual_resret#94163673, CASE WHEN ((std_resret#94163433 = NA) OR (std_resret#94163433 = null)) THEN null ELSE cast(std_resret#94163433 as float) END AS std_resret#94163675, CASE WHEN ((Sharpe_resret#94163434 = NA) OR (Sharpe_resret#94163434 = null)) THEN null ELSE cast(Sharpe_resret#94163434 as float) END AS Sharpe_resret#94163677, CASE WHEN ((PctPos_resret#94163435 = NA) OR (PctPos_resret#94163435 = null)) THEN null ELSE cast(PctPos_resret#94163435 as float) END AS PctPos_resret#94163679, CASE WHEN ((TR_resret#94163436 = NA) OR (TR_resret#94163436 = null)) THEN null ELSE cast(TR_resret#94163436 as float) END AS TR_resret#94163681, CASE WHEN ((IR_resret#94163437 = NA) OR (IR_resret#94163437 = null)) THEN null ELSE cast(IR_resret#94163437 as float) END AS IR_resret#94163683, CASE WHEN ((annual_retnet#94163438 = NA) OR (annual_retnet#94163438 = null)) THEN null ELSE cast(annual_retnet#94163438 as float) END AS annual_retnet#94163685, CASE WHEN ((std_retnet#94163439 = NA) OR (std_retnet#94163439 = null)) THEN null ELSE cast(std_retnet#94163439 as float) END AS std_retnet#94163687, CASE WHEN ((Sharpe_retnet#94163440 = NA) OR (Sharpe_retnet#94163440 = null)) THEN null ELSE cast(Sharpe_retnet#94163440 as float) END AS Sharpe_retnet#94163689, CASE WHEN ((PctPos_retnet#94163441 = NA) OR (PctPos_retnet#94163441 = null)) THEN null ELSE cast(PctPos_retnet#94163441 as float) END AS PctPos_retnet#94163692, CASE WHEN ((TR_retnet#94163442 = NA) OR (TR_retnet#94163442 = null)) THEN null ELSE cast(TR_retnet#94163442 as float) END AS TR_retnet#94163694, CASE WHEN ((IR_retnet#94163443 = NA) OR (IR_retnet#94163443 = null)) THEN null ELSE cast(IR_retnet#94163443 as float) END AS IR_retnet#94163696, CASE WHEN ((turnover#94163444 = NA) OR (turnover#94163444 = null)) THEN null ELSE cast(turnover#94163444 as float) END AS turnover#94163698] +- FileScan csv [year#94163420,retIC#94163421,resretIC#94163422,numcos#94163423,numdates#94163424,annual_bmret#94163425,annual_ret#94163426,std_ret#94163427,Sharpe_ret#94163428,PctPos_ret#94163429,TR_ret#94163430,IR_ret#94163431,annual_resret#94163432,std_resret#94163433,Sharpe_resret#94163434,PctPos_resret#94163435,TR_resret#94163436,IR_resret#94163437,annual_retnet#94163438,std_retnet#94163439,Sharpe_retnet#94163440,PctPos_retnet#94163441,TR_retnet#94163442,IR_retnet#94163443,turnover#94163444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94163653 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94163698, year#94163653] Arguments: [turnover#94163698, year#94163653], [isnotnull(turnover#94163698)] (4) InMemoryRelation Arguments: [year#94163653, retIC#94163654, resretIC#94163655, numcos#94163656, numdates#94163657, annual_bmret#94163658, annual_ret#94163661, std_ret#94163663, Sharpe_ret#94163665, PctPos_ret#94163667, TR_ret#94163669, IR_ret#94163671, annual_resret#94163673, std_resret#94163675, Sharpe_resret#94163677, PctPos_resret#94163679, TR_resret#94163681, IR_resret#94163683, annual_retnet#94163685, std_retnet#94163687, Sharpe_retnet#94163689, PctPos_retnet#94163692, TR_retnet#94163694, IR_retnet#94163696, turnover#94163698], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94163420 = NA) OR (year#94163420 = null)) THEN null ELSE cast(year#94163420 as int) END AS year#94163653, CASE WHEN ((retIC#94163421 = NA) OR (retIC#94163421 = null)) THEN null ELSE cast(retIC#94163421 as float) END AS retIC#94163654, CASE WHEN ((resretIC#94163422 = NA) OR (resretIC#94163422 = null)) THEN null ELSE cast(resretIC#94163422 as float) END AS resretIC#94163655, CASE WHEN ((numcos#94163423 = NA) OR (numcos#94163423 = null)) THEN null ELSE cast(numcos#94163423 as float) END AS numcos#94163656, CASE WHEN ((numdates#94163424 = NA) OR (numdates#94163424 = null)) THEN null ELSE cast(numdates#94163424 as int) END AS numdates#94163657, CASE WHEN ((annual_bmret#94163425 = NA) OR (annual_bmret#94163425 = null)) THEN null ELSE cast(annual_bmret#94163425 as float) END AS annual_bmret#94163658, CASE WHEN ((annual_ret#94163426 = NA) OR (annual_ret#94163426 = null)) THEN null ELSE cast(annual_ret#94163426 as float) END AS annual_ret#94163661, CASE WHEN ((std_ret#94163427 = NA) OR (std_ret#94163427 = null)) THEN null ELSE cast(std_ret#94163427 as float) END AS std_ret#94163663, CASE WHEN ((Sharpe_ret#94163428 = NA) OR (Sharpe_ret#94163428 = null)) THEN null ELSE cast(Sharpe_ret#94163428 as float) END AS Sharpe_ret#94163665, CASE WHEN ((PctPos_ret#94163429 = NA) OR (PctPos_ret#94163429 = null)) THEN null ELSE cast(PctPos_ret#94163429 as float) END AS PctPos_ret#94163667, CASE WHEN ((TR_ret#94163430 = NA) OR (TR_ret#94163430 = null)) THEN null ELSE cast(TR_ret#94163430 as float) END AS TR_ret#94163669, CASE WHEN ((IR_ret#94163431 = NA) OR (IR_ret#94163431 = null)) THEN null ELSE cast(IR_ret#94163431 as float) END AS IR_ret#94163671, CASE WHEN ((annual_resret#94163432 = NA) OR (annual_resret#94163432 = null)) THEN null ELSE cast(annual_resret#94163432 as float) END AS annual_resret#94163673, CASE WHEN ((std_resret#94163433 = NA) OR (std_resret#94163433 = null)) THEN null ELSE cast(std_resret#94163433 as float) END AS std_resret#94163675, CASE WHEN ((Sharpe_resret#94163434 = NA) OR (Sharpe_resret#94163434 = null)) THEN null ELSE cast(Sharpe_resret#94163434 as float) END AS Sharpe_resret#94163677, CASE WHEN ((PctPos_resret#94163435 = NA) OR (PctPos_resret#94163435 = null)) THEN null ELSE cast(PctPos_resret#94163435 as float) END AS PctPos_resret#94163679, CASE WHEN ((TR_resret#94163436 = NA) OR (TR_resret#94163436 = null)) THEN null ELSE cast(TR_resret#94163436 as float) END AS TR_resret#94163681, CASE WHEN ((IR_resret#94163437 = NA) OR (IR_resret#94163437 = null)) THEN null ELSE cast(IR_resret#94163437 as float) END AS IR_resret#94163683, CASE WHEN ((annual_retnet#94163438 = NA) OR (annual_retnet#94163438 = null)) THEN null ELSE cast(annual_retnet#94163438 as float) END AS annual_retnet#94163685, CASE WHEN ((std_retnet#94163439 = NA) OR (std_retnet#94163439 = null)) THEN null ELSE cast(std_retnet#94163439 as float) END AS std_retnet#94163687, CASE WHEN ((Sharpe_retnet#94163440 = NA) OR (Sharpe_retnet#94163440 = null)) THEN null ELSE cast(Sharpe_retnet#94163440 as float) END AS Sharpe_retnet#94163689, CASE WHEN ((PctPos_retnet#94163441 = NA) OR (PctPos_retnet#94163441 = null)) THEN null ELSE cast(PctPos_retnet#94163441 as float) END AS PctPos_retnet#94163692, CASE WHEN ((TR_retnet#94163442 = NA) OR (TR_retnet#94163442 = null)) THEN null ELSE cast(TR_retnet#94163442 as float) END AS TR_retnet#94163694, CASE WHEN ((IR_retnet#94163443 = NA) OR (IR_retnet#94163443 = null)) THEN null ELSE cast(IR_retnet#94163443 as float) END AS IR_retnet#94163696, CASE WHEN ((turnover#94163444 = NA) OR (turnover#94163444 = null)) THEN null ELSE cast(turnover#94163444 as float) END AS turnover#94163698] +- FileScan csv [year#94163420,retIC#94163421,resretIC#94163422,numcos#94163423,numdates#94163424,annual_bmret#94163425,annual_ret#94163426,std_ret#94163427,Sharpe_ret#94163428,PctPos_ret#94163429,TR_ret#94163430,IR_ret#94163431,annual_resret#94163432,std_resret#94163433,Sharpe_resret#94163434,PctPos_resret#94163435,TR_resret#94163436,IR_resret#94163437,annual_retnet#94163438,std_retnet#94163439,Sharpe_retnet#94163440,PctPos_retnet#94163441,TR_retnet#94163442,IR_retnet#94163443,turnover#94163444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94163420, retIC#94163421, resretIC#94163422, numcos#94163423, numdates#94163424, annual_bmret#94163425, annual_ret#94163426, std_ret#94163427, Sharpe_ret#94163428, PctPos_ret#94163429, TR_ret#94163430, IR_ret#94163431, annual_resret#94163432, std_resret#94163433, Sharpe_resret#94163434, PctPos_resret#94163435, TR_resret#94163436, IR_resret#94163437, annual_retnet#94163438, std_retnet#94163439, Sharpe_retnet#94163440, PctPos_retnet#94163441, TR_retnet#94163442, IR_retnet#94163443, turnover#94163444] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/value/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94163420 = NA) OR (year#94163420 = null)) THEN null ELSE cast(year#94163420 as int) END AS year#94163653, CASE WHEN ((retIC#94163421 = NA) OR (retIC#94163421 = null)) THEN null ELSE cast(retIC#94163421 as float) END AS retIC#94163654, CASE WHEN ((resretIC#94163422 = NA) OR (resretIC#94163422 = null)) THEN null ELSE cast(resretIC#94163422 as float) END AS resretIC#94163655, CASE WHEN ((numcos#94163423 = NA) OR (numcos#94163423 = null)) THEN null ELSE cast(numcos#94163423 as float) END AS numcos#94163656, CASE WHEN ((numdates#94163424 = NA) OR (numdates#94163424 = null)) THEN null ELSE cast(numdates#94163424 as int) END AS numdates#94163657, CASE WHEN ((annual_bmret#94163425 = NA) OR (annual_bmret#94163425 = null)) THEN null ELSE cast(annual_bmret#94163425 as float) END AS annual_bmret#94163658, CASE WHEN ((annual_ret#94163426 = NA) OR (annual_ret#94163426 = null)) THEN null ELSE cast(annual_ret#94163426 as float) END AS annual_ret#94163661, CASE WHEN ((std_ret#94163427 = NA) OR (std_ret#94163427 = null)) THEN null ELSE cast(std_ret#94163427 as float) END AS std_ret#94163663, CASE WHEN ((Sharpe_ret#94163428 = NA) OR (Sharpe_ret#94163428 = null)) THEN null ELSE cast(Sharpe_ret#94163428 as float) END AS Sharpe_ret#94163665, CASE WHEN ((PctPos_ret#94163429 = NA) OR (PctPos_ret#94163429 = null)) THEN null ELSE cast(PctPos_ret#94163429 as float) END AS PctPos_ret#94163667, CASE WHEN ((TR_ret#94163430 = NA) OR (TR_ret#94163430 = null)) THEN null ELSE cast(TR_ret#94163430 as float) END AS TR_ret#94163669, CASE WHEN ((IR_ret#94163431 = NA) OR (IR_ret#94163431 = null)) THEN null ELSE cast(IR_ret#94163431 as float) END AS IR_ret#94163671, CASE WHEN ((annual_resret#94163432 = NA) OR (annual_resret#94163432 = null)) THEN null ELSE cast(annual_resret#94163432 as float) END AS annual_resret#94163673, CASE WHEN ((std_resret#94163433 = NA) OR (std_resret#94163433 = null)) THEN null ELSE cast(std_resret#94163433 as float) END AS std_resret#94163675, CASE WHEN ((Sharpe_resret#94163434 = NA) OR (Sharpe_resret#94163434 = null)) THEN null ELSE cast(Sharpe_resret#94163434 as float) END AS Sharpe_resret#94163677, CASE WHEN ((PctPos_resret#94163435 = NA) OR (PctPos_resret#94163435 = null)) THEN null ELSE cast(PctPos_resret#94163435 as float) END AS PctPos_resret#94163679, CASE WHEN ((TR_resret#94163436 = NA) OR (TR_resret#94163436 = null)) THEN null ELSE cast(TR_resret#94163436 as float) END AS TR_resret#94163681, CASE WHEN ((IR_resret#94163437 = NA) OR (IR_resret#94163437 = null)) THEN null ELSE cast(IR_resret#94163437 as float) END AS IR_resret#94163683, CASE WHEN ((annual_retnet#94163438 = NA) OR (annual_retnet#94163438 = null)) THEN null ELSE cast(annual_retnet#94163438 as float) END AS annual_retnet#94163685, CASE WHEN ((std_retnet#94163439 = NA) OR (std_retnet#94163439 = null)) THEN null ELSE cast(std_retnet#94163439 as float) END AS std_retnet#94163687, CASE WHEN ((Sharpe_retnet#94163440 = NA) OR (Sharpe_retnet#94163440 = null)) THEN null ELSE cast(Sharpe_retnet#94163440 as float) END AS Sharpe_retnet#94163689, CASE WHEN ((PctPos_retnet#94163441 = NA) OR (PctPos_retnet#94163441 = null)) THEN null ELSE cast(PctPos_retnet#94163441 as float) END AS PctPos_retnet#94163692, CASE WHEN ((TR_retnet#94163442 = NA) OR (TR_retnet#94163442 = null)) THEN null ELSE cast(TR_retnet#94163442 as float) END AS TR_retnet#94163694, CASE WHEN ((IR_retnet#94163443 = NA) OR (IR_retnet#94163443 = null)) THEN null ELSE cast(IR_retnet#94163443 as float) END AS IR_retnet#94163696, CASE WHEN ((turnover#94163444 = NA) OR (turnover#94163444 = null)) THEN null ELSE cast(turnover#94163444 as float) END AS turnover#94163698] Input [25]: [year#94163420, retIC#94163421, resretIC#94163422, numcos#94163423, numdates#94163424, annual_bmret#94163425, annual_ret#94163426, std_ret#94163427, Sharpe_ret#94163428, PctPos_ret#94163429, TR_ret#94163430, IR_ret#94163431, annual_resret#94163432, std_resret#94163433, Sharpe_resret#94163434, PctPos_resret#94163435, TR_resret#94163436, IR_resret#94163437, annual_retnet#94163438, std_retnet#94163439, Sharpe_retnet#94163440, PctPos_retnet#94163441, TR_retnet#94163442, IR_retnet#94163443, turnover#94163444] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94163698, year#94163653] (8) Filter [codegen id : 1] Input [2]: [turnover#94163698, year#94163653] Condition : isnotnull(turnover#94163698) (9) Project [codegen id : 1] Output [3]: [year#94163653, turnover#94163698, (1.0 / cast(turnover#94163698 as double)) AS days_hold#94163736] Input [2]: [turnover#94163698, year#94163653] (10) Exchange Input [3]: [year#94163653, turnover#94163698, days_hold#94163736] Arguments: rangepartitioning(year#94163653 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7517943] (11) Sort [codegen id : 2] Input [3]: [year#94163653, turnover#94163698, days_hold#94163736] Arguments: [year#94163653 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94163653, turnover#94163698, days_hold#94163736] (13) CollectLimit Input [3]: [year#94163653, turnover#94163698, days_hold#94163736] Arguments: 10000