== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94085717, turnover#94085840, days_hold#94085908] Arguments: [year#94085717, turnover#94085840, days_hold#94085908] (2) InMemoryRelation Arguments: [year#94085717, turnover#94085840, days_hold#94085908], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94085717 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94085717 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7511948] +- *(1) Project [year#94085717, turnover#94085840, (1.0 / cast(turnover#94085840 as double)) AS days_hold#94085908] +- *(1) Filter isnotnull(turnover#94085840) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94085840, year#94085717], [isnotnull(turnover#94085840)] +- InMemoryRelation [year#94085717, retIC#94085718, resretIC#94085719, numcos#94085720, numdates#94085721, annual_bmret#94085722, annual_ret#94085723, std_ret#94085724, Sharpe_ret#94085725, PctPos_ret#94085726, TR_ret#94085760, IR_ret#94085761, annual_resret#94085762, std_resret#94085796, Sharpe_resret#94085797, PctPos_resret#94085798, TR_resret#94085799, IR_resret#94085800, annual_retnet#94085801, std_retnet#94085802, Sharpe_retnet#94085803, PctPos_retnet#94085837, TR_retnet#94085838, IR_retnet#94085839, turnover#94085840], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94085615 = NA) OR (year#94085615 = null)) THEN null ELSE cast(year#94085615 as int) END AS year#94085717, CASE WHEN ((retIC#94085616 = NA) OR (retIC#94085616 = null)) THEN null ELSE cast(retIC#94085616 as float) END AS retIC#94085718, CASE WHEN ((resretIC#94085617 = NA) OR (resretIC#94085617 = null)) THEN null ELSE cast(resretIC#94085617 as float) END AS resretIC#94085719, CASE WHEN ((numcos#94085618 = NA) OR (numcos#94085618 = null)) THEN null ELSE cast(numcos#94085618 as float) END AS numcos#94085720, CASE WHEN ((numdates#94085619 = NA) OR (numdates#94085619 = null)) THEN null ELSE cast(numdates#94085619 as int) END AS numdates#94085721, CASE WHEN ((annual_bmret#94085620 = NA) OR (annual_bmret#94085620 = null)) THEN null ELSE cast(annual_bmret#94085620 as float) END AS annual_bmret#94085722, CASE WHEN ((annual_ret#94085621 = NA) OR (annual_ret#94085621 = null)) THEN null ELSE cast(annual_ret#94085621 as float) END AS annual_ret#94085723, CASE WHEN ((std_ret#94085622 = NA) OR (std_ret#94085622 = null)) THEN null ELSE cast(std_ret#94085622 as float) END AS std_ret#94085724, CASE WHEN ((Sharpe_ret#94085623 = NA) OR (Sharpe_ret#94085623 = null)) THEN null ELSE cast(Sharpe_ret#94085623 as float) END AS Sharpe_ret#94085725, CASE WHEN ((PctPos_ret#94085624 = NA) OR (PctPos_ret#94085624 = null)) THEN null ELSE cast(PctPos_ret#94085624 as float) END AS PctPos_ret#94085726, CASE WHEN ((TR_ret#94085625 = NA) OR (TR_ret#94085625 = null)) THEN null ELSE cast(TR_ret#94085625 as float) END AS TR_ret#94085760, CASE WHEN ((IR_ret#94085626 = NA) OR (IR_ret#94085626 = null)) THEN null ELSE cast(IR_ret#94085626 as float) END AS IR_ret#94085761, CASE WHEN ((annual_resret#94085627 = NA) OR (annual_resret#94085627 = null)) THEN null ELSE cast(annual_resret#94085627 as float) END AS annual_resret#94085762, CASE WHEN ((std_resret#94085628 = NA) OR (std_resret#94085628 = null)) THEN null ELSE cast(std_resret#94085628 as float) END AS std_resret#94085796, CASE WHEN ((Sharpe_resret#94085629 = NA) OR (Sharpe_resret#94085629 = null)) THEN null ELSE cast(Sharpe_resret#94085629 as float) END AS Sharpe_resret#94085797, CASE WHEN ((PctPos_resret#94085630 = NA) OR (PctPos_resret#94085630 = null)) THEN null ELSE cast(PctPos_resret#94085630 as float) END AS PctPos_resret#94085798, CASE WHEN ((TR_resret#94085631 = NA) OR (TR_resret#94085631 = null)) THEN null ELSE cast(TR_resret#94085631 as float) END AS TR_resret#94085799, CASE WHEN ((IR_resret#94085632 = NA) OR (IR_resret#94085632 = null)) THEN null ELSE cast(IR_resret#94085632 as float) END AS IR_resret#94085800, CASE WHEN ((annual_retnet#94085633 = NA) OR (annual_retnet#94085633 = null)) THEN null ELSE cast(annual_retnet#94085633 as float) END AS annual_retnet#94085801, CASE WHEN ((std_retnet#94085634 = NA) OR (std_retnet#94085634 = null)) THEN null ELSE cast(std_retnet#94085634 as float) END AS std_retnet#94085802, CASE WHEN ((Sharpe_retnet#94085635 = NA) OR (Sharpe_retnet#94085635 = null)) THEN null ELSE cast(Sharpe_retnet#94085635 as float) END AS Sharpe_retnet#94085803, CASE WHEN ((PctPos_retnet#94085636 = NA) OR (PctPos_retnet#94085636 = null)) THEN null ELSE cast(PctPos_retnet#94085636 as float) END AS PctPos_retnet#94085837, CASE WHEN ((TR_retnet#94085637 = NA) OR (TR_retnet#94085637 = null)) THEN null ELSE cast(TR_retnet#94085637 as float) END AS TR_retnet#94085838, CASE WHEN ((IR_retnet#94085638 = NA) OR (IR_retnet#94085638 = null)) THEN null ELSE cast(IR_retnet#94085638 as float) END AS IR_retnet#94085839, CASE WHEN ((turnover#94085639 = NA) OR (turnover#94085639 = null)) THEN null ELSE cast(turnover#94085639 as float) END AS turnover#94085840] +- FileScan csv [year#94085615,retIC#94085616,resretIC#94085617,numcos#94085618,numdates#94085619,annual_bmret#94085620,annual_ret#94085621,std_ret#94085622,Sharpe_ret#94085623,PctPos_ret#94085624,TR_ret#94085625,IR_ret#94085626,annual_resret#94085627,std_resret#94085628,Sharpe_resret#94085629,PctPos_resret#94085630,TR_resret#94085631,IR_resret#94085632,annual_retnet#94085633,std_retnet#94085634,Sharpe_retnet#94085635,PctPos_retnet#94085636,TR_retnet#94085637,IR_retnet#94085638,turnover#94085639] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatilit..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94085717 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94085840, year#94085717] Arguments: [turnover#94085840, year#94085717], [isnotnull(turnover#94085840)] (4) InMemoryRelation Arguments: [year#94085717, retIC#94085718, resretIC#94085719, numcos#94085720, numdates#94085721, annual_bmret#94085722, annual_ret#94085723, std_ret#94085724, Sharpe_ret#94085725, PctPos_ret#94085726, TR_ret#94085760, IR_ret#94085761, annual_resret#94085762, std_resret#94085796, Sharpe_resret#94085797, PctPos_resret#94085798, TR_resret#94085799, IR_resret#94085800, annual_retnet#94085801, std_retnet#94085802, Sharpe_retnet#94085803, PctPos_retnet#94085837, TR_retnet#94085838, IR_retnet#94085839, turnover#94085840], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94085615 = NA) OR (year#94085615 = null)) THEN null ELSE cast(year#94085615 as int) END AS year#94085717, CASE WHEN ((retIC#94085616 = NA) OR (retIC#94085616 = null)) THEN null ELSE cast(retIC#94085616 as float) END AS retIC#94085718, CASE WHEN ((resretIC#94085617 = NA) OR (resretIC#94085617 = null)) THEN null ELSE cast(resretIC#94085617 as float) END AS resretIC#94085719, CASE WHEN ((numcos#94085618 = NA) OR (numcos#94085618 = null)) THEN null ELSE cast(numcos#94085618 as float) END AS numcos#94085720, CASE WHEN ((numdates#94085619 = NA) OR (numdates#94085619 = null)) THEN null ELSE cast(numdates#94085619 as int) END AS numdates#94085721, CASE WHEN ((annual_bmret#94085620 = NA) OR (annual_bmret#94085620 = null)) THEN null ELSE cast(annual_bmret#94085620 as float) END AS annual_bmret#94085722, CASE WHEN ((annual_ret#94085621 = NA) OR (annual_ret#94085621 = null)) THEN null ELSE cast(annual_ret#94085621 as float) END AS annual_ret#94085723, CASE WHEN ((std_ret#94085622 = NA) OR (std_ret#94085622 = null)) THEN null ELSE cast(std_ret#94085622 as float) END AS std_ret#94085724, CASE WHEN ((Sharpe_ret#94085623 = NA) OR (Sharpe_ret#94085623 = null)) THEN null ELSE cast(Sharpe_ret#94085623 as float) END AS Sharpe_ret#94085725, CASE WHEN ((PctPos_ret#94085624 = NA) OR (PctPos_ret#94085624 = null)) THEN null ELSE cast(PctPos_ret#94085624 as float) END AS PctPos_ret#94085726, CASE WHEN ((TR_ret#94085625 = NA) OR (TR_ret#94085625 = null)) THEN null ELSE cast(TR_ret#94085625 as float) END AS TR_ret#94085760, CASE WHEN ((IR_ret#94085626 = NA) OR (IR_ret#94085626 = null)) THEN null ELSE cast(IR_ret#94085626 as float) END AS IR_ret#94085761, CASE WHEN ((annual_resret#94085627 = NA) OR (annual_resret#94085627 = null)) THEN null ELSE cast(annual_resret#94085627 as float) END AS annual_resret#94085762, CASE WHEN ((std_resret#94085628 = NA) OR (std_resret#94085628 = null)) THEN null ELSE cast(std_resret#94085628 as float) END AS std_resret#94085796, CASE WHEN ((Sharpe_resret#94085629 = NA) OR (Sharpe_resret#94085629 = null)) THEN null ELSE cast(Sharpe_resret#94085629 as float) END AS Sharpe_resret#94085797, CASE WHEN ((PctPos_resret#94085630 = NA) OR (PctPos_resret#94085630 = null)) THEN null ELSE cast(PctPos_resret#94085630 as float) END AS PctPos_resret#94085798, CASE WHEN ((TR_resret#94085631 = NA) OR (TR_resret#94085631 = null)) THEN null ELSE cast(TR_resret#94085631 as float) END AS TR_resret#94085799, CASE WHEN ((IR_resret#94085632 = NA) OR (IR_resret#94085632 = null)) THEN null ELSE cast(IR_resret#94085632 as float) END AS IR_resret#94085800, CASE WHEN ((annual_retnet#94085633 = NA) OR (annual_retnet#94085633 = null)) THEN null ELSE cast(annual_retnet#94085633 as float) END AS annual_retnet#94085801, CASE WHEN ((std_retnet#94085634 = NA) OR (std_retnet#94085634 = null)) THEN null ELSE cast(std_retnet#94085634 as float) END AS std_retnet#94085802, CASE WHEN ((Sharpe_retnet#94085635 = NA) OR (Sharpe_retnet#94085635 = null)) THEN null ELSE cast(Sharpe_retnet#94085635 as float) END AS Sharpe_retnet#94085803, CASE WHEN ((PctPos_retnet#94085636 = NA) OR (PctPos_retnet#94085636 = null)) THEN null ELSE cast(PctPos_retnet#94085636 as float) END AS PctPos_retnet#94085837, CASE WHEN ((TR_retnet#94085637 = NA) OR (TR_retnet#94085637 = null)) THEN null ELSE cast(TR_retnet#94085637 as float) END AS TR_retnet#94085838, CASE WHEN ((IR_retnet#94085638 = NA) OR (IR_retnet#94085638 = null)) THEN null ELSE cast(IR_retnet#94085638 as float) END AS IR_retnet#94085839, CASE WHEN ((turnover#94085639 = NA) OR (turnover#94085639 = null)) THEN null ELSE cast(turnover#94085639 as float) END AS turnover#94085840] +- FileScan csv [year#94085615,retIC#94085616,resretIC#94085617,numcos#94085618,numdates#94085619,annual_bmret#94085620,annual_ret#94085621,std_ret#94085622,Sharpe_ret#94085623,PctPos_ret#94085624,TR_ret#94085625,IR_ret#94085626,annual_resret#94085627,std_resret#94085628,Sharpe_resret#94085629,PctPos_resret#94085630,TR_resret#94085631,IR_resret#94085632,annual_retnet#94085633,std_retnet#94085634,Sharpe_retnet#94085635,PctPos_retnet#94085636,TR_retnet#94085637,IR_retnet#94085638,turnover#94085639] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatilit..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94085615, retIC#94085616, resretIC#94085617, numcos#94085618, numdates#94085619, annual_bmret#94085620, annual_ret#94085621, std_ret#94085622, Sharpe_ret#94085623, PctPos_ret#94085624, TR_ret#94085625, IR_ret#94085626, annual_resret#94085627, std_resret#94085628, Sharpe_resret#94085629, PctPos_resret#94085630, TR_resret#94085631, IR_resret#94085632, annual_retnet#94085633, std_retnet#94085634, Sharpe_retnet#94085635, PctPos_retnet#94085636, TR_retnet#94085637, IR_retnet#94085638, turnover#94085639] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatility/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94085615 = NA) OR (year#94085615 = null)) THEN null ELSE cast(year#94085615 as int) END AS year#94085717, CASE WHEN ((retIC#94085616 = NA) OR (retIC#94085616 = null)) THEN null ELSE cast(retIC#94085616 as float) END AS retIC#94085718, CASE WHEN ((resretIC#94085617 = NA) OR (resretIC#94085617 = null)) THEN null ELSE cast(resretIC#94085617 as float) END AS resretIC#94085719, CASE WHEN ((numcos#94085618 = NA) OR (numcos#94085618 = null)) THEN null ELSE cast(numcos#94085618 as float) END AS numcos#94085720, CASE WHEN ((numdates#94085619 = NA) OR (numdates#94085619 = null)) THEN null ELSE cast(numdates#94085619 as int) END AS numdates#94085721, CASE WHEN ((annual_bmret#94085620 = NA) OR (annual_bmret#94085620 = null)) THEN null ELSE cast(annual_bmret#94085620 as float) END AS annual_bmret#94085722, CASE WHEN ((annual_ret#94085621 = NA) OR (annual_ret#94085621 = null)) THEN null ELSE cast(annual_ret#94085621 as float) END AS annual_ret#94085723, CASE WHEN ((std_ret#94085622 = NA) OR (std_ret#94085622 = null)) THEN null ELSE cast(std_ret#94085622 as float) END AS std_ret#94085724, CASE WHEN ((Sharpe_ret#94085623 = NA) OR (Sharpe_ret#94085623 = null)) THEN null ELSE cast(Sharpe_ret#94085623 as float) END AS Sharpe_ret#94085725, CASE WHEN ((PctPos_ret#94085624 = NA) OR (PctPos_ret#94085624 = null)) THEN null ELSE cast(PctPos_ret#94085624 as float) END AS PctPos_ret#94085726, CASE WHEN ((TR_ret#94085625 = NA) OR (TR_ret#94085625 = null)) THEN null ELSE cast(TR_ret#94085625 as float) END AS TR_ret#94085760, CASE WHEN ((IR_ret#94085626 = NA) OR (IR_ret#94085626 = null)) THEN null ELSE cast(IR_ret#94085626 as float) END AS IR_ret#94085761, CASE WHEN ((annual_resret#94085627 = NA) OR (annual_resret#94085627 = null)) THEN null ELSE cast(annual_resret#94085627 as float) END AS annual_resret#94085762, CASE WHEN ((std_resret#94085628 = NA) OR (std_resret#94085628 = null)) THEN null ELSE cast(std_resret#94085628 as float) END AS std_resret#94085796, CASE WHEN ((Sharpe_resret#94085629 = NA) OR (Sharpe_resret#94085629 = null)) THEN null ELSE cast(Sharpe_resret#94085629 as float) END AS Sharpe_resret#94085797, CASE WHEN ((PctPos_resret#94085630 = NA) OR (PctPos_resret#94085630 = null)) THEN null ELSE cast(PctPos_resret#94085630 as float) END AS PctPos_resret#94085798, CASE WHEN ((TR_resret#94085631 = NA) OR (TR_resret#94085631 = null)) THEN null ELSE cast(TR_resret#94085631 as float) END AS TR_resret#94085799, CASE WHEN ((IR_resret#94085632 = NA) OR (IR_resret#94085632 = null)) THEN null ELSE cast(IR_resret#94085632 as float) END AS IR_resret#94085800, CASE WHEN ((annual_retnet#94085633 = NA) OR (annual_retnet#94085633 = null)) THEN null ELSE cast(annual_retnet#94085633 as float) END AS annual_retnet#94085801, CASE WHEN ((std_retnet#94085634 = NA) OR (std_retnet#94085634 = null)) THEN null ELSE cast(std_retnet#94085634 as float) END AS std_retnet#94085802, CASE WHEN ((Sharpe_retnet#94085635 = NA) OR (Sharpe_retnet#94085635 = null)) THEN null ELSE cast(Sharpe_retnet#94085635 as float) END AS Sharpe_retnet#94085803, CASE WHEN ((PctPos_retnet#94085636 = NA) OR (PctPos_retnet#94085636 = null)) THEN null ELSE cast(PctPos_retnet#94085636 as float) END AS PctPos_retnet#94085837, CASE WHEN ((TR_retnet#94085637 = NA) OR (TR_retnet#94085637 = null)) THEN null ELSE cast(TR_retnet#94085637 as float) END AS TR_retnet#94085838, CASE WHEN ((IR_retnet#94085638 = NA) OR (IR_retnet#94085638 = null)) THEN null ELSE cast(IR_retnet#94085638 as float) END AS IR_retnet#94085839, CASE WHEN ((turnover#94085639 = NA) OR (turnover#94085639 = null)) THEN null ELSE cast(turnover#94085639 as float) END AS turnover#94085840] Input [25]: [year#94085615, retIC#94085616, resretIC#94085617, numcos#94085618, numdates#94085619, annual_bmret#94085620, annual_ret#94085621, std_ret#94085622, Sharpe_ret#94085623, PctPos_ret#94085624, TR_ret#94085625, IR_ret#94085626, annual_resret#94085627, std_resret#94085628, Sharpe_resret#94085629, PctPos_resret#94085630, TR_resret#94085631, IR_resret#94085632, annual_retnet#94085633, std_retnet#94085634, Sharpe_retnet#94085635, PctPos_retnet#94085636, TR_retnet#94085637, IR_retnet#94085638, turnover#94085639] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94085840, year#94085717] (8) Filter [codegen id : 1] Input [2]: [turnover#94085840, year#94085717] Condition : isnotnull(turnover#94085840) (9) Project [codegen id : 1] Output [3]: [year#94085717, turnover#94085840, (1.0 / cast(turnover#94085840 as double)) AS days_hold#94085908] Input [2]: [turnover#94085840, year#94085717] (10) Exchange Input [3]: [year#94085717, turnover#94085840, days_hold#94085908] Arguments: rangepartitioning(year#94085717 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7511948] (11) Sort [codegen id : 2] Input [3]: [year#94085717, turnover#94085840, days_hold#94085908] Arguments: [year#94085717 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94085717, turnover#94085840, days_hold#94085908] (13) CollectLimit Input [3]: [year#94085717, turnover#94085840, days_hold#94085908] Arguments: 1000000