== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94342706, turnover#94342830, days_hold#94342908] Arguments: [year#94342706, turnover#94342830, days_hold#94342908] (2) InMemoryRelation Arguments: [year#94342706, turnover#94342830, days_hold#94342908], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94342706 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94342706 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7532482] +- *(1) Project [year#94342706, turnover#94342830, (1.0 / cast(turnover#94342830 as double)) AS days_hold#94342908] +- *(1) Filter isnotnull(turnover#94342830) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94342830, year#94342706], [isnotnull(turnover#94342830)] +- InMemoryRelation [year#94342706, retIC#94342707, resretIC#94342708, numcos#94342709, numdates#94342710, annual_bmret#94342711, annual_ret#94342712, std_ret#94342713, Sharpe_ret#94342715, PctPos_ret#94342716, TR_ret#94342717, IR_ret#94342718, annual_resret#94342719, std_resret#94342720, Sharpe_resret#94342754, PctPos_resret#94342755, TR_resret#94342789, IR_resret#94342790, annual_retnet#94342791, std_retnet#94342792, Sharpe_retnet#94342793, PctPos_retnet#94342794, TR_retnet#94342828, IR_retnet#94342829, turnover#94342830], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94342605 = NA) OR (year#94342605 = null)) THEN null ELSE cast(year#94342605 as float) END AS year#94342706, CASE WHEN ((retIC#94342606 = NA) OR (retIC#94342606 = null)) THEN null ELSE cast(retIC#94342606 as float) END AS retIC#94342707, CASE WHEN ((resretIC#94342607 = NA) OR (resretIC#94342607 = null)) THEN null ELSE cast(resretIC#94342607 as float) END AS resretIC#94342708, CASE WHEN ((numcos#94342608 = NA) OR (numcos#94342608 = null)) THEN null ELSE cast(numcos#94342608 as float) END AS numcos#94342709, CASE WHEN ((numdates#94342609 = NA) OR (numdates#94342609 = null)) THEN null ELSE cast(numdates#94342609 as int) END AS numdates#94342710, CASE WHEN ((annual_bmret#94342610 = NA) OR (annual_bmret#94342610 = null)) THEN null ELSE cast(annual_bmret#94342610 as float) END AS annual_bmret#94342711, CASE WHEN ((annual_ret#94342611 = NA) OR (annual_ret#94342611 = null)) THEN null ELSE cast(annual_ret#94342611 as float) END AS annual_ret#94342712, CASE WHEN ((std_ret#94342612 = NA) OR (std_ret#94342612 = null)) THEN null ELSE cast(std_ret#94342612 as float) END AS std_ret#94342713, CASE WHEN ((Sharpe_ret#94342613 = NA) OR (Sharpe_ret#94342613 = null)) THEN null ELSE cast(Sharpe_ret#94342613 as float) END AS Sharpe_ret#94342715, CASE WHEN ((PctPos_ret#94342614 = NA) OR (PctPos_ret#94342614 = null)) THEN null ELSE cast(PctPos_ret#94342614 as float) END AS PctPos_ret#94342716, CASE WHEN ((TR_ret#94342615 = NA) OR (TR_ret#94342615 = null)) THEN null ELSE cast(TR_ret#94342615 as float) END AS TR_ret#94342717, CASE WHEN ((IR_ret#94342616 = NA) OR (IR_ret#94342616 = null)) THEN null ELSE cast(IR_ret#94342616 as float) END AS IR_ret#94342718, CASE WHEN ((annual_resret#94342617 = NA) OR (annual_resret#94342617 = null)) THEN null ELSE cast(annual_resret#94342617 as float) END AS annual_resret#94342719, CASE WHEN ((std_resret#94342618 = NA) OR (std_resret#94342618 = null)) THEN null ELSE cast(std_resret#94342618 as float) END AS std_resret#94342720, CASE WHEN ((Sharpe_resret#94342619 = NA) OR (Sharpe_resret#94342619 = null)) THEN null ELSE cast(Sharpe_resret#94342619 as float) END AS Sharpe_resret#94342754, CASE WHEN ((PctPos_resret#94342620 = NA) OR (PctPos_resret#94342620 = null)) THEN null ELSE cast(PctPos_resret#94342620 as float) END AS PctPos_resret#94342755, CASE WHEN ((TR_resret#94342621 = NA) OR (TR_resret#94342621 = null)) THEN null ELSE cast(TR_resret#94342621 as float) END AS TR_resret#94342789, CASE WHEN ((IR_resret#94342622 = NA) OR (IR_resret#94342622 = null)) THEN null ELSE cast(IR_resret#94342622 as float) END AS IR_resret#94342790, CASE WHEN ((annual_retnet#94342623 = NA) OR (annual_retnet#94342623 = null)) THEN null ELSE cast(annual_retnet#94342623 as float) END AS annual_retnet#94342791, CASE WHEN ((std_retnet#94342624 = NA) OR (std_retnet#94342624 = null)) THEN null ELSE cast(std_retnet#94342624 as float) END AS std_retnet#94342792, CASE WHEN ((Sharpe_retnet#94342625 = NA) OR (Sharpe_retnet#94342625 = null)) THEN null ELSE cast(Sharpe_retnet#94342625 as float) END AS Sharpe_retnet#94342793, CASE WHEN ((PctPos_retnet#94342626 = NA) OR (PctPos_retnet#94342626 = null)) THEN null ELSE cast(PctPos_retnet#94342626 as float) END AS PctPos_retnet#94342794, CASE WHEN ((TR_retnet#94342627 = NA) OR (TR_retnet#94342627 = null)) THEN null ELSE cast(TR_retnet#94342627 as float) END AS TR_retnet#94342828, CASE WHEN ((IR_retnet#94342628 = NA) OR (IR_retnet#94342628 = null)) THEN null ELSE cast(IR_retnet#94342628 as float) END AS IR_retnet#94342829, CASE WHEN ((turnover#94342629 = NA) OR (turnover#94342629 = null)) THEN null ELSE cast(turnover#94342629 as float) END AS turnover#94342830] +- FileScan csv [year#94342605,retIC#94342606,resretIC#94342607,numcos#94342608,numdates#94342609,annual_bmret#94342610,annual_ret#94342611,std_ret#94342612,Sharpe_ret#94342613,PctPos_ret#94342614,TR_ret#94342615,IR_ret#94342616,annual_resret#94342617,std_resret#94342618,Sharpe_resret#94342619,PctPos_resret#94342620,TR_resret#94342621,IR_resret#94342622,annual_retnet#94342623,std_retnet#94342624,Sharpe_retnet#94342625,PctPos_retnet#94342626,TR_retnet#94342627,IR_retnet#94342628,turnover#94342629] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94342706 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94342830, year#94342706] Arguments: [turnover#94342830, year#94342706], [isnotnull(turnover#94342830)] (4) InMemoryRelation Arguments: [year#94342706, retIC#94342707, resretIC#94342708, numcos#94342709, numdates#94342710, annual_bmret#94342711, annual_ret#94342712, std_ret#94342713, Sharpe_ret#94342715, PctPos_ret#94342716, TR_ret#94342717, IR_ret#94342718, annual_resret#94342719, std_resret#94342720, Sharpe_resret#94342754, PctPos_resret#94342755, TR_resret#94342789, IR_resret#94342790, annual_retnet#94342791, std_retnet#94342792, Sharpe_retnet#94342793, PctPos_retnet#94342794, TR_retnet#94342828, IR_retnet#94342829, turnover#94342830], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94342605 = NA) OR (year#94342605 = null)) THEN null ELSE cast(year#94342605 as float) END AS year#94342706, CASE WHEN ((retIC#94342606 = NA) OR (retIC#94342606 = null)) THEN null ELSE cast(retIC#94342606 as float) END AS retIC#94342707, CASE WHEN ((resretIC#94342607 = NA) OR (resretIC#94342607 = null)) THEN null ELSE cast(resretIC#94342607 as float) END AS resretIC#94342708, CASE WHEN ((numcos#94342608 = NA) OR (numcos#94342608 = null)) THEN null ELSE cast(numcos#94342608 as float) END AS numcos#94342709, CASE WHEN ((numdates#94342609 = NA) OR (numdates#94342609 = null)) THEN null ELSE cast(numdates#94342609 as int) END AS numdates#94342710, CASE WHEN ((annual_bmret#94342610 = NA) OR (annual_bmret#94342610 = null)) THEN null ELSE cast(annual_bmret#94342610 as float) END AS annual_bmret#94342711, CASE WHEN ((annual_ret#94342611 = NA) OR (annual_ret#94342611 = null)) THEN null ELSE cast(annual_ret#94342611 as float) END AS annual_ret#94342712, CASE WHEN ((std_ret#94342612 = NA) OR (std_ret#94342612 = null)) THEN null ELSE cast(std_ret#94342612 as float) END AS std_ret#94342713, CASE WHEN ((Sharpe_ret#94342613 = NA) OR (Sharpe_ret#94342613 = null)) THEN null ELSE cast(Sharpe_ret#94342613 as float) END AS Sharpe_ret#94342715, CASE WHEN ((PctPos_ret#94342614 = NA) OR (PctPos_ret#94342614 = null)) THEN null ELSE cast(PctPos_ret#94342614 as float) END AS PctPos_ret#94342716, CASE WHEN ((TR_ret#94342615 = NA) OR (TR_ret#94342615 = null)) THEN null ELSE cast(TR_ret#94342615 as float) END AS TR_ret#94342717, CASE WHEN ((IR_ret#94342616 = NA) OR (IR_ret#94342616 = null)) THEN null ELSE cast(IR_ret#94342616 as float) END AS IR_ret#94342718, CASE WHEN ((annual_resret#94342617 = NA) OR (annual_resret#94342617 = null)) THEN null ELSE cast(annual_resret#94342617 as float) END AS annual_resret#94342719, CASE WHEN ((std_resret#94342618 = NA) OR (std_resret#94342618 = null)) THEN null ELSE cast(std_resret#94342618 as float) END AS std_resret#94342720, CASE WHEN ((Sharpe_resret#94342619 = NA) OR (Sharpe_resret#94342619 = null)) THEN null ELSE cast(Sharpe_resret#94342619 as float) END AS Sharpe_resret#94342754, CASE WHEN ((PctPos_resret#94342620 = NA) OR (PctPos_resret#94342620 = null)) THEN null ELSE cast(PctPos_resret#94342620 as float) END AS PctPos_resret#94342755, CASE WHEN ((TR_resret#94342621 = NA) OR (TR_resret#94342621 = null)) THEN null ELSE cast(TR_resret#94342621 as float) END AS TR_resret#94342789, CASE WHEN ((IR_resret#94342622 = NA) OR (IR_resret#94342622 = null)) THEN null ELSE cast(IR_resret#94342622 as float) END AS IR_resret#94342790, CASE WHEN ((annual_retnet#94342623 = NA) OR (annual_retnet#94342623 = null)) THEN null ELSE cast(annual_retnet#94342623 as float) END AS annual_retnet#94342791, CASE WHEN ((std_retnet#94342624 = NA) OR (std_retnet#94342624 = null)) THEN null ELSE cast(std_retnet#94342624 as float) END AS std_retnet#94342792, CASE WHEN ((Sharpe_retnet#94342625 = NA) OR (Sharpe_retnet#94342625 = null)) THEN null ELSE cast(Sharpe_retnet#94342625 as float) END AS Sharpe_retnet#94342793, CASE WHEN ((PctPos_retnet#94342626 = NA) OR (PctPos_retnet#94342626 = null)) THEN null ELSE cast(PctPos_retnet#94342626 as float) END AS PctPos_retnet#94342794, CASE WHEN ((TR_retnet#94342627 = NA) OR (TR_retnet#94342627 = null)) THEN null ELSE cast(TR_retnet#94342627 as float) END AS TR_retnet#94342828, CASE WHEN ((IR_retnet#94342628 = NA) OR (IR_retnet#94342628 = null)) THEN null ELSE cast(IR_retnet#94342628 as float) END AS IR_retnet#94342829, CASE WHEN ((turnover#94342629 = NA) OR (turnover#94342629 = null)) THEN null ELSE cast(turnover#94342629 as float) END AS turnover#94342830] +- FileScan csv [year#94342605,retIC#94342606,resretIC#94342607,numcos#94342608,numdates#94342609,annual_bmret#94342610,annual_ret#94342611,std_ret#94342612,Sharpe_ret#94342613,PctPos_ret#94342614,TR_ret#94342615,IR_ret#94342616,annual_resret#94342617,std_resret#94342618,Sharpe_resret#94342619,PctPos_resret#94342620,TR_resret#94342621,IR_resret#94342622,annual_retnet#94342623,std_retnet#94342624,Sharpe_retnet#94342625,PctPos_retnet#94342626,TR_retnet#94342627,IR_retnet#94342628,turnover#94342629] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94342605, retIC#94342606, resretIC#94342607, numcos#94342608, numdates#94342609, annual_bmret#94342610, annual_ret#94342611, std_ret#94342612, Sharpe_ret#94342613, PctPos_ret#94342614, TR_ret#94342615, IR_ret#94342616, annual_resret#94342617, std_resret#94342618, Sharpe_resret#94342619, PctPos_resret#94342620, TR_resret#94342621, IR_resret#94342622, annual_retnet#94342623, std_retnet#94342624, Sharpe_retnet#94342625, PctPos_retnet#94342626, TR_retnet#94342627, IR_retnet#94342628, turnover#94342629] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/estimize_signal_history/estimizesignal_preearnings/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94342605 = NA) OR (year#94342605 = null)) THEN null ELSE cast(year#94342605 as float) END AS year#94342706, CASE WHEN ((retIC#94342606 = NA) OR (retIC#94342606 = null)) THEN null ELSE cast(retIC#94342606 as float) END AS retIC#94342707, CASE WHEN ((resretIC#94342607 = NA) OR (resretIC#94342607 = null)) THEN null ELSE cast(resretIC#94342607 as float) END AS resretIC#94342708, CASE WHEN ((numcos#94342608 = NA) OR (numcos#94342608 = null)) THEN null ELSE cast(numcos#94342608 as float) END AS numcos#94342709, CASE WHEN ((numdates#94342609 = NA) OR (numdates#94342609 = null)) THEN null ELSE cast(numdates#94342609 as int) END AS numdates#94342710, CASE WHEN ((annual_bmret#94342610 = NA) OR (annual_bmret#94342610 = null)) THEN null ELSE cast(annual_bmret#94342610 as float) END AS annual_bmret#94342711, CASE WHEN ((annual_ret#94342611 = NA) OR (annual_ret#94342611 = null)) THEN null ELSE cast(annual_ret#94342611 as float) END AS annual_ret#94342712, CASE WHEN ((std_ret#94342612 = NA) OR (std_ret#94342612 = null)) THEN null ELSE cast(std_ret#94342612 as float) END AS std_ret#94342713, CASE WHEN ((Sharpe_ret#94342613 = NA) OR (Sharpe_ret#94342613 = null)) THEN null ELSE cast(Sharpe_ret#94342613 as float) END AS Sharpe_ret#94342715, CASE WHEN ((PctPos_ret#94342614 = NA) OR (PctPos_ret#94342614 = null)) THEN null ELSE cast(PctPos_ret#94342614 as float) END AS PctPos_ret#94342716, CASE WHEN ((TR_ret#94342615 = NA) OR (TR_ret#94342615 = null)) THEN null ELSE cast(TR_ret#94342615 as float) END AS TR_ret#94342717, CASE WHEN ((IR_ret#94342616 = NA) OR (IR_ret#94342616 = null)) THEN null ELSE cast(IR_ret#94342616 as float) END AS IR_ret#94342718, CASE WHEN ((annual_resret#94342617 = NA) OR (annual_resret#94342617 = null)) THEN null ELSE cast(annual_resret#94342617 as float) END AS annual_resret#94342719, CASE WHEN ((std_resret#94342618 = NA) OR (std_resret#94342618 = null)) THEN null ELSE cast(std_resret#94342618 as float) END AS std_resret#94342720, CASE WHEN ((Sharpe_resret#94342619 = NA) OR (Sharpe_resret#94342619 = null)) THEN null ELSE cast(Sharpe_resret#94342619 as float) END AS Sharpe_resret#94342754, CASE WHEN ((PctPos_resret#94342620 = NA) OR (PctPos_resret#94342620 = null)) THEN null ELSE cast(PctPos_resret#94342620 as float) END AS PctPos_resret#94342755, CASE WHEN ((TR_resret#94342621 = NA) OR (TR_resret#94342621 = null)) THEN null ELSE cast(TR_resret#94342621 as float) END AS TR_resret#94342789, CASE WHEN ((IR_resret#94342622 = NA) OR (IR_resret#94342622 = null)) THEN null ELSE cast(IR_resret#94342622 as float) END AS IR_resret#94342790, CASE WHEN ((annual_retnet#94342623 = NA) OR (annual_retnet#94342623 = null)) THEN null ELSE cast(annual_retnet#94342623 as float) END AS annual_retnet#94342791, CASE WHEN ((std_retnet#94342624 = NA) OR (std_retnet#94342624 = null)) THEN null ELSE cast(std_retnet#94342624 as float) END AS std_retnet#94342792, CASE WHEN ((Sharpe_retnet#94342625 = NA) OR (Sharpe_retnet#94342625 = null)) THEN null ELSE cast(Sharpe_retnet#94342625 as float) END AS Sharpe_retnet#94342793, CASE WHEN ((PctPos_retnet#94342626 = NA) OR (PctPos_retnet#94342626 = null)) THEN null ELSE cast(PctPos_retnet#94342626 as float) END AS PctPos_retnet#94342794, CASE WHEN ((TR_retnet#94342627 = NA) OR (TR_retnet#94342627 = null)) THEN null ELSE cast(TR_retnet#94342627 as float) END AS TR_retnet#94342828, CASE WHEN ((IR_retnet#94342628 = NA) OR (IR_retnet#94342628 = null)) THEN null ELSE cast(IR_retnet#94342628 as float) END AS IR_retnet#94342829, CASE WHEN ((turnover#94342629 = NA) OR (turnover#94342629 = null)) THEN null ELSE cast(turnover#94342629 as float) END AS turnover#94342830] Input [25]: [year#94342605, retIC#94342606, resretIC#94342607, numcos#94342608, numdates#94342609, annual_bmret#94342610, annual_ret#94342611, std_ret#94342612, Sharpe_ret#94342613, PctPos_ret#94342614, TR_ret#94342615, IR_ret#94342616, annual_resret#94342617, std_resret#94342618, Sharpe_resret#94342619, PctPos_resret#94342620, TR_resret#94342621, IR_resret#94342622, annual_retnet#94342623, std_retnet#94342624, Sharpe_retnet#94342625, PctPos_retnet#94342626, TR_retnet#94342627, IR_retnet#94342628, turnover#94342629] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94342830, year#94342706] (8) Filter [codegen id : 1] Input [2]: [turnover#94342830, year#94342706] Condition : isnotnull(turnover#94342830) (9) Project [codegen id : 1] Output [3]: [year#94342706, turnover#94342830, (1.0 / cast(turnover#94342830 as double)) AS days_hold#94342908] Input [2]: [turnover#94342830, year#94342706] (10) Exchange Input [3]: [year#94342706, turnover#94342830, days_hold#94342908] Arguments: rangepartitioning(year#94342706 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7532482] (11) Sort [codegen id : 2] Input [3]: [year#94342706, turnover#94342830, days_hold#94342908] Arguments: [year#94342706 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94342706, turnover#94342830, days_hold#94342908] (13) CollectLimit Input [3]: [year#94342706, turnover#94342830, days_hold#94342908] Arguments: 1000000