== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94313935, turnover#94313982, days_hold#94314018] Arguments: [year#94313935, turnover#94313982, days_hold#94314018] (2) InMemoryRelation Arguments: [year#94313935, turnover#94313982, days_hold#94314018], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94313935 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94313935 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7530190] +- *(1) Project [year#94313935, turnover#94313982, (1.0 / cast(turnover#94313982 as double)) AS days_hold#94314018] +- *(1) Filter isnotnull(turnover#94313982) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94313982, year#94313935], [isnotnull(turnover#94313982)] +- InMemoryRelation [year#94313935, retIC#94313936, resretIC#94313939, numcos#94313941, numdates#94313943, annual_bmret#94313945, annual_ret#94313947, std_ret#94313948, Sharpe_ret#94313951, PctPos_ret#94313953, TR_ret#94313955, IR_ret#94313957, annual_resret#94313958, std_resret#94313960, Sharpe_resret#94313962, PctPos_resret#94313965, TR_resret#94313967, IR_resret#94313968, annual_retnet#94313970, std_retnet#94313972, Sharpe_retnet#94313975, PctPos_retnet#94313977, TR_retnet#94313978, IR_retnet#94313980, turnover#94313982], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94313703 = NA) OR (year#94313703 = null)) THEN null ELSE cast(year#94313703 as float) END AS year#94313935, CASE WHEN ((retIC#94313706 = NA) OR (retIC#94313706 = null)) THEN null ELSE cast(retIC#94313706 as float) END AS retIC#94313936, CASE WHEN ((resretIC#94313708 = NA) OR (resretIC#94313708 = null)) THEN null ELSE cast(resretIC#94313708 as float) END AS resretIC#94313939, CASE WHEN ((numcos#94313710 = NA) OR (numcos#94313710 = null)) THEN null ELSE cast(numcos#94313710 as float) END AS numcos#94313941, CASE WHEN ((numdates#94313712 = NA) OR (numdates#94313712 = null)) THEN null ELSE cast(numdates#94313712 as int) END AS numdates#94313943, CASE WHEN ((annual_bmret#94313713 = NA) OR (annual_bmret#94313713 = null)) THEN null ELSE cast(annual_bmret#94313713 as float) END AS annual_bmret#94313945, CASE WHEN ((annual_ret#94313715 = NA) OR (annual_ret#94313715 = null)) THEN null ELSE cast(annual_ret#94313715 as float) END AS annual_ret#94313947, CASE WHEN ((std_ret#94313717 = NA) OR (std_ret#94313717 = null)) THEN null ELSE cast(std_ret#94313717 as float) END AS std_ret#94313948, CASE WHEN ((Sharpe_ret#94313719 = NA) OR (Sharpe_ret#94313719 = null)) THEN null ELSE cast(Sharpe_ret#94313719 as float) END AS Sharpe_ret#94313951, CASE WHEN ((PctPos_ret#94313721 = NA) OR (PctPos_ret#94313721 = null)) THEN null ELSE cast(PctPos_ret#94313721 as float) END AS PctPos_ret#94313953, CASE WHEN ((TR_ret#94313723 = NA) OR (TR_ret#94313723 = null)) THEN null ELSE cast(TR_ret#94313723 as float) END AS TR_ret#94313955, CASE WHEN ((IR_ret#94313725 = NA) OR (IR_ret#94313725 = null)) THEN null ELSE cast(IR_ret#94313725 as float) END AS IR_ret#94313957, CASE WHEN ((annual_resret#94313727 = NA) OR (annual_resret#94313727 = null)) THEN null ELSE cast(annual_resret#94313727 as float) END AS annual_resret#94313958, CASE WHEN ((std_resret#94313729 = NA) OR (std_resret#94313729 = null)) THEN null ELSE cast(std_resret#94313729 as float) END AS std_resret#94313960, CASE WHEN ((Sharpe_resret#94313731 = NA) OR (Sharpe_resret#94313731 = null)) THEN null ELSE cast(Sharpe_resret#94313731 as float) END AS Sharpe_resret#94313962, CASE WHEN ((PctPos_resret#94313733 = NA) OR (PctPos_resret#94313733 = null)) THEN null ELSE cast(PctPos_resret#94313733 as float) END AS PctPos_resret#94313965, CASE WHEN ((TR_resret#94313735 = NA) OR (TR_resret#94313735 = null)) THEN null ELSE cast(TR_resret#94313735 as float) END AS TR_resret#94313967, CASE WHEN ((IR_resret#94313737 = NA) OR (IR_resret#94313737 = null)) THEN null ELSE cast(IR_resret#94313737 as float) END AS IR_resret#94313968, CASE WHEN ((annual_retnet#94313739 = NA) OR (annual_retnet#94313739 = null)) THEN null ELSE cast(annual_retnet#94313739 as float) END AS annual_retnet#94313970, CASE WHEN ((std_retnet#94313741 = NA) OR (std_retnet#94313741 = null)) THEN null ELSE cast(std_retnet#94313741 as float) END AS std_retnet#94313972, CASE WHEN ((Sharpe_retnet#94313743 = NA) OR (Sharpe_retnet#94313743 = null)) THEN null ELSE cast(Sharpe_retnet#94313743 as float) END AS Sharpe_retnet#94313975, CASE WHEN ((PctPos_retnet#94313745 = NA) OR (PctPos_retnet#94313745 = null)) THEN null ELSE cast(PctPos_retnet#94313745 as float) END AS PctPos_retnet#94313977, CASE WHEN ((TR_retnet#94313747 = NA) OR (TR_retnet#94313747 = null)) THEN null ELSE cast(TR_retnet#94313747 as float) END AS TR_retnet#94313978, CASE WHEN ((IR_retnet#94313749 = NA) OR (IR_retnet#94313749 = null)) THEN null ELSE cast(IR_retnet#94313749 as float) END AS IR_retnet#94313980, CASE WHEN ((turnover#94313751 = NA) OR (turnover#94313751 = null)) THEN null ELSE cast(turnover#94313751 as float) END AS turnover#94313982] +- FileScan csv [year#94313703,retIC#94313706,resretIC#94313708,numcos#94313710,numdates#94313712,annual_bmret#94313713,annual_ret#94313715,std_ret#94313717,Sharpe_ret#94313719,PctPos_ret#94313721,TR_ret#94313723,IR_ret#94313725,annual_resret#94313727,std_resret#94313729,Sharpe_resret#94313731,PctPos_resret#94313733,TR_resret#94313735,IR_resret#94313737,annual_retnet#94313739,std_retnet#94313741,Sharpe_retnet#94313743,PctPos_retnet#94313745,TR_retnet#94313747,IR_retnet#94313749,turnover#94313751] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94313935 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94313982, year#94313935] Arguments: [turnover#94313982, year#94313935], [isnotnull(turnover#94313982)] (4) InMemoryRelation Arguments: [year#94313935, retIC#94313936, resretIC#94313939, numcos#94313941, numdates#94313943, annual_bmret#94313945, annual_ret#94313947, std_ret#94313948, Sharpe_ret#94313951, PctPos_ret#94313953, TR_ret#94313955, IR_ret#94313957, annual_resret#94313958, std_resret#94313960, Sharpe_resret#94313962, PctPos_resret#94313965, TR_resret#94313967, IR_resret#94313968, annual_retnet#94313970, std_retnet#94313972, Sharpe_retnet#94313975, PctPos_retnet#94313977, TR_retnet#94313978, IR_retnet#94313980, turnover#94313982], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94313703 = NA) OR (year#94313703 = null)) THEN null ELSE cast(year#94313703 as float) END AS year#94313935, CASE WHEN ((retIC#94313706 = NA) OR (retIC#94313706 = null)) THEN null ELSE cast(retIC#94313706 as float) END AS retIC#94313936, CASE WHEN ((resretIC#94313708 = NA) OR (resretIC#94313708 = null)) THEN null ELSE cast(resretIC#94313708 as float) END AS resretIC#94313939, CASE WHEN ((numcos#94313710 = NA) OR (numcos#94313710 = null)) THEN null ELSE cast(numcos#94313710 as float) END AS numcos#94313941, CASE WHEN ((numdates#94313712 = NA) OR (numdates#94313712 = null)) THEN null ELSE cast(numdates#94313712 as int) END AS numdates#94313943, CASE WHEN ((annual_bmret#94313713 = NA) OR (annual_bmret#94313713 = null)) THEN null ELSE cast(annual_bmret#94313713 as float) END AS annual_bmret#94313945, CASE WHEN ((annual_ret#94313715 = NA) OR (annual_ret#94313715 = null)) THEN null ELSE cast(annual_ret#94313715 as float) END AS annual_ret#94313947, CASE WHEN ((std_ret#94313717 = NA) OR (std_ret#94313717 = null)) THEN null ELSE cast(std_ret#94313717 as float) END AS std_ret#94313948, CASE WHEN ((Sharpe_ret#94313719 = NA) OR (Sharpe_ret#94313719 = null)) THEN null ELSE cast(Sharpe_ret#94313719 as float) END AS Sharpe_ret#94313951, CASE WHEN ((PctPos_ret#94313721 = NA) OR (PctPos_ret#94313721 = null)) THEN null ELSE cast(PctPos_ret#94313721 as float) END AS PctPos_ret#94313953, CASE WHEN ((TR_ret#94313723 = NA) OR (TR_ret#94313723 = null)) THEN null ELSE cast(TR_ret#94313723 as float) END AS TR_ret#94313955, CASE WHEN ((IR_ret#94313725 = NA) OR (IR_ret#94313725 = null)) THEN null ELSE cast(IR_ret#94313725 as float) END AS IR_ret#94313957, CASE WHEN ((annual_resret#94313727 = NA) OR (annual_resret#94313727 = null)) THEN null ELSE cast(annual_resret#94313727 as float) END AS annual_resret#94313958, CASE WHEN ((std_resret#94313729 = NA) OR (std_resret#94313729 = null)) THEN null ELSE cast(std_resret#94313729 as float) END AS std_resret#94313960, CASE WHEN ((Sharpe_resret#94313731 = NA) OR (Sharpe_resret#94313731 = null)) THEN null ELSE cast(Sharpe_resret#94313731 as float) END AS Sharpe_resret#94313962, CASE WHEN ((PctPos_resret#94313733 = NA) OR (PctPos_resret#94313733 = null)) THEN null ELSE cast(PctPos_resret#94313733 as float) END AS PctPos_resret#94313965, CASE WHEN ((TR_resret#94313735 = NA) OR (TR_resret#94313735 = null)) THEN null ELSE cast(TR_resret#94313735 as float) END AS TR_resret#94313967, CASE WHEN ((IR_resret#94313737 = NA) OR (IR_resret#94313737 = null)) THEN null ELSE cast(IR_resret#94313737 as float) END AS IR_resret#94313968, CASE WHEN ((annual_retnet#94313739 = NA) OR (annual_retnet#94313739 = null)) THEN null ELSE cast(annual_retnet#94313739 as float) END AS annual_retnet#94313970, CASE WHEN ((std_retnet#94313741 = NA) OR (std_retnet#94313741 = null)) THEN null ELSE cast(std_retnet#94313741 as float) END AS std_retnet#94313972, CASE WHEN ((Sharpe_retnet#94313743 = NA) OR (Sharpe_retnet#94313743 = null)) THEN null ELSE cast(Sharpe_retnet#94313743 as float) END AS Sharpe_retnet#94313975, CASE WHEN ((PctPos_retnet#94313745 = NA) OR (PctPos_retnet#94313745 = null)) THEN null ELSE cast(PctPos_retnet#94313745 as float) END AS PctPos_retnet#94313977, CASE WHEN ((TR_retnet#94313747 = NA) OR (TR_retnet#94313747 = null)) THEN null ELSE cast(TR_retnet#94313747 as float) END AS TR_retnet#94313978, CASE WHEN ((IR_retnet#94313749 = NA) OR (IR_retnet#94313749 = null)) THEN null ELSE cast(IR_retnet#94313749 as float) END AS IR_retnet#94313980, CASE WHEN ((turnover#94313751 = NA) OR (turnover#94313751 = null)) THEN null ELSE cast(turnover#94313751 as float) END AS turnover#94313982] +- FileScan csv [year#94313703,retIC#94313706,resretIC#94313708,numcos#94313710,numdates#94313712,annual_bmret#94313713,annual_ret#94313715,std_ret#94313717,Sharpe_ret#94313719,PctPos_ret#94313721,TR_ret#94313723,IR_ret#94313725,annual_resret#94313727,std_resret#94313729,Sharpe_resret#94313731,PctPos_resret#94313733,TR_resret#94313735,IR_resret#94313737,annual_retnet#94313739,std_retnet#94313741,Sharpe_retnet#94313743,PctPos_retnet#94313745,TR_retnet#94313747,IR_retnet#94313749,turnover#94313751] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/estimize_signal_histor..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94313703, retIC#94313706, resretIC#94313708, numcos#94313710, numdates#94313712, annual_bmret#94313713, annual_ret#94313715, std_ret#94313717, Sharpe_ret#94313719, PctPos_ret#94313721, TR_ret#94313723, IR_ret#94313725, annual_resret#94313727, std_resret#94313729, Sharpe_resret#94313731, PctPos_resret#94313733, TR_resret#94313735, IR_resret#94313737, annual_retnet#94313739, std_retnet#94313741, Sharpe_retnet#94313743, PctPos_retnet#94313745, TR_retnet#94313747, IR_retnet#94313749, turnover#94313751] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/estimize_signal_history/estimizesignal/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94313703 = NA) OR (year#94313703 = null)) THEN null ELSE cast(year#94313703 as float) END AS year#94313935, CASE WHEN ((retIC#94313706 = NA) OR (retIC#94313706 = null)) THEN null ELSE cast(retIC#94313706 as float) END AS retIC#94313936, CASE WHEN ((resretIC#94313708 = NA) OR (resretIC#94313708 = null)) THEN null ELSE cast(resretIC#94313708 as float) END AS resretIC#94313939, CASE WHEN ((numcos#94313710 = NA) OR (numcos#94313710 = null)) THEN null ELSE cast(numcos#94313710 as float) END AS numcos#94313941, CASE WHEN ((numdates#94313712 = NA) OR (numdates#94313712 = null)) THEN null ELSE cast(numdates#94313712 as int) END AS numdates#94313943, CASE WHEN ((annual_bmret#94313713 = NA) OR (annual_bmret#94313713 = null)) THEN null ELSE cast(annual_bmret#94313713 as float) END AS annual_bmret#94313945, CASE WHEN ((annual_ret#94313715 = NA) OR (annual_ret#94313715 = null)) THEN null ELSE cast(annual_ret#94313715 as float) END AS annual_ret#94313947, CASE WHEN ((std_ret#94313717 = NA) OR (std_ret#94313717 = null)) THEN null ELSE cast(std_ret#94313717 as float) END AS std_ret#94313948, CASE WHEN ((Sharpe_ret#94313719 = NA) OR (Sharpe_ret#94313719 = null)) THEN null ELSE cast(Sharpe_ret#94313719 as float) END AS Sharpe_ret#94313951, CASE WHEN ((PctPos_ret#94313721 = NA) OR (PctPos_ret#94313721 = null)) THEN null ELSE cast(PctPos_ret#94313721 as float) END AS PctPos_ret#94313953, CASE WHEN ((TR_ret#94313723 = NA) OR (TR_ret#94313723 = null)) THEN null ELSE cast(TR_ret#94313723 as float) END AS TR_ret#94313955, CASE WHEN ((IR_ret#94313725 = NA) OR (IR_ret#94313725 = null)) THEN null ELSE cast(IR_ret#94313725 as float) END AS IR_ret#94313957, CASE WHEN ((annual_resret#94313727 = NA) OR (annual_resret#94313727 = null)) THEN null ELSE cast(annual_resret#94313727 as float) END AS annual_resret#94313958, CASE WHEN ((std_resret#94313729 = NA) OR (std_resret#94313729 = null)) THEN null ELSE cast(std_resret#94313729 as float) END AS std_resret#94313960, CASE WHEN ((Sharpe_resret#94313731 = NA) OR (Sharpe_resret#94313731 = null)) THEN null ELSE cast(Sharpe_resret#94313731 as float) END AS Sharpe_resret#94313962, CASE WHEN ((PctPos_resret#94313733 = NA) OR (PctPos_resret#94313733 = null)) THEN null ELSE cast(PctPos_resret#94313733 as float) END AS PctPos_resret#94313965, CASE WHEN ((TR_resret#94313735 = NA) OR (TR_resret#94313735 = null)) THEN null ELSE cast(TR_resret#94313735 as float) END AS TR_resret#94313967, CASE WHEN ((IR_resret#94313737 = NA) OR (IR_resret#94313737 = null)) THEN null ELSE cast(IR_resret#94313737 as float) END AS IR_resret#94313968, CASE WHEN ((annual_retnet#94313739 = NA) OR (annual_retnet#94313739 = null)) THEN null ELSE cast(annual_retnet#94313739 as float) END AS annual_retnet#94313970, CASE WHEN ((std_retnet#94313741 = NA) OR (std_retnet#94313741 = null)) THEN null ELSE cast(std_retnet#94313741 as float) END AS std_retnet#94313972, CASE WHEN ((Sharpe_retnet#94313743 = NA) OR (Sharpe_retnet#94313743 = null)) THEN null ELSE cast(Sharpe_retnet#94313743 as float) END AS Sharpe_retnet#94313975, CASE WHEN ((PctPos_retnet#94313745 = NA) OR (PctPos_retnet#94313745 = null)) THEN null ELSE cast(PctPos_retnet#94313745 as float) END AS PctPos_retnet#94313977, CASE WHEN ((TR_retnet#94313747 = NA) OR (TR_retnet#94313747 = null)) THEN null ELSE cast(TR_retnet#94313747 as float) END AS TR_retnet#94313978, CASE WHEN ((IR_retnet#94313749 = NA) OR (IR_retnet#94313749 = null)) THEN null ELSE cast(IR_retnet#94313749 as float) END AS IR_retnet#94313980, CASE WHEN ((turnover#94313751 = NA) OR (turnover#94313751 = null)) THEN null ELSE cast(turnover#94313751 as float) END AS turnover#94313982] Input [25]: [year#94313703, retIC#94313706, resretIC#94313708, numcos#94313710, numdates#94313712, annual_bmret#94313713, annual_ret#94313715, std_ret#94313717, Sharpe_ret#94313719, PctPos_ret#94313721, TR_ret#94313723, IR_ret#94313725, annual_resret#94313727, std_resret#94313729, Sharpe_resret#94313731, PctPos_resret#94313733, TR_resret#94313735, IR_resret#94313737, annual_retnet#94313739, std_retnet#94313741, Sharpe_retnet#94313743, PctPos_retnet#94313745, TR_retnet#94313747, IR_retnet#94313749, turnover#94313751] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94313982, year#94313935] (8) Filter [codegen id : 1] Input [2]: [turnover#94313982, year#94313935] Condition : isnotnull(turnover#94313982) (9) Project [codegen id : 1] Output [3]: [year#94313935, turnover#94313982, (1.0 / cast(turnover#94313982 as double)) AS days_hold#94314018] Input [2]: [turnover#94313982, year#94313935] (10) Exchange Input [3]: [year#94313935, turnover#94313982, days_hold#94314018] Arguments: rangepartitioning(year#94313935 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7530190] (11) Sort [codegen id : 2] Input [3]: [year#94313935, turnover#94313982, days_hold#94314018] Arguments: [year#94313935 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94313935, turnover#94313982, days_hold#94314018] (13) CollectLimit Input [3]: [year#94313935, turnover#94313982, days_hold#94314018] Arguments: 1000000