== Physical Plan == CollectLimit (12) +- * ColumnarToRow (11) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (10) +- Exchange (9) +- * Project (8) +- * Filter (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94014248, turnover#94014292, days_hold#94145431] Arguments: [year#94014248, turnover#94014292, days_hold#94145431] (2) InMemoryRelation Arguments: [year#94014248, turnover#94014292, days_hold#94145431], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94014248 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94014248 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7506265] +- *(1) Project [year#94014248, turnover#94014292, (1.0 / cast(turnover#94014292 as double)) AS days_hold#94014331] +- *(1) Filter isnotnull(turnover#94014292) +- InMemoryTableScan [turnover#94014292, year#94014248], [isnotnull(turnover#94014292)] +- InMemoryRelation [year#94014248, retIC#94014249, resretIC#94014250, numcos#94014251, numdates#94014253, annual_bmret#94014255, annual_ret#94014257, std_ret#94014259, Sharpe_ret#94014261, PctPos_ret#94014262, TR_ret#94014264, IR_ret#94014266, annual_resret#94014268, std_resret#94014270, Sharpe_resret#94014272, PctPos_resret#94014274, TR_resret#94014275, IR_resret#94014277, annual_retnet#94014279, std_retnet#94014281, Sharpe_retnet#94014283, PctPos_retnet#94014285, TR_retnet#94014287, IR_retnet#94014289, turnover#94014292], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94014016 = NA) OR (year#94014016 = null)) THEN null ELSE cast(year#94014016 as float) END AS year#94014248, CASE WHEN ((retIC#94014017 = NA) OR (retIC#94014017 = null)) THEN null ELSE cast(retIC#94014017 as float) END AS retIC#94014249, CASE WHEN ((resretIC#94014018 = NA) OR (resretIC#94014018 = null)) THEN null ELSE cast(resretIC#94014018 as float) END AS resretIC#94014250, CASE WHEN ((numcos#94014019 = NA) OR (numcos#94014019 = null)) THEN null ELSE cast(numcos#94014019 as float) END AS numcos#94014251, CASE WHEN ((numdates#94014020 = NA) OR (numdates#94014020 = null)) THEN null ELSE cast(numdates#94014020 as int) END AS numdates#94014253, CASE WHEN (annual_bmret#94014021 = null) THEN null ELSE annual_bmret#94014021 END AS annual_bmret#94014255, CASE WHEN ((annual_ret#94014022 = NA) OR (annual_ret#94014022 = null)) THEN null ELSE cast(annual_ret#94014022 as float) END AS annual_ret#94014257, CASE WHEN ((std_ret#94014023 = NA) OR (std_ret#94014023 = null)) THEN null ELSE cast(std_ret#94014023 as float) END AS std_ret#94014259, CASE WHEN ((Sharpe_ret#94014024 = NA) OR (Sharpe_ret#94014024 = null)) THEN null ELSE cast(Sharpe_ret#94014024 as float) END AS Sharpe_ret#94014261, CASE WHEN ((PctPos_ret#94014025 = NA) OR (PctPos_ret#94014025 = null)) THEN null ELSE cast(PctPos_ret#94014025 as float) END AS PctPos_ret#94014262, CASE WHEN (TR_ret#94014026 = null) THEN null ELSE TR_ret#94014026 END AS TR_ret#94014264, CASE WHEN (IR_ret#94014027 = null) THEN null ELSE IR_ret#94014027 END AS IR_ret#94014266, CASE WHEN ((annual_resret#94014028 = NA) OR (annual_resret#94014028 = null)) THEN null ELSE cast(annual_resret#94014028 as float) END AS annual_resret#94014268, CASE WHEN ((std_resret#94014029 = NA) OR (std_resret#94014029 = null)) THEN null ELSE cast(std_resret#94014029 as float) END AS std_resret#94014270, CASE WHEN ((Sharpe_resret#94014030 = NA) OR (Sharpe_resret#94014030 = null)) THEN null ELSE cast(Sharpe_resret#94014030 as float) END AS Sharpe_resret#94014272, CASE WHEN ((PctPos_resret#94014031 = NA) OR (PctPos_resret#94014031 = null)) THEN null ELSE cast(PctPos_resret#94014031 as float) END AS PctPos_resret#94014274, CASE WHEN (TR_resret#94014032 = null) THEN null ELSE TR_resret#94014032 END AS TR_resret#94014275, CASE WHEN (IR_resret#94014033 = null) THEN null ELSE IR_resret#94014033 END AS IR_resret#94014277, CASE WHEN ((annual_retnet#94014034 = NA) OR (annual_retnet#94014034 = null)) THEN null ELSE cast(annual_retnet#94014034 as float) END AS annual_retnet#94014279, CASE WHEN ((std_retnet#94014035 = NA) OR (std_retnet#94014035 = null)) THEN null ELSE cast(std_retnet#94014035 as float) END AS std_retnet#94014281, CASE WHEN ((Sharpe_retnet#94014036 = NA) OR (Sharpe_retnet#94014036 = null)) THEN null ELSE cast(Sharpe_retnet#94014036 as float) END AS Sharpe_retnet#94014283, CASE WHEN ((PctPos_retnet#94014037 = NA) OR (PctPos_retnet#94014037 = null)) THEN null ELSE cast(PctPos_retnet#94014037 as float) END AS PctPos_retnet#94014285, CASE WHEN (TR_retnet#94014038 = null) THEN null ELSE TR_retnet#94014038 END AS TR_retnet#94014287, CASE WHEN (IR_retnet#94014039 = null) THEN null ELSE IR_retnet#94014039 END AS IR_retnet#94014289, CASE WHEN ((turnover#94014040 = NA) OR (turnover#94014040 = null)) THEN null ELSE cast(turnover#94014040 as float) END AS turnover#94014292] +- FileScan csv [year#94014016,retIC#94014017,resretIC#94014018,numcos#94014019,numdates#94014020,annual_bmret#94014021,annual_ret#94014022,std_ret#94014023,Sharpe_ret#94014024,PctPos_ret#94014025,TR_ret#94014026,IR_ret#94014027,annual_resret#94014028,std_resret#94014029,Sharpe_resret#94014030,PctPos_resret#94014031,TR_resret#94014032,IR_resret#94014033,annual_retnet#94014034,std_retnet#94014035,Sharpe_retnet#94014036,PctPos_retnet#94014037,TR_retnet#94014038,IR_retnet#94014039,turnover#94014040] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rank..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94014248 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94014292, year#94014248] Arguments: [turnover#94014292, year#94014248], [isnotnull(turnover#94014292)] (4) InMemoryRelation Arguments: [year#94014248, retIC#94014249, resretIC#94014250, numcos#94014251, numdates#94014253, annual_bmret#94014255, annual_ret#94014257, std_ret#94014259, Sharpe_ret#94014261, PctPos_ret#94014262, TR_ret#94014264, IR_ret#94014266, annual_resret#94014268, std_resret#94014270, Sharpe_resret#94014272, PctPos_resret#94014274, TR_resret#94014275, IR_resret#94014277, annual_retnet#94014279, std_retnet#94014281, Sharpe_retnet#94014283, PctPos_retnet#94014285, TR_retnet#94014287, IR_retnet#94014289, turnover#94014292], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94014016 = NA) OR (year#94014016 = null)) THEN null ELSE cast(year#94014016 as float) END AS year#94014248, CASE WHEN ((retIC#94014017 = NA) OR (retIC#94014017 = null)) THEN null ELSE cast(retIC#94014017 as float) END AS retIC#94014249, CASE WHEN ((resretIC#94014018 = NA) OR (resretIC#94014018 = null)) THEN null ELSE cast(resretIC#94014018 as float) END AS resretIC#94014250, CASE WHEN ((numcos#94014019 = NA) OR (numcos#94014019 = null)) THEN null ELSE cast(numcos#94014019 as float) END AS numcos#94014251, CASE WHEN ((numdates#94014020 = NA) OR (numdates#94014020 = null)) THEN null ELSE cast(numdates#94014020 as int) END AS numdates#94014253, CASE WHEN (annual_bmret#94014021 = null) THEN null ELSE annual_bmret#94014021 END AS annual_bmret#94014255, CASE WHEN ((annual_ret#94014022 = NA) OR (annual_ret#94014022 = null)) THEN null ELSE cast(annual_ret#94014022 as float) END AS annual_ret#94014257, CASE WHEN ((std_ret#94014023 = NA) OR (std_ret#94014023 = null)) THEN null ELSE cast(std_ret#94014023 as float) END AS std_ret#94014259, CASE WHEN ((Sharpe_ret#94014024 = NA) OR (Sharpe_ret#94014024 = null)) THEN null ELSE cast(Sharpe_ret#94014024 as float) END AS Sharpe_ret#94014261, CASE WHEN ((PctPos_ret#94014025 = NA) OR (PctPos_ret#94014025 = null)) THEN null ELSE cast(PctPos_ret#94014025 as float) END AS PctPos_ret#94014262, CASE WHEN (TR_ret#94014026 = null) THEN null ELSE TR_ret#94014026 END AS TR_ret#94014264, CASE WHEN (IR_ret#94014027 = null) THEN null ELSE IR_ret#94014027 END AS IR_ret#94014266, CASE WHEN ((annual_resret#94014028 = NA) OR (annual_resret#94014028 = null)) THEN null ELSE cast(annual_resret#94014028 as float) END AS annual_resret#94014268, CASE WHEN ((std_resret#94014029 = NA) OR (std_resret#94014029 = null)) THEN null ELSE cast(std_resret#94014029 as float) END AS std_resret#94014270, CASE WHEN ((Sharpe_resret#94014030 = NA) OR (Sharpe_resret#94014030 = null)) THEN null ELSE cast(Sharpe_resret#94014030 as float) END AS Sharpe_resret#94014272, CASE WHEN ((PctPos_resret#94014031 = NA) OR (PctPos_resret#94014031 = null)) THEN null ELSE cast(PctPos_resret#94014031 as float) END AS PctPos_resret#94014274, CASE WHEN (TR_resret#94014032 = null) THEN null ELSE TR_resret#94014032 END AS TR_resret#94014275, CASE WHEN (IR_resret#94014033 = null) THEN null ELSE IR_resret#94014033 END AS IR_resret#94014277, CASE WHEN ((annual_retnet#94014034 = NA) OR (annual_retnet#94014034 = null)) THEN null ELSE cast(annual_retnet#94014034 as float) END AS annual_retnet#94014279, CASE WHEN ((std_retnet#94014035 = NA) OR (std_retnet#94014035 = null)) THEN null ELSE cast(std_retnet#94014035 as float) END AS std_retnet#94014281, CASE WHEN ((Sharpe_retnet#94014036 = NA) OR (Sharpe_retnet#94014036 = null)) THEN null ELSE cast(Sharpe_retnet#94014036 as float) END AS Sharpe_retnet#94014283, CASE WHEN ((PctPos_retnet#94014037 = NA) OR (PctPos_retnet#94014037 = null)) THEN null ELSE cast(PctPos_retnet#94014037 as float) END AS PctPos_retnet#94014285, CASE WHEN (TR_retnet#94014038 = null) THEN null ELSE TR_retnet#94014038 END AS TR_retnet#94014287, CASE WHEN (IR_retnet#94014039 = null) THEN null ELSE IR_retnet#94014039 END AS IR_retnet#94014289, CASE WHEN ((turnover#94014040 = NA) OR (turnover#94014040 = null)) THEN null ELSE cast(turnover#94014040 as float) END AS turnover#94014292] +- FileScan csv [year#94014016,retIC#94014017,resretIC#94014018,numcos#94014019,numdates#94014020,annual_bmret#94014021,annual_ret#94014022,std_ret#94014023,Sharpe_ret#94014024,PctPos_ret#94014025,TR_ret#94014026,IR_ret#94014027,annual_resret#94014028,std_resret#94014029,Sharpe_resret#94014030,PctPos_resret#94014031,TR_resret#94014032,IR_resret#94014033,annual_retnet#94014034,std_retnet#94014035,Sharpe_retnet#94014036,PctPos_retnet#94014037,TR_retnet#94014038,IR_retnet#94014039,turnover#94014040] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rank..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94014016, retIC#94014017, resretIC#94014018, numcos#94014019, numdates#94014020, annual_bmret#94014021, annual_ret#94014022, std_ret#94014023, Sharpe_ret#94014024, PctPos_ret#94014025, TR_ret#94014026, IR_ret#94014027, annual_resret#94014028, std_resret#94014029, Sharpe_resret#94014030, PctPos_resret#94014031, TR_resret#94014032, IR_resret#94014033, annual_retnet#94014034, std_retnet#94014035, Sharpe_retnet#94014036, PctPos_retnet#94014037, TR_retnet#94014038, IR_retnet#94014039, turnover#94014040] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/rankviews_history/rankviews/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94014016 = NA) OR (year#94014016 = null)) THEN null ELSE cast(year#94014016 as float) END AS year#94014248, CASE WHEN ((retIC#94014017 = NA) OR (retIC#94014017 = null)) THEN null ELSE cast(retIC#94014017 as float) END AS retIC#94014249, CASE WHEN ((resretIC#94014018 = NA) OR (resretIC#94014018 = null)) THEN null ELSE cast(resretIC#94014018 as float) END AS resretIC#94014250, CASE WHEN ((numcos#94014019 = NA) OR (numcos#94014019 = null)) THEN null ELSE cast(numcos#94014019 as float) END AS numcos#94014251, CASE WHEN ((numdates#94014020 = NA) OR (numdates#94014020 = null)) THEN null ELSE cast(numdates#94014020 as int) END AS numdates#94014253, CASE WHEN (annual_bmret#94014021 = null) THEN null ELSE annual_bmret#94014021 END AS annual_bmret#94014255, CASE WHEN ((annual_ret#94014022 = NA) OR (annual_ret#94014022 = null)) THEN null ELSE cast(annual_ret#94014022 as float) END AS annual_ret#94014257, CASE WHEN ((std_ret#94014023 = NA) OR (std_ret#94014023 = null)) THEN null ELSE cast(std_ret#94014023 as float) END AS std_ret#94014259, CASE WHEN ((Sharpe_ret#94014024 = NA) OR (Sharpe_ret#94014024 = null)) THEN null ELSE cast(Sharpe_ret#94014024 as float) END AS Sharpe_ret#94014261, CASE WHEN ((PctPos_ret#94014025 = NA) OR (PctPos_ret#94014025 = null)) THEN null ELSE cast(PctPos_ret#94014025 as float) END AS PctPos_ret#94014262, CASE WHEN (TR_ret#94014026 = null) THEN null ELSE TR_ret#94014026 END AS TR_ret#94014264, CASE WHEN (IR_ret#94014027 = null) THEN null ELSE IR_ret#94014027 END AS IR_ret#94014266, CASE WHEN ((annual_resret#94014028 = NA) OR (annual_resret#94014028 = null)) THEN null ELSE cast(annual_resret#94014028 as float) END AS annual_resret#94014268, CASE WHEN ((std_resret#94014029 = NA) OR (std_resret#94014029 = null)) THEN null ELSE cast(std_resret#94014029 as float) END AS std_resret#94014270, CASE WHEN ((Sharpe_resret#94014030 = NA) OR (Sharpe_resret#94014030 = null)) THEN null ELSE cast(Sharpe_resret#94014030 as float) END AS Sharpe_resret#94014272, CASE WHEN ((PctPos_resret#94014031 = NA) OR (PctPos_resret#94014031 = null)) THEN null ELSE cast(PctPos_resret#94014031 as float) END AS PctPos_resret#94014274, CASE WHEN (TR_resret#94014032 = null) THEN null ELSE TR_resret#94014032 END AS TR_resret#94014275, CASE WHEN (IR_resret#94014033 = null) THEN null ELSE IR_resret#94014033 END AS IR_resret#94014277, CASE WHEN ((annual_retnet#94014034 = NA) OR (annual_retnet#94014034 = null)) THEN null ELSE cast(annual_retnet#94014034 as float) END AS annual_retnet#94014279, CASE WHEN ((std_retnet#94014035 = NA) OR (std_retnet#94014035 = null)) THEN null ELSE cast(std_retnet#94014035 as float) END AS std_retnet#94014281, CASE WHEN ((Sharpe_retnet#94014036 = NA) OR (Sharpe_retnet#94014036 = null)) THEN null ELSE cast(Sharpe_retnet#94014036 as float) END AS Sharpe_retnet#94014283, CASE WHEN ((PctPos_retnet#94014037 = NA) OR (PctPos_retnet#94014037 = null)) THEN null ELSE cast(PctPos_retnet#94014037 as float) END AS PctPos_retnet#94014285, CASE WHEN (TR_retnet#94014038 = null) THEN null ELSE TR_retnet#94014038 END AS TR_retnet#94014287, CASE WHEN (IR_retnet#94014039 = null) THEN null ELSE IR_retnet#94014039 END AS IR_retnet#94014289, CASE WHEN ((turnover#94014040 = NA) OR (turnover#94014040 = null)) THEN null ELSE cast(turnover#94014040 as float) END AS turnover#94014292] Input [25]: [year#94014016, retIC#94014017, resretIC#94014018, numcos#94014019, numdates#94014020, annual_bmret#94014021, annual_ret#94014022, std_ret#94014023, Sharpe_ret#94014024, PctPos_ret#94014025, TR_ret#94014026, IR_ret#94014027, annual_resret#94014028, std_resret#94014029, Sharpe_resret#94014030, PctPos_resret#94014031, TR_resret#94014032, IR_resret#94014033, annual_retnet#94014034, std_retnet#94014035, Sharpe_retnet#94014036, PctPos_retnet#94014037, TR_retnet#94014038, IR_retnet#94014039, turnover#94014040] (7) Filter [codegen id : 1] Input [2]: [turnover#94014292, year#94014248] Condition : isnotnull(turnover#94014292) (8) Project [codegen id : 1] Output [3]: [year#94014248, turnover#94014292, (1.0 / cast(turnover#94014292 as double)) AS days_hold#94014331] Input [2]: [turnover#94014292, year#94014248] (9) Exchange Input [3]: [year#94014248, turnover#94014292, days_hold#94014331] Arguments: rangepartitioning(year#94014248 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7506265] (10) Sort [codegen id : 2] Input [3]: [year#94014248, turnover#94014292, days_hold#94014331] Arguments: [year#94014248 ASC NULLS FIRST], true, 0 (11) ColumnarToRow [codegen id : 1] Input [3]: [year#94014248, turnover#94014292, days_hold#94145431] (12) CollectLimit Input [3]: [year#94014248, turnover#94014292, days_hold#94145431] Arguments: 10000