== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94057211, turnover#94057323, days_hold#94057393] Arguments: [year#94057211, turnover#94057323, days_hold#94057393] (2) InMemoryRelation Arguments: [year#94057211, turnover#94057323, days_hold#94057393], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94057211 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94057211 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7509671] +- *(1) Project [year#94057211, turnover#94057323, (1.0 / cast(turnover#94057323 as double)) AS days_hold#94057393] +- *(1) Filter isnotnull(turnover#94057323) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94057323, year#94057211], [isnotnull(turnover#94057323)] +- InMemoryRelation [year#94057211, retIC#94057212, resretIC#94057213, numcos#94057247, numdates#94057248, annual_bmret#94057282, annual_ret#94057283, std_ret#94057284, Sharpe_ret#94057286, PctPos_ret#94057288, TR_ret#94057290, IR_ret#94057293, annual_resret#94057295, std_resret#94057297, Sharpe_resret#94057300, PctPos_resret#94057302, TR_resret#94057304, IR_resret#94057307, annual_retnet#94057309, std_retnet#94057312, Sharpe_retnet#94057314, PctPos_retnet#94057316, TR_retnet#94057318, IR_retnet#94057321, turnover#94057323], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94057043 = NA) OR (year#94057043 = null)) THEN null ELSE cast(year#94057043 as int) END AS year#94057211, CASE WHEN ((retIC#94057044 = NA) OR (retIC#94057044 = null)) THEN null ELSE cast(retIC#94057044 as float) END AS retIC#94057212, CASE WHEN ((resretIC#94057045 = NA) OR (resretIC#94057045 = null)) THEN null ELSE cast(resretIC#94057045 as float) END AS resretIC#94057213, CASE WHEN ((numcos#94057046 = NA) OR (numcos#94057046 = null)) THEN null ELSE cast(numcos#94057046 as float) END AS numcos#94057247, CASE WHEN ((numdates#94057047 = NA) OR (numdates#94057047 = null)) THEN null ELSE cast(numdates#94057047 as int) END AS numdates#94057248, CASE WHEN ((annual_bmret#94057048 = NA) OR (annual_bmret#94057048 = null)) THEN null ELSE cast(annual_bmret#94057048 as float) END AS annual_bmret#94057282, CASE WHEN ((annual_ret#94057049 = NA) OR (annual_ret#94057049 = null)) THEN null ELSE cast(annual_ret#94057049 as float) END AS annual_ret#94057283, CASE WHEN ((std_ret#94057050 = NA) OR (std_ret#94057050 = null)) THEN null ELSE cast(std_ret#94057050 as float) END AS std_ret#94057284, CASE WHEN ((Sharpe_ret#94057051 = NA) OR (Sharpe_ret#94057051 = null)) THEN null ELSE cast(Sharpe_ret#94057051 as float) END AS Sharpe_ret#94057286, CASE WHEN ((PctPos_ret#94057052 = NA) OR (PctPos_ret#94057052 = null)) THEN null ELSE cast(PctPos_ret#94057052 as float) END AS PctPos_ret#94057288, CASE WHEN ((TR_ret#94057053 = NA) OR (TR_ret#94057053 = null)) THEN null ELSE cast(TR_ret#94057053 as float) END AS TR_ret#94057290, CASE WHEN ((IR_ret#94057054 = NA) OR (IR_ret#94057054 = null)) THEN null ELSE cast(IR_ret#94057054 as float) END AS IR_ret#94057293, CASE WHEN ((annual_resret#94057055 = NA) OR (annual_resret#94057055 = null)) THEN null ELSE cast(annual_resret#94057055 as float) END AS annual_resret#94057295, CASE WHEN ((std_resret#94057056 = NA) OR (std_resret#94057056 = null)) THEN null ELSE cast(std_resret#94057056 as float) END AS std_resret#94057297, CASE WHEN ((Sharpe_resret#94057057 = NA) OR (Sharpe_resret#94057057 = null)) THEN null ELSE cast(Sharpe_resret#94057057 as float) END AS Sharpe_resret#94057300, CASE WHEN ((PctPos_resret#94057058 = NA) OR (PctPos_resret#94057058 = null)) THEN null ELSE cast(PctPos_resret#94057058 as float) END AS PctPos_resret#94057302, CASE WHEN ((TR_resret#94057059 = NA) OR (TR_resret#94057059 = null)) THEN null ELSE cast(TR_resret#94057059 as float) END AS TR_resret#94057304, CASE WHEN ((IR_resret#94057060 = NA) OR (IR_resret#94057060 = null)) THEN null ELSE cast(IR_resret#94057060 as float) END AS IR_resret#94057307, CASE WHEN ((annual_retnet#94057061 = NA) OR (annual_retnet#94057061 = null)) THEN null ELSE cast(annual_retnet#94057061 as float) END AS annual_retnet#94057309, CASE WHEN ((std_retnet#94057062 = NA) OR (std_retnet#94057062 = null)) THEN null ELSE cast(std_retnet#94057062 as float) END AS std_retnet#94057312, CASE WHEN ((Sharpe_retnet#94057063 = NA) OR (Sharpe_retnet#94057063 = null)) THEN null ELSE cast(Sharpe_retnet#94057063 as float) END AS Sharpe_retnet#94057314, CASE WHEN ((PctPos_retnet#94057064 = NA) OR (PctPos_retnet#94057064 = null)) THEN null ELSE cast(PctPos_retnet#94057064 as float) END AS PctPos_retnet#94057316, CASE WHEN ((TR_retnet#94057065 = NA) OR (TR_retnet#94057065 = null)) THEN null ELSE cast(TR_retnet#94057065 as float) END AS TR_retnet#94057318, CASE WHEN ((IR_retnet#94057066 = NA) OR (IR_retnet#94057066 = null)) THEN null ELSE cast(IR_retnet#94057066 as float) END AS IR_retnet#94057321, CASE WHEN ((turnover#94057067 = NA) OR (turnover#94057067 = null)) THEN null ELSE cast(turnover#94057067 as float) END AS turnover#94057323] +- FileScan csv [year#94057043,retIC#94057044,resretIC#94057045,numcos#94057046,numdates#94057047,annual_bmret#94057048,annual_ret#94057049,std_ret#94057050,Sharpe_ret#94057051,PctPos_ret#94057052,TR_ret#94057053,IR_ret#94057054,annual_resret#94057055,std_resret#94057056,Sharpe_resret#94057057,PctPos_resret#94057058,TR_resret#94057059,IR_resret#94057060,annual_retnet#94057061,std_retnet#94057062,Sharpe_retnet#94057063,PctPos_retnet#94057064,TR_retnet#94057065,IR_retnet#94057066,turnover#94057067] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94057211 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94057323, year#94057211] Arguments: [turnover#94057323, year#94057211], [isnotnull(turnover#94057323)] (4) InMemoryRelation Arguments: [year#94057211, retIC#94057212, resretIC#94057213, numcos#94057247, numdates#94057248, annual_bmret#94057282, annual_ret#94057283, std_ret#94057284, Sharpe_ret#94057286, PctPos_ret#94057288, TR_ret#94057290, IR_ret#94057293, annual_resret#94057295, std_resret#94057297, Sharpe_resret#94057300, PctPos_resret#94057302, TR_resret#94057304, IR_resret#94057307, annual_retnet#94057309, std_retnet#94057312, Sharpe_retnet#94057314, PctPos_retnet#94057316, TR_retnet#94057318, IR_retnet#94057321, turnover#94057323], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94057043 = NA) OR (year#94057043 = null)) THEN null ELSE cast(year#94057043 as int) END AS year#94057211, CASE WHEN ((retIC#94057044 = NA) OR (retIC#94057044 = null)) THEN null ELSE cast(retIC#94057044 as float) END AS retIC#94057212, CASE WHEN ((resretIC#94057045 = NA) OR (resretIC#94057045 = null)) THEN null ELSE cast(resretIC#94057045 as float) END AS resretIC#94057213, CASE WHEN ((numcos#94057046 = NA) OR (numcos#94057046 = null)) THEN null ELSE cast(numcos#94057046 as float) END AS numcos#94057247, CASE WHEN ((numdates#94057047 = NA) OR (numdates#94057047 = null)) THEN null ELSE cast(numdates#94057047 as int) END AS numdates#94057248, CASE WHEN ((annual_bmret#94057048 = NA) OR (annual_bmret#94057048 = null)) THEN null ELSE cast(annual_bmret#94057048 as float) END AS annual_bmret#94057282, CASE WHEN ((annual_ret#94057049 = NA) OR (annual_ret#94057049 = null)) THEN null ELSE cast(annual_ret#94057049 as float) END AS annual_ret#94057283, CASE WHEN ((std_ret#94057050 = NA) OR (std_ret#94057050 = null)) THEN null ELSE cast(std_ret#94057050 as float) END AS std_ret#94057284, CASE WHEN ((Sharpe_ret#94057051 = NA) OR (Sharpe_ret#94057051 = null)) THEN null ELSE cast(Sharpe_ret#94057051 as float) END AS Sharpe_ret#94057286, CASE WHEN ((PctPos_ret#94057052 = NA) OR (PctPos_ret#94057052 = null)) THEN null ELSE cast(PctPos_ret#94057052 as float) END AS PctPos_ret#94057288, CASE WHEN ((TR_ret#94057053 = NA) OR (TR_ret#94057053 = null)) THEN null ELSE cast(TR_ret#94057053 as float) END AS TR_ret#94057290, CASE WHEN ((IR_ret#94057054 = NA) OR (IR_ret#94057054 = null)) THEN null ELSE cast(IR_ret#94057054 as float) END AS IR_ret#94057293, CASE WHEN ((annual_resret#94057055 = NA) OR (annual_resret#94057055 = null)) THEN null ELSE cast(annual_resret#94057055 as float) END AS annual_resret#94057295, CASE WHEN ((std_resret#94057056 = NA) OR (std_resret#94057056 = null)) THEN null ELSE cast(std_resret#94057056 as float) END AS std_resret#94057297, CASE WHEN ((Sharpe_resret#94057057 = NA) OR (Sharpe_resret#94057057 = null)) THEN null ELSE cast(Sharpe_resret#94057057 as float) END AS Sharpe_resret#94057300, CASE WHEN ((PctPos_resret#94057058 = NA) OR (PctPos_resret#94057058 = null)) THEN null ELSE cast(PctPos_resret#94057058 as float) END AS PctPos_resret#94057302, CASE WHEN ((TR_resret#94057059 = NA) OR (TR_resret#94057059 = null)) THEN null ELSE cast(TR_resret#94057059 as float) END AS TR_resret#94057304, CASE WHEN ((IR_resret#94057060 = NA) OR (IR_resret#94057060 = null)) THEN null ELSE cast(IR_resret#94057060 as float) END AS IR_resret#94057307, CASE WHEN ((annual_retnet#94057061 = NA) OR (annual_retnet#94057061 = null)) THEN null ELSE cast(annual_retnet#94057061 as float) END AS annual_retnet#94057309, CASE WHEN ((std_retnet#94057062 = NA) OR (std_retnet#94057062 = null)) THEN null ELSE cast(std_retnet#94057062 as float) END AS std_retnet#94057312, CASE WHEN ((Sharpe_retnet#94057063 = NA) OR (Sharpe_retnet#94057063 = null)) THEN null ELSE cast(Sharpe_retnet#94057063 as float) END AS Sharpe_retnet#94057314, CASE WHEN ((PctPos_retnet#94057064 = NA) OR (PctPos_retnet#94057064 = null)) THEN null ELSE cast(PctPos_retnet#94057064 as float) END AS PctPos_retnet#94057316, CASE WHEN ((TR_retnet#94057065 = NA) OR (TR_retnet#94057065 = null)) THEN null ELSE cast(TR_retnet#94057065 as float) END AS TR_retnet#94057318, CASE WHEN ((IR_retnet#94057066 = NA) OR (IR_retnet#94057066 = null)) THEN null ELSE cast(IR_retnet#94057066 as float) END AS IR_retnet#94057321, CASE WHEN ((turnover#94057067 = NA) OR (turnover#94057067 = null)) THEN null ELSE cast(turnover#94057067 as float) END AS turnover#94057323] +- FileScan csv [year#94057043,retIC#94057044,resretIC#94057045,numcos#94057046,numdates#94057047,annual_bmret#94057048,annual_ret#94057049,std_ret#94057050,Sharpe_ret#94057051,PctPos_ret#94057052,TR_ret#94057053,IR_ret#94057054,annual_resret#94057055,std_resret#94057056,Sharpe_resret#94057057,PctPos_resret#94057058,TR_resret#94057059,IR_resret#94057060,annual_retnet#94057061,std_retnet#94057062,Sharpe_retnet#94057063,PctPos_retnet#94057064,TR_retnet#94057065,IR_retnet#94057066,turnover#94057067] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94057043, retIC#94057044, resretIC#94057045, numcos#94057046, numdates#94057047, annual_bmret#94057048, annual_ret#94057049, std_ret#94057050, Sharpe_ret#94057051, PctPos_ret#94057052, TR_ret#94057053, IR_ret#94057054, annual_resret#94057055, std_resret#94057056, Sharpe_resret#94057057, PctPos_resret#94057058, TR_resret#94057059, IR_resret#94057060, annual_retnet#94057061, std_retnet#94057062, Sharpe_retnet#94057063, PctPos_retnet#94057064, TR_retnet#94057065, IR_retnet#94057066, turnover#94057067] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/momentum/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94057043 = NA) OR (year#94057043 = null)) THEN null ELSE cast(year#94057043 as int) END AS year#94057211, CASE WHEN ((retIC#94057044 = NA) OR (retIC#94057044 = null)) THEN null ELSE cast(retIC#94057044 as float) END AS retIC#94057212, CASE WHEN ((resretIC#94057045 = NA) OR (resretIC#94057045 = null)) THEN null ELSE cast(resretIC#94057045 as float) END AS resretIC#94057213, CASE WHEN ((numcos#94057046 = NA) OR (numcos#94057046 = null)) THEN null ELSE cast(numcos#94057046 as float) END AS numcos#94057247, CASE WHEN ((numdates#94057047 = NA) OR (numdates#94057047 = null)) THEN null ELSE cast(numdates#94057047 as int) END AS numdates#94057248, CASE WHEN ((annual_bmret#94057048 = NA) OR (annual_bmret#94057048 = null)) THEN null ELSE cast(annual_bmret#94057048 as float) END AS annual_bmret#94057282, CASE WHEN ((annual_ret#94057049 = NA) OR (annual_ret#94057049 = null)) THEN null ELSE cast(annual_ret#94057049 as float) END AS annual_ret#94057283, CASE WHEN ((std_ret#94057050 = NA) OR (std_ret#94057050 = null)) THEN null ELSE cast(std_ret#94057050 as float) END AS std_ret#94057284, CASE WHEN ((Sharpe_ret#94057051 = NA) OR (Sharpe_ret#94057051 = null)) THEN null ELSE cast(Sharpe_ret#94057051 as float) END AS Sharpe_ret#94057286, CASE WHEN ((PctPos_ret#94057052 = NA) OR (PctPos_ret#94057052 = null)) THEN null ELSE cast(PctPos_ret#94057052 as float) END AS PctPos_ret#94057288, CASE WHEN ((TR_ret#94057053 = NA) OR (TR_ret#94057053 = null)) THEN null ELSE cast(TR_ret#94057053 as float) END AS TR_ret#94057290, CASE WHEN ((IR_ret#94057054 = NA) OR (IR_ret#94057054 = null)) THEN null ELSE cast(IR_ret#94057054 as float) END AS IR_ret#94057293, CASE WHEN ((annual_resret#94057055 = NA) OR (annual_resret#94057055 = null)) THEN null ELSE cast(annual_resret#94057055 as float) END AS annual_resret#94057295, CASE WHEN ((std_resret#94057056 = NA) OR (std_resret#94057056 = null)) THEN null ELSE cast(std_resret#94057056 as float) END AS std_resret#94057297, CASE WHEN ((Sharpe_resret#94057057 = NA) OR (Sharpe_resret#94057057 = null)) THEN null ELSE cast(Sharpe_resret#94057057 as float) END AS Sharpe_resret#94057300, CASE WHEN ((PctPos_resret#94057058 = NA) OR (PctPos_resret#94057058 = null)) THEN null ELSE cast(PctPos_resret#94057058 as float) END AS PctPos_resret#94057302, CASE WHEN ((TR_resret#94057059 = NA) OR (TR_resret#94057059 = null)) THEN null ELSE cast(TR_resret#94057059 as float) END AS TR_resret#94057304, CASE WHEN ((IR_resret#94057060 = NA) OR (IR_resret#94057060 = null)) THEN null ELSE cast(IR_resret#94057060 as float) END AS IR_resret#94057307, CASE WHEN ((annual_retnet#94057061 = NA) OR (annual_retnet#94057061 = null)) THEN null ELSE cast(annual_retnet#94057061 as float) END AS annual_retnet#94057309, CASE WHEN ((std_retnet#94057062 = NA) OR (std_retnet#94057062 = null)) THEN null ELSE cast(std_retnet#94057062 as float) END AS std_retnet#94057312, CASE WHEN ((Sharpe_retnet#94057063 = NA) OR (Sharpe_retnet#94057063 = null)) THEN null ELSE cast(Sharpe_retnet#94057063 as float) END AS Sharpe_retnet#94057314, CASE WHEN ((PctPos_retnet#94057064 = NA) OR (PctPos_retnet#94057064 = null)) THEN null ELSE cast(PctPos_retnet#94057064 as float) END AS PctPos_retnet#94057316, CASE WHEN ((TR_retnet#94057065 = NA) OR (TR_retnet#94057065 = null)) THEN null ELSE cast(TR_retnet#94057065 as float) END AS TR_retnet#94057318, CASE WHEN ((IR_retnet#94057066 = NA) OR (IR_retnet#94057066 = null)) THEN null ELSE cast(IR_retnet#94057066 as float) END AS IR_retnet#94057321, CASE WHEN ((turnover#94057067 = NA) OR (turnover#94057067 = null)) THEN null ELSE cast(turnover#94057067 as float) END AS turnover#94057323] Input [25]: [year#94057043, retIC#94057044, resretIC#94057045, numcos#94057046, numdates#94057047, annual_bmret#94057048, annual_ret#94057049, std_ret#94057050, Sharpe_ret#94057051, PctPos_ret#94057052, TR_ret#94057053, IR_ret#94057054, annual_resret#94057055, std_resret#94057056, Sharpe_resret#94057057, PctPos_resret#94057058, TR_resret#94057059, IR_resret#94057060, annual_retnet#94057061, std_retnet#94057062, Sharpe_retnet#94057063, PctPos_retnet#94057064, TR_retnet#94057065, IR_retnet#94057066, turnover#94057067] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94057323, year#94057211] (8) Filter [codegen id : 1] Input [2]: [turnover#94057323, year#94057211] Condition : isnotnull(turnover#94057323) (9) Project [codegen id : 1] Output [3]: [year#94057211, turnover#94057323, (1.0 / cast(turnover#94057323 as double)) AS days_hold#94057393] Input [2]: [turnover#94057323, year#94057211] (10) Exchange Input [3]: [year#94057211, turnover#94057323, days_hold#94057393] Arguments: rangepartitioning(year#94057211 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7509671] (11) Sort [codegen id : 2] Input [3]: [year#94057211, turnover#94057323, days_hold#94057393] Arguments: [year#94057211 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94057211, turnover#94057323, days_hold#94057393] (13) CollectLimit Input [3]: [year#94057211, turnover#94057323, days_hold#94057393] Arguments: 1000000