== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94028363, turnover#94028408, days_hold#94028446] Arguments: [year#94028363, turnover#94028408, days_hold#94028446] (2) InMemoryRelation Arguments: [year#94028363, turnover#94028408, days_hold#94028446], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94028363 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94028363 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7507373] +- *(1) Project [year#94028363, turnover#94028408, (1.0 / cast(turnover#94028408 as double)) AS days_hold#94028446] +- *(1) Filter isnotnull(turnover#94028408) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94028408, year#94028363], [isnotnull(turnover#94028408)] +- InMemoryRelation [year#94028363, retIC#94028364, resretIC#94028365, numcos#94028366, numdates#94028367, annual_bmret#94028369, annual_ret#94028371, std_ret#94028373, Sharpe_ret#94028375, PctPos_ret#94028377, TR_ret#94028380, IR_ret#94028381, annual_resret#94028383, std_resret#94028385, Sharpe_resret#94028388, PctPos_resret#94028390, TR_resret#94028392, IR_resret#94028394, annual_retnet#94028396, std_retnet#94028398, Sharpe_retnet#94028400, PctPos_retnet#94028401, TR_retnet#94028404, IR_retnet#94028406, turnover#94028408], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94028131 = NA) OR (year#94028131 = null)) THEN null ELSE cast(year#94028131 as int) END AS year#94028363, CASE WHEN ((retIC#94028132 = NA) OR (retIC#94028132 = null)) THEN null ELSE cast(retIC#94028132 as float) END AS retIC#94028364, CASE WHEN ((resretIC#94028133 = NA) OR (resretIC#94028133 = null)) THEN null ELSE cast(resretIC#94028133 as float) END AS resretIC#94028365, CASE WHEN ((numcos#94028134 = NA) OR (numcos#94028134 = null)) THEN null ELSE cast(numcos#94028134 as float) END AS numcos#94028366, CASE WHEN ((numdates#94028135 = NA) OR (numdates#94028135 = null)) THEN null ELSE cast(numdates#94028135 as int) END AS numdates#94028367, CASE WHEN ((annual_bmret#94028136 = NA) OR (annual_bmret#94028136 = null)) THEN null ELSE cast(annual_bmret#94028136 as float) END AS annual_bmret#94028369, CASE WHEN ((annual_ret#94028137 = NA) OR (annual_ret#94028137 = null)) THEN null ELSE cast(annual_ret#94028137 as float) END AS annual_ret#94028371, CASE WHEN ((std_ret#94028138 = NA) OR (std_ret#94028138 = null)) THEN null ELSE cast(std_ret#94028138 as float) END AS std_ret#94028373, CASE WHEN ((Sharpe_ret#94028139 = NA) OR (Sharpe_ret#94028139 = null)) THEN null ELSE cast(Sharpe_ret#94028139 as float) END AS Sharpe_ret#94028375, CASE WHEN ((PctPos_ret#94028140 = NA) OR (PctPos_ret#94028140 = null)) THEN null ELSE cast(PctPos_ret#94028140 as float) END AS PctPos_ret#94028377, CASE WHEN ((TR_ret#94028141 = NA) OR (TR_ret#94028141 = null)) THEN null ELSE cast(TR_ret#94028141 as float) END AS TR_ret#94028380, CASE WHEN ((IR_ret#94028142 = NA) OR (IR_ret#94028142 = null)) THEN null ELSE cast(IR_ret#94028142 as float) END AS IR_ret#94028381, CASE WHEN ((annual_resret#94028143 = NA) OR (annual_resret#94028143 = null)) THEN null ELSE cast(annual_resret#94028143 as float) END AS annual_resret#94028383, CASE WHEN ((std_resret#94028144 = NA) OR (std_resret#94028144 = null)) THEN null ELSE cast(std_resret#94028144 as float) END AS std_resret#94028385, CASE WHEN ((Sharpe_resret#94028145 = NA) OR (Sharpe_resret#94028145 = null)) THEN null ELSE cast(Sharpe_resret#94028145 as float) END AS Sharpe_resret#94028388, CASE WHEN ((PctPos_resret#94028146 = NA) OR (PctPos_resret#94028146 = null)) THEN null ELSE cast(PctPos_resret#94028146 as float) END AS PctPos_resret#94028390, CASE WHEN ((TR_resret#94028147 = NA) OR (TR_resret#94028147 = null)) THEN null ELSE cast(TR_resret#94028147 as float) END AS TR_resret#94028392, CASE WHEN ((IR_resret#94028148 = NA) OR (IR_resret#94028148 = null)) THEN null ELSE cast(IR_resret#94028148 as float) END AS IR_resret#94028394, CASE WHEN ((annual_retnet#94028149 = NA) OR (annual_retnet#94028149 = null)) THEN null ELSE cast(annual_retnet#94028149 as float) END AS annual_retnet#94028396, CASE WHEN ((std_retnet#94028150 = NA) OR (std_retnet#94028150 = null)) THEN null ELSE cast(std_retnet#94028150 as float) END AS std_retnet#94028398, CASE WHEN ((Sharpe_retnet#94028151 = NA) OR (Sharpe_retnet#94028151 = null)) THEN null ELSE cast(Sharpe_retnet#94028151 as float) END AS Sharpe_retnet#94028400, CASE WHEN ((PctPos_retnet#94028152 = NA) OR (PctPos_retnet#94028152 = null)) THEN null ELSE cast(PctPos_retnet#94028152 as float) END AS PctPos_retnet#94028401, CASE WHEN ((TR_retnet#94028153 = NA) OR (TR_retnet#94028153 = null)) THEN null ELSE cast(TR_retnet#94028153 as float) END AS TR_retnet#94028404, CASE WHEN ((IR_retnet#94028154 = NA) OR (IR_retnet#94028154 = null)) THEN null ELSE cast(IR_retnet#94028154 as float) END AS IR_retnet#94028406, CASE WHEN ((turnover#94028155 = NA) OR (turnover#94028155 = null)) THEN null ELSE cast(turnover#94028155 as float) END AS turnover#94028408] +- FileScan csv [year#94028131,retIC#94028132,resretIC#94028133,numcos#94028134,numdates#94028135,annual_bmret#94028136,annual_ret#94028137,std_ret#94028138,Sharpe_ret#94028139,PctPos_ret#94028140,TR_ret#94028141,IR_ret#94028142,annual_resret#94028143,std_resret#94028144,Sharpe_resret#94028145,PctPos_resret#94028146,TR_resret#94028147,IR_resret#94028148,annual_retnet#94028149,std_retnet#94028150,Sharpe_retnet#94028151,PctPos_retnet#94028152,TR_retnet#94028153,IR_retnet#94028154,turnover#94028155] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/growth/st..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94028363 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94028408, year#94028363] Arguments: [turnover#94028408, year#94028363], [isnotnull(turnover#94028408)] (4) InMemoryRelation Arguments: [year#94028363, retIC#94028364, resretIC#94028365, numcos#94028366, numdates#94028367, annual_bmret#94028369, annual_ret#94028371, std_ret#94028373, Sharpe_ret#94028375, PctPos_ret#94028377, TR_ret#94028380, IR_ret#94028381, annual_resret#94028383, std_resret#94028385, Sharpe_resret#94028388, PctPos_resret#94028390, TR_resret#94028392, IR_resret#94028394, annual_retnet#94028396, std_retnet#94028398, Sharpe_retnet#94028400, PctPos_retnet#94028401, TR_retnet#94028404, IR_retnet#94028406, turnover#94028408], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94028131 = NA) OR (year#94028131 = null)) THEN null ELSE cast(year#94028131 as int) END AS year#94028363, CASE WHEN ((retIC#94028132 = NA) OR (retIC#94028132 = null)) THEN null ELSE cast(retIC#94028132 as float) END AS retIC#94028364, CASE WHEN ((resretIC#94028133 = NA) OR (resretIC#94028133 = null)) THEN null ELSE cast(resretIC#94028133 as float) END AS resretIC#94028365, CASE WHEN ((numcos#94028134 = NA) OR (numcos#94028134 = null)) THEN null ELSE cast(numcos#94028134 as float) END AS numcos#94028366, CASE WHEN ((numdates#94028135 = NA) OR (numdates#94028135 = null)) THEN null ELSE cast(numdates#94028135 as int) END AS numdates#94028367, CASE WHEN ((annual_bmret#94028136 = NA) OR (annual_bmret#94028136 = null)) THEN null ELSE cast(annual_bmret#94028136 as float) END AS annual_bmret#94028369, CASE WHEN ((annual_ret#94028137 = NA) OR (annual_ret#94028137 = null)) THEN null ELSE cast(annual_ret#94028137 as float) END AS annual_ret#94028371, CASE WHEN ((std_ret#94028138 = NA) OR (std_ret#94028138 = null)) THEN null ELSE cast(std_ret#94028138 as float) END AS std_ret#94028373, CASE WHEN ((Sharpe_ret#94028139 = NA) OR (Sharpe_ret#94028139 = null)) THEN null ELSE cast(Sharpe_ret#94028139 as float) END AS Sharpe_ret#94028375, CASE WHEN ((PctPos_ret#94028140 = NA) OR (PctPos_ret#94028140 = null)) THEN null ELSE cast(PctPos_ret#94028140 as float) END AS PctPos_ret#94028377, CASE WHEN ((TR_ret#94028141 = NA) OR (TR_ret#94028141 = null)) THEN null ELSE cast(TR_ret#94028141 as float) END AS TR_ret#94028380, CASE WHEN ((IR_ret#94028142 = NA) OR (IR_ret#94028142 = null)) THEN null ELSE cast(IR_ret#94028142 as float) END AS IR_ret#94028381, CASE WHEN ((annual_resret#94028143 = NA) OR (annual_resret#94028143 = null)) THEN null ELSE cast(annual_resret#94028143 as float) END AS annual_resret#94028383, CASE WHEN ((std_resret#94028144 = NA) OR (std_resret#94028144 = null)) THEN null ELSE cast(std_resret#94028144 as float) END AS std_resret#94028385, CASE WHEN ((Sharpe_resret#94028145 = NA) OR (Sharpe_resret#94028145 = null)) THEN null ELSE cast(Sharpe_resret#94028145 as float) END AS Sharpe_resret#94028388, CASE WHEN ((PctPos_resret#94028146 = NA) OR (PctPos_resret#94028146 = null)) THEN null ELSE cast(PctPos_resret#94028146 as float) END AS PctPos_resret#94028390, CASE WHEN ((TR_resret#94028147 = NA) OR (TR_resret#94028147 = null)) THEN null ELSE cast(TR_resret#94028147 as float) END AS TR_resret#94028392, CASE WHEN ((IR_resret#94028148 = NA) OR (IR_resret#94028148 = null)) THEN null ELSE cast(IR_resret#94028148 as float) END AS IR_resret#94028394, CASE WHEN ((annual_retnet#94028149 = NA) OR (annual_retnet#94028149 = null)) THEN null ELSE cast(annual_retnet#94028149 as float) END AS annual_retnet#94028396, CASE WHEN ((std_retnet#94028150 = NA) OR (std_retnet#94028150 = null)) THEN null ELSE cast(std_retnet#94028150 as float) END AS std_retnet#94028398, CASE WHEN ((Sharpe_retnet#94028151 = NA) OR (Sharpe_retnet#94028151 = null)) THEN null ELSE cast(Sharpe_retnet#94028151 as float) END AS Sharpe_retnet#94028400, CASE WHEN ((PctPos_retnet#94028152 = NA) OR (PctPos_retnet#94028152 = null)) THEN null ELSE cast(PctPos_retnet#94028152 as float) END AS PctPos_retnet#94028401, CASE WHEN ((TR_retnet#94028153 = NA) OR (TR_retnet#94028153 = null)) THEN null ELSE cast(TR_retnet#94028153 as float) END AS TR_retnet#94028404, CASE WHEN ((IR_retnet#94028154 = NA) OR (IR_retnet#94028154 = null)) THEN null ELSE cast(IR_retnet#94028154 as float) END AS IR_retnet#94028406, CASE WHEN ((turnover#94028155 = NA) OR (turnover#94028155 = null)) THEN null ELSE cast(turnover#94028155 as float) END AS turnover#94028408] +- FileScan csv [year#94028131,retIC#94028132,resretIC#94028133,numcos#94028134,numdates#94028135,annual_bmret#94028136,annual_ret#94028137,std_ret#94028138,Sharpe_ret#94028139,PctPos_ret#94028140,TR_ret#94028141,IR_ret#94028142,annual_resret#94028143,std_resret#94028144,Sharpe_resret#94028145,PctPos_resret#94028146,TR_resret#94028147,IR_resret#94028148,annual_retnet#94028149,std_retnet#94028150,Sharpe_retnet#94028151,PctPos_retnet#94028152,TR_retnet#94028153,IR_retnet#94028154,turnover#94028155] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/growth/st..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94028131, retIC#94028132, resretIC#94028133, numcos#94028134, numdates#94028135, annual_bmret#94028136, annual_ret#94028137, std_ret#94028138, Sharpe_ret#94028139, PctPos_ret#94028140, TR_ret#94028141, IR_ret#94028142, annual_resret#94028143, std_resret#94028144, Sharpe_resret#94028145, PctPos_resret#94028146, TR_resret#94028147, IR_resret#94028148, annual_retnet#94028149, std_retnet#94028150, Sharpe_retnet#94028151, PctPos_retnet#94028152, TR_retnet#94028153, IR_retnet#94028154, turnover#94028155] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/growth/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94028131 = NA) OR (year#94028131 = null)) THEN null ELSE cast(year#94028131 as int) END AS year#94028363, CASE WHEN ((retIC#94028132 = NA) OR (retIC#94028132 = null)) THEN null ELSE cast(retIC#94028132 as float) END AS retIC#94028364, CASE WHEN ((resretIC#94028133 = NA) OR (resretIC#94028133 = null)) THEN null ELSE cast(resretIC#94028133 as float) END AS resretIC#94028365, CASE WHEN ((numcos#94028134 = NA) OR (numcos#94028134 = null)) THEN null ELSE cast(numcos#94028134 as float) END AS numcos#94028366, CASE WHEN ((numdates#94028135 = NA) OR (numdates#94028135 = null)) THEN null ELSE cast(numdates#94028135 as int) END AS numdates#94028367, CASE WHEN ((annual_bmret#94028136 = NA) OR (annual_bmret#94028136 = null)) THEN null ELSE cast(annual_bmret#94028136 as float) END AS annual_bmret#94028369, CASE WHEN ((annual_ret#94028137 = NA) OR (annual_ret#94028137 = null)) THEN null ELSE cast(annual_ret#94028137 as float) END AS annual_ret#94028371, CASE WHEN ((std_ret#94028138 = NA) OR (std_ret#94028138 = null)) THEN null ELSE cast(std_ret#94028138 as float) END AS std_ret#94028373, CASE WHEN ((Sharpe_ret#94028139 = NA) OR (Sharpe_ret#94028139 = null)) THEN null ELSE cast(Sharpe_ret#94028139 as float) END AS Sharpe_ret#94028375, CASE WHEN ((PctPos_ret#94028140 = NA) OR (PctPos_ret#94028140 = null)) THEN null ELSE cast(PctPos_ret#94028140 as float) END AS PctPos_ret#94028377, CASE WHEN ((TR_ret#94028141 = NA) OR (TR_ret#94028141 = null)) THEN null ELSE cast(TR_ret#94028141 as float) END AS TR_ret#94028380, CASE WHEN ((IR_ret#94028142 = NA) OR (IR_ret#94028142 = null)) THEN null ELSE cast(IR_ret#94028142 as float) END AS IR_ret#94028381, CASE WHEN ((annual_resret#94028143 = NA) OR (annual_resret#94028143 = null)) THEN null ELSE cast(annual_resret#94028143 as float) END AS annual_resret#94028383, CASE WHEN ((std_resret#94028144 = NA) OR (std_resret#94028144 = null)) THEN null ELSE cast(std_resret#94028144 as float) END AS std_resret#94028385, CASE WHEN ((Sharpe_resret#94028145 = NA) OR (Sharpe_resret#94028145 = null)) THEN null ELSE cast(Sharpe_resret#94028145 as float) END AS Sharpe_resret#94028388, CASE WHEN ((PctPos_resret#94028146 = NA) OR (PctPos_resret#94028146 = null)) THEN null ELSE cast(PctPos_resret#94028146 as float) END AS PctPos_resret#94028390, CASE WHEN ((TR_resret#94028147 = NA) OR (TR_resret#94028147 = null)) THEN null ELSE cast(TR_resret#94028147 as float) END AS TR_resret#94028392, CASE WHEN ((IR_resret#94028148 = NA) OR (IR_resret#94028148 = null)) THEN null ELSE cast(IR_resret#94028148 as float) END AS IR_resret#94028394, CASE WHEN ((annual_retnet#94028149 = NA) OR (annual_retnet#94028149 = null)) THEN null ELSE cast(annual_retnet#94028149 as float) END AS annual_retnet#94028396, CASE WHEN ((std_retnet#94028150 = NA) OR (std_retnet#94028150 = null)) THEN null ELSE cast(std_retnet#94028150 as float) END AS std_retnet#94028398, CASE WHEN ((Sharpe_retnet#94028151 = NA) OR (Sharpe_retnet#94028151 = null)) THEN null ELSE cast(Sharpe_retnet#94028151 as float) END AS Sharpe_retnet#94028400, CASE WHEN ((PctPos_retnet#94028152 = NA) OR (PctPos_retnet#94028152 = null)) THEN null ELSE cast(PctPos_retnet#94028152 as float) END AS PctPos_retnet#94028401, CASE WHEN ((TR_retnet#94028153 = NA) OR (TR_retnet#94028153 = null)) THEN null ELSE cast(TR_retnet#94028153 as float) END AS TR_retnet#94028404, CASE WHEN ((IR_retnet#94028154 = NA) OR (IR_retnet#94028154 = null)) THEN null ELSE cast(IR_retnet#94028154 as float) END AS IR_retnet#94028406, CASE WHEN ((turnover#94028155 = NA) OR (turnover#94028155 = null)) THEN null ELSE cast(turnover#94028155 as float) END AS turnover#94028408] Input [25]: [year#94028131, retIC#94028132, resretIC#94028133, numcos#94028134, numdates#94028135, annual_bmret#94028136, annual_ret#94028137, std_ret#94028138, Sharpe_ret#94028139, PctPos_ret#94028140, TR_ret#94028141, IR_ret#94028142, annual_resret#94028143, std_resret#94028144, Sharpe_resret#94028145, PctPos_resret#94028146, TR_resret#94028147, IR_resret#94028148, annual_retnet#94028149, std_retnet#94028150, Sharpe_retnet#94028151, PctPos_retnet#94028152, TR_retnet#94028153, IR_retnet#94028154, turnover#94028155] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94028408, year#94028363] (8) Filter [codegen id : 1] Input [2]: [turnover#94028408, year#94028363] Condition : isnotnull(turnover#94028408) (9) Project [codegen id : 1] Output [3]: [year#94028363, turnover#94028408, (1.0 / cast(turnover#94028408 as double)) AS days_hold#94028446] Input [2]: [turnover#94028408, year#94028363] (10) Exchange Input [3]: [year#94028363, turnover#94028408, days_hold#94028446] Arguments: rangepartitioning(year#94028363 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7507373] (11) Sort [codegen id : 2] Input [3]: [year#94028363, turnover#94028408, days_hold#94028446] Arguments: [year#94028363 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94028363, turnover#94028408, days_hold#94028446] (13) CollectLimit Input [3]: [year#94028363, turnover#94028408, days_hold#94028446] Arguments: 1000000