== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94769238, turnover#94769362, days_hold#94769436] Arguments: [year#94769238, turnover#94769362, days_hold#94769436] (2) InMemoryRelation Arguments: [year#94769238, turnover#94769362, days_hold#94769436], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94769238 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94769238 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7566131] +- *(1) Project [year#94769238, turnover#94769362, (1.0 / cast(turnover#94769362 as double)) AS days_hold#94769436] +- *(1) Filter isnotnull(turnover#94769362) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94769362, year#94769238], [isnotnull(turnover#94769362)] +- InMemoryRelation [year#94769238, retIC#94769239, resretIC#94769240, numcos#94769241, numdates#94769242, annual_bmret#94769243, annual_ret#94769244, std_ret#94769245, Sharpe_ret#94769246, PctPos_ret#94769247, TR_ret#94769249, IR_ret#94769250, annual_resret#94769251, std_resret#94769252, Sharpe_resret#94769253, PctPos_resret#94769254, TR_resret#94769255, IR_resret#94769256, annual_retnet#94769257, std_retnet#94769258, Sharpe_retnet#94769292, PctPos_retnet#94769326, TR_retnet#94769327, IR_retnet#94769328, turnover#94769362], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94769137 = NA) OR (year#94769137 = null)) THEN null ELSE cast(year#94769137 as float) END AS year#94769238, CASE WHEN ((retIC#94769138 = NA) OR (retIC#94769138 = null)) THEN null ELSE cast(retIC#94769138 as float) END AS retIC#94769239, CASE WHEN ((resretIC#94769139 = NA) OR (resretIC#94769139 = null)) THEN null ELSE cast(resretIC#94769139 as float) END AS resretIC#94769240, CASE WHEN ((numcos#94769140 = NA) OR (numcos#94769140 = null)) THEN null ELSE cast(numcos#94769140 as float) END AS numcos#94769241, CASE WHEN ((numdates#94769141 = NA) OR (numdates#94769141 = null)) THEN null ELSE cast(numdates#94769141 as int) END AS numdates#94769242, CASE WHEN ((annual_bmret#94769142 = NA) OR (annual_bmret#94769142 = null)) THEN null ELSE cast(annual_bmret#94769142 as float) END AS annual_bmret#94769243, CASE WHEN ((annual_ret#94769143 = NA) OR (annual_ret#94769143 = null)) THEN null ELSE cast(annual_ret#94769143 as float) END AS annual_ret#94769244, CASE WHEN ((std_ret#94769144 = NA) OR (std_ret#94769144 = null)) THEN null ELSE cast(std_ret#94769144 as float) END AS std_ret#94769245, CASE WHEN ((Sharpe_ret#94769145 = NA) OR (Sharpe_ret#94769145 = null)) THEN null ELSE cast(Sharpe_ret#94769145 as float) END AS Sharpe_ret#94769246, CASE WHEN ((PctPos_ret#94769146 = NA) OR (PctPos_ret#94769146 = null)) THEN null ELSE cast(PctPos_ret#94769146 as float) END AS PctPos_ret#94769247, CASE WHEN ((TR_ret#94769147 = NA) OR (TR_ret#94769147 = null)) THEN null ELSE cast(TR_ret#94769147 as float) END AS TR_ret#94769249, CASE WHEN ((IR_ret#94769148 = NA) OR (IR_ret#94769148 = null)) THEN null ELSE cast(IR_ret#94769148 as float) END AS IR_ret#94769250, CASE WHEN ((annual_resret#94769149 = NA) OR (annual_resret#94769149 = null)) THEN null ELSE cast(annual_resret#94769149 as float) END AS annual_resret#94769251, CASE WHEN ((std_resret#94769150 = NA) OR (std_resret#94769150 = null)) THEN null ELSE cast(std_resret#94769150 as float) END AS std_resret#94769252, CASE WHEN ((Sharpe_resret#94769151 = NA) OR (Sharpe_resret#94769151 = null)) THEN null ELSE cast(Sharpe_resret#94769151 as float) END AS Sharpe_resret#94769253, CASE WHEN ((PctPos_resret#94769152 = NA) OR (PctPos_resret#94769152 = null)) THEN null ELSE cast(PctPos_resret#94769152 as float) END AS PctPos_resret#94769254, CASE WHEN ((TR_resret#94769153 = NA) OR (TR_resret#94769153 = null)) THEN null ELSE cast(TR_resret#94769153 as float) END AS TR_resret#94769255, CASE WHEN ((IR_resret#94769154 = NA) OR (IR_resret#94769154 = null)) THEN null ELSE cast(IR_resret#94769154 as float) END AS IR_resret#94769256, CASE WHEN ((annual_retnet#94769155 = NA) OR (annual_retnet#94769155 = null)) THEN null ELSE cast(annual_retnet#94769155 as float) END AS annual_retnet#94769257, CASE WHEN ((std_retnet#94769156 = NA) OR (std_retnet#94769156 = null)) THEN null ELSE cast(std_retnet#94769156 as float) END AS std_retnet#94769258, CASE WHEN ((Sharpe_retnet#94769157 = NA) OR (Sharpe_retnet#94769157 = null)) THEN null ELSE cast(Sharpe_retnet#94769157 as float) END AS Sharpe_retnet#94769292, CASE WHEN ((PctPos_retnet#94769158 = NA) OR (PctPos_retnet#94769158 = null)) THEN null ELSE cast(PctPos_retnet#94769158 as float) END AS PctPos_retnet#94769326, CASE WHEN ((TR_retnet#94769159 = NA) OR (TR_retnet#94769159 = null)) THEN null ELSE cast(TR_retnet#94769159 as float) END AS TR_retnet#94769327, CASE WHEN ((IR_retnet#94769160 = NA) OR (IR_retnet#94769160 = null)) THEN null ELSE cast(IR_retnet#94769160 as float) END AS IR_retnet#94769328, CASE WHEN ((turnover#94769161 = NA) OR (turnover#94769161 = null)) THEN null ELSE cast(turnover#94769161 as float) END AS turnover#94769362] +- FileScan csv [year#94769137,retIC#94769138,resretIC#94769139,numcos#94769140,numdates#94769141,annual_bmret#94769142,annual_ret#94769143,std_ret#94769144,Sharpe_ret#94769145,PctPos_ret#94769146,TR_ret#94769147,IR_ret#94769148,annual_resret#94769149,std_resret#94769150,Sharpe_resret#94769151,PctPos_resret#94769152,TR_resret#94769153,IR_resret#94769154,annual_retnet#94769155,std_retnet#94769156,Sharpe_retnet#94769157,PctPos_retnet#94769158,TR_retnet#94769159,IR_retnet#94769160,turnover#94769161] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94769238 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94769362, year#94769238] Arguments: [turnover#94769362, year#94769238], [isnotnull(turnover#94769362)] (4) InMemoryRelation Arguments: [year#94769238, retIC#94769239, resretIC#94769240, numcos#94769241, numdates#94769242, annual_bmret#94769243, annual_ret#94769244, std_ret#94769245, Sharpe_ret#94769246, PctPos_ret#94769247, TR_ret#94769249, IR_ret#94769250, annual_resret#94769251, std_resret#94769252, Sharpe_resret#94769253, PctPos_resret#94769254, TR_resret#94769255, IR_resret#94769256, annual_retnet#94769257, std_retnet#94769258, Sharpe_retnet#94769292, PctPos_retnet#94769326, TR_retnet#94769327, IR_retnet#94769328, turnover#94769362], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94769137 = NA) OR (year#94769137 = null)) THEN null ELSE cast(year#94769137 as float) END AS year#94769238, CASE WHEN ((retIC#94769138 = NA) OR (retIC#94769138 = null)) THEN null ELSE cast(retIC#94769138 as float) END AS retIC#94769239, CASE WHEN ((resretIC#94769139 = NA) OR (resretIC#94769139 = null)) THEN null ELSE cast(resretIC#94769139 as float) END AS resretIC#94769240, CASE WHEN ((numcos#94769140 = NA) OR (numcos#94769140 = null)) THEN null ELSE cast(numcos#94769140 as float) END AS numcos#94769241, CASE WHEN ((numdates#94769141 = NA) OR (numdates#94769141 = null)) THEN null ELSE cast(numdates#94769141 as int) END AS numdates#94769242, CASE WHEN ((annual_bmret#94769142 = NA) OR (annual_bmret#94769142 = null)) THEN null ELSE cast(annual_bmret#94769142 as float) END AS annual_bmret#94769243, CASE WHEN ((annual_ret#94769143 = NA) OR (annual_ret#94769143 = null)) THEN null ELSE cast(annual_ret#94769143 as float) END AS annual_ret#94769244, CASE WHEN ((std_ret#94769144 = NA) OR (std_ret#94769144 = null)) THEN null ELSE cast(std_ret#94769144 as float) END AS std_ret#94769245, CASE WHEN ((Sharpe_ret#94769145 = NA) OR (Sharpe_ret#94769145 = null)) THEN null ELSE cast(Sharpe_ret#94769145 as float) END AS Sharpe_ret#94769246, CASE WHEN ((PctPos_ret#94769146 = NA) OR (PctPos_ret#94769146 = null)) THEN null ELSE cast(PctPos_ret#94769146 as float) END AS PctPos_ret#94769247, CASE WHEN ((TR_ret#94769147 = NA) OR (TR_ret#94769147 = null)) THEN null ELSE cast(TR_ret#94769147 as float) END AS TR_ret#94769249, CASE WHEN ((IR_ret#94769148 = NA) OR (IR_ret#94769148 = null)) THEN null ELSE cast(IR_ret#94769148 as float) END AS IR_ret#94769250, CASE WHEN ((annual_resret#94769149 = NA) OR (annual_resret#94769149 = null)) THEN null ELSE cast(annual_resret#94769149 as float) END AS annual_resret#94769251, CASE WHEN ((std_resret#94769150 = NA) OR (std_resret#94769150 = null)) THEN null ELSE cast(std_resret#94769150 as float) END AS std_resret#94769252, CASE WHEN ((Sharpe_resret#94769151 = NA) OR (Sharpe_resret#94769151 = null)) THEN null ELSE cast(Sharpe_resret#94769151 as float) END AS Sharpe_resret#94769253, CASE WHEN ((PctPos_resret#94769152 = NA) OR (PctPos_resret#94769152 = null)) THEN null ELSE cast(PctPos_resret#94769152 as float) END AS PctPos_resret#94769254, CASE WHEN ((TR_resret#94769153 = NA) OR (TR_resret#94769153 = null)) THEN null ELSE cast(TR_resret#94769153 as float) END AS TR_resret#94769255, CASE WHEN ((IR_resret#94769154 = NA) OR (IR_resret#94769154 = null)) THEN null ELSE cast(IR_resret#94769154 as float) END AS IR_resret#94769256, CASE WHEN ((annual_retnet#94769155 = NA) OR (annual_retnet#94769155 = null)) THEN null ELSE cast(annual_retnet#94769155 as float) END AS annual_retnet#94769257, CASE WHEN ((std_retnet#94769156 = NA) OR (std_retnet#94769156 = null)) THEN null ELSE cast(std_retnet#94769156 as float) END AS std_retnet#94769258, CASE WHEN ((Sharpe_retnet#94769157 = NA) OR (Sharpe_retnet#94769157 = null)) THEN null ELSE cast(Sharpe_retnet#94769157 as float) END AS Sharpe_retnet#94769292, CASE WHEN ((PctPos_retnet#94769158 = NA) OR (PctPos_retnet#94769158 = null)) THEN null ELSE cast(PctPos_retnet#94769158 as float) END AS PctPos_retnet#94769326, CASE WHEN ((TR_retnet#94769159 = NA) OR (TR_retnet#94769159 = null)) THEN null ELSE cast(TR_retnet#94769159 as float) END AS TR_retnet#94769327, CASE WHEN ((IR_retnet#94769160 = NA) OR (IR_retnet#94769160 = null)) THEN null ELSE cast(IR_retnet#94769160 as float) END AS IR_retnet#94769328, CASE WHEN ((turnover#94769161 = NA) OR (turnover#94769161 = null)) THEN null ELSE cast(turnover#94769161 as float) END AS turnover#94769362] +- FileScan csv [year#94769137,retIC#94769138,resretIC#94769139,numcos#94769140,numdates#94769141,annual_bmret#94769142,annual_ret#94769143,std_ret#94769144,Sharpe_ret#94769145,PctPos_ret#94769146,TR_ret#94769147,IR_ret#94769148,annual_resret#94769149,std_resret#94769150,Sharpe_resret#94769151,PctPos_resret#94769152,TR_resret#94769153,IR_resret#94769154,annual_retnet#94769155,std_retnet#94769156,Sharpe_retnet#94769157,PctPos_retnet#94769158,TR_retnet#94769159,IR_retnet#94769160,turnover#94769161] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94769137, retIC#94769138, resretIC#94769139, numcos#94769140, numdates#94769141, annual_bmret#94769142, annual_ret#94769143, std_ret#94769144, Sharpe_ret#94769145, PctPos_ret#94769146, TR_ret#94769147, IR_ret#94769148, annual_resret#94769149, std_resret#94769150, Sharpe_resret#94769151, PctPos_resret#94769152, TR_resret#94769153, IR_resret#94769154, annual_retnet#94769155, std_retnet#94769156, Sharpe_retnet#94769157, PctPos_retnet#94769158, TR_retnet#94769159, IR_retnet#94769160, turnover#94769161] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovation/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94769137 = NA) OR (year#94769137 = null)) THEN null ELSE cast(year#94769137 as float) END AS year#94769238, CASE WHEN ((retIC#94769138 = NA) OR (retIC#94769138 = null)) THEN null ELSE cast(retIC#94769138 as float) END AS retIC#94769239, CASE WHEN ((resretIC#94769139 = NA) OR (resretIC#94769139 = null)) THEN null ELSE cast(resretIC#94769139 as float) END AS resretIC#94769240, CASE WHEN ((numcos#94769140 = NA) OR (numcos#94769140 = null)) THEN null ELSE cast(numcos#94769140 as float) END AS numcos#94769241, CASE WHEN ((numdates#94769141 = NA) OR (numdates#94769141 = null)) THEN null ELSE cast(numdates#94769141 as int) END AS numdates#94769242, CASE WHEN ((annual_bmret#94769142 = NA) OR (annual_bmret#94769142 = null)) THEN null ELSE cast(annual_bmret#94769142 as float) END AS annual_bmret#94769243, CASE WHEN ((annual_ret#94769143 = NA) OR (annual_ret#94769143 = null)) THEN null ELSE cast(annual_ret#94769143 as float) END AS annual_ret#94769244, CASE WHEN ((std_ret#94769144 = NA) OR (std_ret#94769144 = null)) THEN null ELSE cast(std_ret#94769144 as float) END AS std_ret#94769245, CASE WHEN ((Sharpe_ret#94769145 = NA) OR (Sharpe_ret#94769145 = null)) THEN null ELSE cast(Sharpe_ret#94769145 as float) END AS Sharpe_ret#94769246, CASE WHEN ((PctPos_ret#94769146 = NA) OR (PctPos_ret#94769146 = null)) THEN null ELSE cast(PctPos_ret#94769146 as float) END AS PctPos_ret#94769247, CASE WHEN ((TR_ret#94769147 = NA) OR (TR_ret#94769147 = null)) THEN null ELSE cast(TR_ret#94769147 as float) END AS TR_ret#94769249, CASE WHEN ((IR_ret#94769148 = NA) OR (IR_ret#94769148 = null)) THEN null ELSE cast(IR_ret#94769148 as float) END AS IR_ret#94769250, CASE WHEN ((annual_resret#94769149 = NA) OR (annual_resret#94769149 = null)) THEN null ELSE cast(annual_resret#94769149 as float) END AS annual_resret#94769251, CASE WHEN ((std_resret#94769150 = NA) OR (std_resret#94769150 = null)) THEN null ELSE cast(std_resret#94769150 as float) END AS std_resret#94769252, CASE WHEN ((Sharpe_resret#94769151 = NA) OR (Sharpe_resret#94769151 = null)) THEN null ELSE cast(Sharpe_resret#94769151 as float) END AS Sharpe_resret#94769253, CASE WHEN ((PctPos_resret#94769152 = NA) OR (PctPos_resret#94769152 = null)) THEN null ELSE cast(PctPos_resret#94769152 as float) END AS PctPos_resret#94769254, CASE WHEN ((TR_resret#94769153 = NA) OR (TR_resret#94769153 = null)) THEN null ELSE cast(TR_resret#94769153 as float) END AS TR_resret#94769255, CASE WHEN ((IR_resret#94769154 = NA) OR (IR_resret#94769154 = null)) THEN null ELSE cast(IR_resret#94769154 as float) END AS IR_resret#94769256, CASE WHEN ((annual_retnet#94769155 = NA) OR (annual_retnet#94769155 = null)) THEN null ELSE cast(annual_retnet#94769155 as float) END AS annual_retnet#94769257, CASE WHEN ((std_retnet#94769156 = NA) OR (std_retnet#94769156 = null)) THEN null ELSE cast(std_retnet#94769156 as float) END AS std_retnet#94769258, CASE WHEN ((Sharpe_retnet#94769157 = NA) OR (Sharpe_retnet#94769157 = null)) THEN null ELSE cast(Sharpe_retnet#94769157 as float) END AS Sharpe_retnet#94769292, CASE WHEN ((PctPos_retnet#94769158 = NA) OR (PctPos_retnet#94769158 = null)) THEN null ELSE cast(PctPos_retnet#94769158 as float) END AS PctPos_retnet#94769326, CASE WHEN ((TR_retnet#94769159 = NA) OR (TR_retnet#94769159 = null)) THEN null ELSE cast(TR_retnet#94769159 as float) END AS TR_retnet#94769327, CASE WHEN ((IR_retnet#94769160 = NA) OR (IR_retnet#94769160 = null)) THEN null ELSE cast(IR_retnet#94769160 as float) END AS IR_retnet#94769328, CASE WHEN ((turnover#94769161 = NA) OR (turnover#94769161 = null)) THEN null ELSE cast(turnover#94769161 as float) END AS turnover#94769362] Input [25]: [year#94769137, retIC#94769138, resretIC#94769139, numcos#94769140, numdates#94769141, annual_bmret#94769142, annual_ret#94769143, std_ret#94769144, Sharpe_ret#94769145, PctPos_ret#94769146, TR_ret#94769147, IR_ret#94769148, annual_resret#94769149, std_resret#94769150, Sharpe_resret#94769151, PctPos_resret#94769152, TR_resret#94769153, IR_resret#94769154, annual_retnet#94769155, std_retnet#94769156, Sharpe_retnet#94769157, PctPos_retnet#94769158, TR_retnet#94769159, IR_retnet#94769160, turnover#94769161] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94769362, year#94769238] (8) Filter [codegen id : 1] Input [2]: [turnover#94769362, year#94769238] Condition : isnotnull(turnover#94769362) (9) Project [codegen id : 1] Output [3]: [year#94769238, turnover#94769362, (1.0 / cast(turnover#94769362 as double)) AS days_hold#94769436] Input [2]: [turnover#94769362, year#94769238] (10) Exchange Input [3]: [year#94769238, turnover#94769362, days_hold#94769436] Arguments: rangepartitioning(year#94769238 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7566131] (11) Sort [codegen id : 2] Input [3]: [year#94769238, turnover#94769362, days_hold#94769436] Arguments: [year#94769238 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94769238, turnover#94769362, days_hold#94769436] (13) CollectLimit Input [3]: [year#94769238, turnover#94769362, days_hold#94769436] Arguments: 1000000