== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94129013, turnover#94129069, days_hold#94129128] Arguments: [year#94129013, turnover#94129069, days_hold#94129128] (2) InMemoryRelation Arguments: [year#94129013, turnover#94129069, days_hold#94129128], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94129013 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94129013 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7515398] +- *(1) Project [year#94129013, turnover#94129069, (1.0 / cast(turnover#94129069 as double)) AS days_hold#94129128] +- *(1) Filter isnotnull(turnover#94129069) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94129069, year#94129013], [isnotnull(turnover#94129069)] +- InMemoryRelation [year#94129013, retIC#94129015, resretIC#94129017, numcos#94129020, numdates#94129022, annual_bmret#94129024, annual_ret#94129026, std_ret#94129029, Sharpe_ret#94129031, PctPos_ret#94129033, TR_ret#94129035, IR_ret#94129038, annual_resret#94129040, std_resret#94129043, Sharpe_resret#94129045, PctPos_resret#94129047, TR_resret#94129050, IR_resret#94129052, annual_retnet#94129054, std_retnet#94129057, Sharpe_retnet#94129059, PctPos_retnet#94129061, TR_retnet#94129064, IR_retnet#94129066, turnover#94129069], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94128778 = NA) OR (year#94128778 = null)) THEN null ELSE cast(year#94128778 as float) END AS year#94129013, CASE WHEN ((retIC#94128779 = NA) OR (retIC#94128779 = null)) THEN null ELSE cast(retIC#94128779 as float) END AS retIC#94129015, CASE WHEN ((resretIC#94128780 = NA) OR (resretIC#94128780 = null)) THEN null ELSE cast(resretIC#94128780 as float) END AS resretIC#94129017, CASE WHEN ((numcos#94128781 = NA) OR (numcos#94128781 = null)) THEN null ELSE cast(numcos#94128781 as float) END AS numcos#94129020, CASE WHEN ((numdates#94128782 = NA) OR (numdates#94128782 = null)) THEN null ELSE cast(numdates#94128782 as int) END AS numdates#94129022, CASE WHEN ((annual_bmret#94128783 = NA) OR (annual_bmret#94128783 = null)) THEN null ELSE cast(annual_bmret#94128783 as float) END AS annual_bmret#94129024, CASE WHEN ((annual_ret#94128784 = NA) OR (annual_ret#94128784 = null)) THEN null ELSE cast(annual_ret#94128784 as float) END AS annual_ret#94129026, CASE WHEN ((std_ret#94128785 = NA) OR (std_ret#94128785 = null)) THEN null ELSE cast(std_ret#94128785 as float) END AS std_ret#94129029, CASE WHEN ((Sharpe_ret#94128786 = NA) OR (Sharpe_ret#94128786 = null)) THEN null ELSE cast(Sharpe_ret#94128786 as float) END AS Sharpe_ret#94129031, CASE WHEN ((PctPos_ret#94128787 = NA) OR (PctPos_ret#94128787 = null)) THEN null ELSE cast(PctPos_ret#94128787 as float) END AS PctPos_ret#94129033, CASE WHEN ((TR_ret#94128788 = NA) OR (TR_ret#94128788 = null)) THEN null ELSE cast(TR_ret#94128788 as float) END AS TR_ret#94129035, CASE WHEN ((IR_ret#94128789 = NA) OR (IR_ret#94128789 = null)) THEN null ELSE cast(IR_ret#94128789 as float) END AS IR_ret#94129038, CASE WHEN ((annual_resret#94128790 = NA) OR (annual_resret#94128790 = null)) THEN null ELSE cast(annual_resret#94128790 as float) END AS annual_resret#94129040, CASE WHEN ((std_resret#94128791 = NA) OR (std_resret#94128791 = null)) THEN null ELSE cast(std_resret#94128791 as float) END AS std_resret#94129043, CASE WHEN ((Sharpe_resret#94128792 = NA) OR (Sharpe_resret#94128792 = null)) THEN null ELSE cast(Sharpe_resret#94128792 as float) END AS Sharpe_resret#94129045, CASE WHEN ((PctPos_resret#94128793 = NA) OR (PctPos_resret#94128793 = null)) THEN null ELSE cast(PctPos_resret#94128793 as float) END AS PctPos_resret#94129047, CASE WHEN ((TR_resret#94128794 = NA) OR (TR_resret#94128794 = null)) THEN null ELSE cast(TR_resret#94128794 as float) END AS TR_resret#94129050, CASE WHEN ((IR_resret#94128795 = NA) OR (IR_resret#94128795 = null)) THEN null ELSE cast(IR_resret#94128795 as float) END AS IR_resret#94129052, CASE WHEN ((annual_retnet#94128796 = NA) OR (annual_retnet#94128796 = null)) THEN null ELSE cast(annual_retnet#94128796 as float) END AS annual_retnet#94129054, CASE WHEN ((std_retnet#94128797 = NA) OR (std_retnet#94128797 = null)) THEN null ELSE cast(std_retnet#94128797 as float) END AS std_retnet#94129057, CASE WHEN ((Sharpe_retnet#94128798 = NA) OR (Sharpe_retnet#94128798 = null)) THEN null ELSE cast(Sharpe_retnet#94128798 as float) END AS Sharpe_retnet#94129059, CASE WHEN ((PctPos_retnet#94128799 = NA) OR (PctPos_retnet#94128799 = null)) THEN null ELSE cast(PctPos_retnet#94128799 as float) END AS PctPos_retnet#94129061, CASE WHEN ((TR_retnet#94128800 = NA) OR (TR_retnet#94128800 = null)) THEN null ELSE cast(TR_retnet#94128800 as float) END AS TR_retnet#94129064, CASE WHEN ((IR_retnet#94128801 = NA) OR (IR_retnet#94128801 = null)) THEN null ELSE cast(IR_retnet#94128801 as float) END AS IR_retnet#94129066, CASE WHEN ((turnover#94128802 = NA) OR (turnover#94128802 = null)) THEN null ELSE cast(turnover#94128802 as float) END AS turnover#94129069] +- FileScan csv [year#94128778,retIC#94128779,resretIC#94128780,numcos#94128781,numdates#94128782,annual_bmret#94128783,annual_ret#94128784,std_ret#94128785,Sharpe_ret#94128786,PctPos_ret#94128787,TR_ret#94128788,IR_ret#94128789,annual_resret#94128790,std_resret#94128791,Sharpe_resret#94128792,PctPos_resret#94128793,TR_resret#94128794,IR_resret#94128795,annual_retnet#94128796,std_retnet#94128797,Sharpe_retnet#94128798,PctPos_retnet#94128799,TR_retnet#94128800,IR_retnet#94128801,turnover#94128802] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94129013 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94129069, year#94129013] Arguments: [turnover#94129069, year#94129013], [isnotnull(turnover#94129069)] (4) InMemoryRelation Arguments: [year#94129013, retIC#94129015, resretIC#94129017, numcos#94129020, numdates#94129022, annual_bmret#94129024, annual_ret#94129026, std_ret#94129029, Sharpe_ret#94129031, PctPos_ret#94129033, TR_ret#94129035, IR_ret#94129038, annual_resret#94129040, std_resret#94129043, Sharpe_resret#94129045, PctPos_resret#94129047, TR_resret#94129050, IR_resret#94129052, annual_retnet#94129054, std_retnet#94129057, Sharpe_retnet#94129059, PctPos_retnet#94129061, TR_retnet#94129064, IR_retnet#94129066, turnover#94129069], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94128778 = NA) OR (year#94128778 = null)) THEN null ELSE cast(year#94128778 as float) END AS year#94129013, CASE WHEN ((retIC#94128779 = NA) OR (retIC#94128779 = null)) THEN null ELSE cast(retIC#94128779 as float) END AS retIC#94129015, CASE WHEN ((resretIC#94128780 = NA) OR (resretIC#94128780 = null)) THEN null ELSE cast(resretIC#94128780 as float) END AS resretIC#94129017, CASE WHEN ((numcos#94128781 = NA) OR (numcos#94128781 = null)) THEN null ELSE cast(numcos#94128781 as float) END AS numcos#94129020, CASE WHEN ((numdates#94128782 = NA) OR (numdates#94128782 = null)) THEN null ELSE cast(numdates#94128782 as int) END AS numdates#94129022, CASE WHEN ((annual_bmret#94128783 = NA) OR (annual_bmret#94128783 = null)) THEN null ELSE cast(annual_bmret#94128783 as float) END AS annual_bmret#94129024, CASE WHEN ((annual_ret#94128784 = NA) OR (annual_ret#94128784 = null)) THEN null ELSE cast(annual_ret#94128784 as float) END AS annual_ret#94129026, CASE WHEN ((std_ret#94128785 = NA) OR (std_ret#94128785 = null)) THEN null ELSE cast(std_ret#94128785 as float) END AS std_ret#94129029, CASE WHEN ((Sharpe_ret#94128786 = NA) OR (Sharpe_ret#94128786 = null)) THEN null ELSE cast(Sharpe_ret#94128786 as float) END AS Sharpe_ret#94129031, CASE WHEN ((PctPos_ret#94128787 = NA) OR (PctPos_ret#94128787 = null)) THEN null ELSE cast(PctPos_ret#94128787 as float) END AS PctPos_ret#94129033, CASE WHEN ((TR_ret#94128788 = NA) OR (TR_ret#94128788 = null)) THEN null ELSE cast(TR_ret#94128788 as float) END AS TR_ret#94129035, CASE WHEN ((IR_ret#94128789 = NA) OR (IR_ret#94128789 = null)) THEN null ELSE cast(IR_ret#94128789 as float) END AS IR_ret#94129038, CASE WHEN ((annual_resret#94128790 = NA) OR (annual_resret#94128790 = null)) THEN null ELSE cast(annual_resret#94128790 as float) END AS annual_resret#94129040, CASE WHEN ((std_resret#94128791 = NA) OR (std_resret#94128791 = null)) THEN null ELSE cast(std_resret#94128791 as float) END AS std_resret#94129043, CASE WHEN ((Sharpe_resret#94128792 = NA) OR (Sharpe_resret#94128792 = null)) THEN null ELSE cast(Sharpe_resret#94128792 as float) END AS Sharpe_resret#94129045, CASE WHEN ((PctPos_resret#94128793 = NA) OR (PctPos_resret#94128793 = null)) THEN null ELSE cast(PctPos_resret#94128793 as float) END AS PctPos_resret#94129047, CASE WHEN ((TR_resret#94128794 = NA) OR (TR_resret#94128794 = null)) THEN null ELSE cast(TR_resret#94128794 as float) END AS TR_resret#94129050, CASE WHEN ((IR_resret#94128795 = NA) OR (IR_resret#94128795 = null)) THEN null ELSE cast(IR_resret#94128795 as float) END AS IR_resret#94129052, CASE WHEN ((annual_retnet#94128796 = NA) OR (annual_retnet#94128796 = null)) THEN null ELSE cast(annual_retnet#94128796 as float) END AS annual_retnet#94129054, CASE WHEN ((std_retnet#94128797 = NA) OR (std_retnet#94128797 = null)) THEN null ELSE cast(std_retnet#94128797 as float) END AS std_retnet#94129057, CASE WHEN ((Sharpe_retnet#94128798 = NA) OR (Sharpe_retnet#94128798 = null)) THEN null ELSE cast(Sharpe_retnet#94128798 as float) END AS Sharpe_retnet#94129059, CASE WHEN ((PctPos_retnet#94128799 = NA) OR (PctPos_retnet#94128799 = null)) THEN null ELSE cast(PctPos_retnet#94128799 as float) END AS PctPos_retnet#94129061, CASE WHEN ((TR_retnet#94128800 = NA) OR (TR_retnet#94128800 = null)) THEN null ELSE cast(TR_retnet#94128800 as float) END AS TR_retnet#94129064, CASE WHEN ((IR_retnet#94128801 = NA) OR (IR_retnet#94128801 = null)) THEN null ELSE cast(IR_retnet#94128801 as float) END AS IR_retnet#94129066, CASE WHEN ((turnover#94128802 = NA) OR (turnover#94128802 = null)) THEN null ELSE cast(turnover#94128802 as float) END AS turnover#94129069] +- FileScan csv [year#94128778,retIC#94128779,resretIC#94128780,numcos#94128781,numdates#94128782,annual_bmret#94128783,annual_ret#94128784,std_ret#94128785,Sharpe_ret#94128786,PctPos_ret#94128787,TR_ret#94128788,IR_ret#94128789,annual_resret#94128790,std_resret#94128791,Sharpe_resret#94128792,PctPos_resret#94128793,TR_resret#94128794,IR_resret#94128795,annual_retnet#94128796,std_retnet#94128797,Sharpe_retnet#94128798,PctPos_retnet#94128799,TR_retnet#94128800,IR_retnet#94128801,turnover#94128802] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94128778, retIC#94128779, resretIC#94128780, numcos#94128781, numdates#94128782, annual_bmret#94128783, annual_ret#94128784, std_ret#94128785, Sharpe_ret#94128786, PctPos_ret#94128787, TR_ret#94128788, IR_ret#94128789, annual_resret#94128790, std_resret#94128791, Sharpe_resret#94128792, PctPos_resret#94128793, TR_resret#94128794, IR_resret#94128795, annual_retnet#94128796, std_retnet#94128797, Sharpe_retnet#94128798, PctPos_retnet#94128799, TR_retnet#94128800, IR_retnet#94128801, turnover#94128802] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript_model_residualized/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94128778 = NA) OR (year#94128778 = null)) THEN null ELSE cast(year#94128778 as float) END AS year#94129013, CASE WHEN ((retIC#94128779 = NA) OR (retIC#94128779 = null)) THEN null ELSE cast(retIC#94128779 as float) END AS retIC#94129015, CASE WHEN ((resretIC#94128780 = NA) OR (resretIC#94128780 = null)) THEN null ELSE cast(resretIC#94128780 as float) END AS resretIC#94129017, CASE WHEN ((numcos#94128781 = NA) OR (numcos#94128781 = null)) THEN null ELSE cast(numcos#94128781 as float) END AS numcos#94129020, CASE WHEN ((numdates#94128782 = NA) OR (numdates#94128782 = null)) THEN null ELSE cast(numdates#94128782 as int) END AS numdates#94129022, CASE WHEN ((annual_bmret#94128783 = NA) OR (annual_bmret#94128783 = null)) THEN null ELSE cast(annual_bmret#94128783 as float) END AS annual_bmret#94129024, CASE WHEN ((annual_ret#94128784 = NA) OR (annual_ret#94128784 = null)) THEN null ELSE cast(annual_ret#94128784 as float) END AS annual_ret#94129026, CASE WHEN ((std_ret#94128785 = NA) OR (std_ret#94128785 = null)) THEN null ELSE cast(std_ret#94128785 as float) END AS std_ret#94129029, CASE WHEN ((Sharpe_ret#94128786 = NA) OR (Sharpe_ret#94128786 = null)) THEN null ELSE cast(Sharpe_ret#94128786 as float) END AS Sharpe_ret#94129031, CASE WHEN ((PctPos_ret#94128787 = NA) OR (PctPos_ret#94128787 = null)) THEN null ELSE cast(PctPos_ret#94128787 as float) END AS PctPos_ret#94129033, CASE WHEN ((TR_ret#94128788 = NA) OR (TR_ret#94128788 = null)) THEN null ELSE cast(TR_ret#94128788 as float) END AS TR_ret#94129035, CASE WHEN ((IR_ret#94128789 = NA) OR (IR_ret#94128789 = null)) THEN null ELSE cast(IR_ret#94128789 as float) END AS IR_ret#94129038, CASE WHEN ((annual_resret#94128790 = NA) OR (annual_resret#94128790 = null)) THEN null ELSE cast(annual_resret#94128790 as float) END AS annual_resret#94129040, CASE WHEN ((std_resret#94128791 = NA) OR (std_resret#94128791 = null)) THEN null ELSE cast(std_resret#94128791 as float) END AS std_resret#94129043, CASE WHEN ((Sharpe_resret#94128792 = NA) OR (Sharpe_resret#94128792 = null)) THEN null ELSE cast(Sharpe_resret#94128792 as float) END AS Sharpe_resret#94129045, CASE WHEN ((PctPos_resret#94128793 = NA) OR (PctPos_resret#94128793 = null)) THEN null ELSE cast(PctPos_resret#94128793 as float) END AS PctPos_resret#94129047, CASE WHEN ((TR_resret#94128794 = NA) OR (TR_resret#94128794 = null)) THEN null ELSE cast(TR_resret#94128794 as float) END AS TR_resret#94129050, CASE WHEN ((IR_resret#94128795 = NA) OR (IR_resret#94128795 = null)) THEN null ELSE cast(IR_resret#94128795 as float) END AS IR_resret#94129052, CASE WHEN ((annual_retnet#94128796 = NA) OR (annual_retnet#94128796 = null)) THEN null ELSE cast(annual_retnet#94128796 as float) END AS annual_retnet#94129054, CASE WHEN ((std_retnet#94128797 = NA) OR (std_retnet#94128797 = null)) THEN null ELSE cast(std_retnet#94128797 as float) END AS std_retnet#94129057, CASE WHEN ((Sharpe_retnet#94128798 = NA) OR (Sharpe_retnet#94128798 = null)) THEN null ELSE cast(Sharpe_retnet#94128798 as float) END AS Sharpe_retnet#94129059, CASE WHEN ((PctPos_retnet#94128799 = NA) OR (PctPos_retnet#94128799 = null)) THEN null ELSE cast(PctPos_retnet#94128799 as float) END AS PctPos_retnet#94129061, CASE WHEN ((TR_retnet#94128800 = NA) OR (TR_retnet#94128800 = null)) THEN null ELSE cast(TR_retnet#94128800 as float) END AS TR_retnet#94129064, CASE WHEN ((IR_retnet#94128801 = NA) OR (IR_retnet#94128801 = null)) THEN null ELSE cast(IR_retnet#94128801 as float) END AS IR_retnet#94129066, CASE WHEN ((turnover#94128802 = NA) OR (turnover#94128802 = null)) THEN null ELSE cast(turnover#94128802 as float) END AS turnover#94129069] Input [25]: [year#94128778, retIC#94128779, resretIC#94128780, numcos#94128781, numdates#94128782, annual_bmret#94128783, annual_ret#94128784, std_ret#94128785, Sharpe_ret#94128786, PctPos_ret#94128787, TR_ret#94128788, IR_ret#94128789, annual_resret#94128790, std_resret#94128791, Sharpe_resret#94128792, PctPos_resret#94128793, TR_resret#94128794, IR_resret#94128795, annual_retnet#94128796, std_retnet#94128797, Sharpe_retnet#94128798, PctPos_retnet#94128799, TR_retnet#94128800, IR_retnet#94128801, turnover#94128802] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94129069, year#94129013] (8) Filter [codegen id : 1] Input [2]: [turnover#94129069, year#94129013] Condition : isnotnull(turnover#94129069) (9) Project [codegen id : 1] Output [3]: [year#94129013, turnover#94129069, (1.0 / cast(turnover#94129069 as double)) AS days_hold#94129128] Input [2]: [turnover#94129069, year#94129013] (10) Exchange Input [3]: [year#94129013, turnover#94129069, days_hold#94129128] Arguments: rangepartitioning(year#94129013 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7515398] (11) Sort [codegen id : 2] Input [3]: [year#94129013, turnover#94129069, days_hold#94129128] Arguments: [year#94129013 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94129013, turnover#94129069, days_hold#94129128] (13) CollectLimit Input [3]: [year#94129013, turnover#94129069, days_hold#94129128] Arguments: 1000000