== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94042677, turnover#94042749, days_hold#94042792] Arguments: [year#94042677, turnover#94042749, days_hold#94042792] (2) InMemoryRelation Arguments: [year#94042677, turnover#94042749, days_hold#94042792], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94042677 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94042677 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7508514] +- *(1) Project [year#94042677, turnover#94042749, (1.0 / cast(turnover#94042749 as double)) AS days_hold#94042792] +- *(1) Filter isnotnull(turnover#94042749) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94042749, year#94042677], [isnotnull(turnover#94042749)] +- InMemoryRelation [year#94042677, retIC#94042678, resretIC#94042712, numcos#94042713, numdates#94042714, annual_bmret#94042715, annual_ret#94042716, std_ret#94042717, Sharpe_ret#94042719, PctPos_ret#94042721, TR_ret#94042723, IR_ret#94042724, annual_resret#94042726, std_resret#94042728, Sharpe_resret#94042730, PctPos_resret#94042732, TR_resret#94042733, IR_resret#94042735, annual_retnet#94042737, std_retnet#94042739, Sharpe_retnet#94042741, PctPos_retnet#94042743, TR_retnet#94042744, IR_retnet#94042746, turnover#94042749], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94042478 = NA) OR (year#94042478 = null)) THEN null ELSE cast(year#94042478 as int) END AS year#94042677, CASE WHEN ((retIC#94042479 = NA) OR (retIC#94042479 = null)) THEN null ELSE cast(retIC#94042479 as float) END AS retIC#94042678, CASE WHEN ((resretIC#94042480 = NA) OR (resretIC#94042480 = null)) THEN null ELSE cast(resretIC#94042480 as float) END AS resretIC#94042712, CASE WHEN ((numcos#94042481 = NA) OR (numcos#94042481 = null)) THEN null ELSE cast(numcos#94042481 as float) END AS numcos#94042713, CASE WHEN ((numdates#94042482 = NA) OR (numdates#94042482 = null)) THEN null ELSE cast(numdates#94042482 as int) END AS numdates#94042714, CASE WHEN ((annual_bmret#94042483 = NA) OR (annual_bmret#94042483 = null)) THEN null ELSE cast(annual_bmret#94042483 as float) END AS annual_bmret#94042715, CASE WHEN ((annual_ret#94042484 = NA) OR (annual_ret#94042484 = null)) THEN null ELSE cast(annual_ret#94042484 as float) END AS annual_ret#94042716, CASE WHEN ((std_ret#94042485 = NA) OR (std_ret#94042485 = null)) THEN null ELSE cast(std_ret#94042485 as float) END AS std_ret#94042717, CASE WHEN ((Sharpe_ret#94042486 = NA) OR (Sharpe_ret#94042486 = null)) THEN null ELSE cast(Sharpe_ret#94042486 as float) END AS Sharpe_ret#94042719, CASE WHEN ((PctPos_ret#94042487 = NA) OR (PctPos_ret#94042487 = null)) THEN null ELSE cast(PctPos_ret#94042487 as float) END AS PctPos_ret#94042721, CASE WHEN ((TR_ret#94042488 = NA) OR (TR_ret#94042488 = null)) THEN null ELSE cast(TR_ret#94042488 as float) END AS TR_ret#94042723, CASE WHEN ((IR_ret#94042489 = NA) OR (IR_ret#94042489 = null)) THEN null ELSE cast(IR_ret#94042489 as float) END AS IR_ret#94042724, CASE WHEN ((annual_resret#94042490 = NA) OR (annual_resret#94042490 = null)) THEN null ELSE cast(annual_resret#94042490 as float) END AS annual_resret#94042726, CASE WHEN ((std_resret#94042491 = NA) OR (std_resret#94042491 = null)) THEN null ELSE cast(std_resret#94042491 as float) END AS std_resret#94042728, CASE WHEN ((Sharpe_resret#94042492 = NA) OR (Sharpe_resret#94042492 = null)) THEN null ELSE cast(Sharpe_resret#94042492 as float) END AS Sharpe_resret#94042730, CASE WHEN ((PctPos_resret#94042493 = NA) OR (PctPos_resret#94042493 = null)) THEN null ELSE cast(PctPos_resret#94042493 as float) END AS PctPos_resret#94042732, CASE WHEN ((TR_resret#94042494 = NA) OR (TR_resret#94042494 = null)) THEN null ELSE cast(TR_resret#94042494 as float) END AS TR_resret#94042733, CASE WHEN ((IR_resret#94042495 = NA) OR (IR_resret#94042495 = null)) THEN null ELSE cast(IR_resret#94042495 as float) END AS IR_resret#94042735, CASE WHEN ((annual_retnet#94042496 = NA) OR (annual_retnet#94042496 = null)) THEN null ELSE cast(annual_retnet#94042496 as float) END AS annual_retnet#94042737, CASE WHEN ((std_retnet#94042497 = NA) OR (std_retnet#94042497 = null)) THEN null ELSE cast(std_retnet#94042497 as float) END AS std_retnet#94042739, CASE WHEN ((Sharpe_retnet#94042498 = NA) OR (Sharpe_retnet#94042498 = null)) THEN null ELSE cast(Sharpe_retnet#94042498 as float) END AS Sharpe_retnet#94042741, CASE WHEN ((PctPos_retnet#94042499 = NA) OR (PctPos_retnet#94042499 = null)) THEN null ELSE cast(PctPos_retnet#94042499 as float) END AS PctPos_retnet#94042743, CASE WHEN ((TR_retnet#94042500 = NA) OR (TR_retnet#94042500 = null)) THEN null ELSE cast(TR_retnet#94042500 as float) END AS TR_retnet#94042744, CASE WHEN ((IR_retnet#94042501 = NA) OR (IR_retnet#94042501 = null)) THEN null ELSE cast(IR_retnet#94042501 as float) END AS IR_retnet#94042746, CASE WHEN ((turnover#94042502 = NA) OR (turnover#94042502 = null)) THEN null ELSE cast(turnover#94042502 as float) END AS turnover#94042749] +- FileScan csv [year#94042478,retIC#94042479,resretIC#94042480,numcos#94042481,numdates#94042482,annual_bmret#94042483,annual_ret#94042484,std_ret#94042485,Sharpe_ret#94042486,PctPos_ret#94042487,TR_ret#94042488,IR_ret#94042489,annual_resret#94042490,std_resret#94042491,Sharpe_resret#94042492,PctPos_resret#94042493,TR_resret#94042494,IR_resret#94042495,annual_retnet#94042496,std_retnet#94042497,Sharpe_retnet#94042498,PctPos_retnet#94042499,TR_retnet#94042500,IR_retnet#94042501,turnover#94042502] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94042677 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94042749, year#94042677] Arguments: [turnover#94042749, year#94042677], [isnotnull(turnover#94042749)] (4) InMemoryRelation Arguments: [year#94042677, retIC#94042678, resretIC#94042712, numcos#94042713, numdates#94042714, annual_bmret#94042715, annual_ret#94042716, std_ret#94042717, Sharpe_ret#94042719, PctPos_ret#94042721, TR_ret#94042723, IR_ret#94042724, annual_resret#94042726, std_resret#94042728, Sharpe_resret#94042730, PctPos_resret#94042732, TR_resret#94042733, IR_resret#94042735, annual_retnet#94042737, std_retnet#94042739, Sharpe_retnet#94042741, PctPos_retnet#94042743, TR_retnet#94042744, IR_retnet#94042746, turnover#94042749], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94042478 = NA) OR (year#94042478 = null)) THEN null ELSE cast(year#94042478 as int) END AS year#94042677, CASE WHEN ((retIC#94042479 = NA) OR (retIC#94042479 = null)) THEN null ELSE cast(retIC#94042479 as float) END AS retIC#94042678, CASE WHEN ((resretIC#94042480 = NA) OR (resretIC#94042480 = null)) THEN null ELSE cast(resretIC#94042480 as float) END AS resretIC#94042712, CASE WHEN ((numcos#94042481 = NA) OR (numcos#94042481 = null)) THEN null ELSE cast(numcos#94042481 as float) END AS numcos#94042713, CASE WHEN ((numdates#94042482 = NA) OR (numdates#94042482 = null)) THEN null ELSE cast(numdates#94042482 as int) END AS numdates#94042714, CASE WHEN ((annual_bmret#94042483 = NA) OR (annual_bmret#94042483 = null)) THEN null ELSE cast(annual_bmret#94042483 as float) END AS annual_bmret#94042715, CASE WHEN ((annual_ret#94042484 = NA) OR (annual_ret#94042484 = null)) THEN null ELSE cast(annual_ret#94042484 as float) END AS annual_ret#94042716, CASE WHEN ((std_ret#94042485 = NA) OR (std_ret#94042485 = null)) THEN null ELSE cast(std_ret#94042485 as float) END AS std_ret#94042717, CASE WHEN ((Sharpe_ret#94042486 = NA) OR (Sharpe_ret#94042486 = null)) THEN null ELSE cast(Sharpe_ret#94042486 as float) END AS Sharpe_ret#94042719, CASE WHEN ((PctPos_ret#94042487 = NA) OR (PctPos_ret#94042487 = null)) THEN null ELSE cast(PctPos_ret#94042487 as float) END AS PctPos_ret#94042721, CASE WHEN ((TR_ret#94042488 = NA) OR (TR_ret#94042488 = null)) THEN null ELSE cast(TR_ret#94042488 as float) END AS TR_ret#94042723, CASE WHEN ((IR_ret#94042489 = NA) OR (IR_ret#94042489 = null)) THEN null ELSE cast(IR_ret#94042489 as float) END AS IR_ret#94042724, CASE WHEN ((annual_resret#94042490 = NA) OR (annual_resret#94042490 = null)) THEN null ELSE cast(annual_resret#94042490 as float) END AS annual_resret#94042726, CASE WHEN ((std_resret#94042491 = NA) OR (std_resret#94042491 = null)) THEN null ELSE cast(std_resret#94042491 as float) END AS std_resret#94042728, CASE WHEN ((Sharpe_resret#94042492 = NA) OR (Sharpe_resret#94042492 = null)) THEN null ELSE cast(Sharpe_resret#94042492 as float) END AS Sharpe_resret#94042730, CASE WHEN ((PctPos_resret#94042493 = NA) OR (PctPos_resret#94042493 = null)) THEN null ELSE cast(PctPos_resret#94042493 as float) END AS PctPos_resret#94042732, CASE WHEN ((TR_resret#94042494 = NA) OR (TR_resret#94042494 = null)) THEN null ELSE cast(TR_resret#94042494 as float) END AS TR_resret#94042733, CASE WHEN ((IR_resret#94042495 = NA) OR (IR_resret#94042495 = null)) THEN null ELSE cast(IR_resret#94042495 as float) END AS IR_resret#94042735, CASE WHEN ((annual_retnet#94042496 = NA) OR (annual_retnet#94042496 = null)) THEN null ELSE cast(annual_retnet#94042496 as float) END AS annual_retnet#94042737, CASE WHEN ((std_retnet#94042497 = NA) OR (std_retnet#94042497 = null)) THEN null ELSE cast(std_retnet#94042497 as float) END AS std_retnet#94042739, CASE WHEN ((Sharpe_retnet#94042498 = NA) OR (Sharpe_retnet#94042498 = null)) THEN null ELSE cast(Sharpe_retnet#94042498 as float) END AS Sharpe_retnet#94042741, CASE WHEN ((PctPos_retnet#94042499 = NA) OR (PctPos_retnet#94042499 = null)) THEN null ELSE cast(PctPos_retnet#94042499 as float) END AS PctPos_retnet#94042743, CASE WHEN ((TR_retnet#94042500 = NA) OR (TR_retnet#94042500 = null)) THEN null ELSE cast(TR_retnet#94042500 as float) END AS TR_retnet#94042744, CASE WHEN ((IR_retnet#94042501 = NA) OR (IR_retnet#94042501 = null)) THEN null ELSE cast(IR_retnet#94042501 as float) END AS IR_retnet#94042746, CASE WHEN ((turnover#94042502 = NA) OR (turnover#94042502 = null)) THEN null ELSE cast(turnover#94042502 as float) END AS turnover#94042749] +- FileScan csv [year#94042478,retIC#94042479,resretIC#94042480,numcos#94042481,numdates#94042482,annual_bmret#94042483,annual_ret#94042484,std_ret#94042485,Sharpe_ret#94042486,PctPos_ret#94042487,TR_ret#94042488,IR_ret#94042489,annual_resret#94042490,std_resret#94042491,Sharpe_resret#94042492,PctPos_resret#94042493,TR_resret#94042494,IR_resret#94042495,annual_retnet#94042496,std_retnet#94042497,Sharpe_retnet#94042498,PctPos_retnet#94042499,TR_retnet#94042500,IR_retnet#94042501,turnover#94042502] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94042478, retIC#94042479, resretIC#94042480, numcos#94042481, numdates#94042482, annual_bmret#94042483, annual_ret#94042484, std_ret#94042485, Sharpe_ret#94042486, PctPos_ret#94042487, TR_ret#94042488, IR_ret#94042489, annual_resret#94042490, std_resret#94042491, Sharpe_resret#94042492, PctPos_resret#94042493, TR_resret#94042494, IR_resret#94042495, annual_retnet#94042496, std_retnet#94042497, Sharpe_retnet#94042498, PctPos_retnet#94042499, TR_retnet#94042500, IR_retnet#94042501, turnover#94042502] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/leverage/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94042478 = NA) OR (year#94042478 = null)) THEN null ELSE cast(year#94042478 as int) END AS year#94042677, CASE WHEN ((retIC#94042479 = NA) OR (retIC#94042479 = null)) THEN null ELSE cast(retIC#94042479 as float) END AS retIC#94042678, CASE WHEN ((resretIC#94042480 = NA) OR (resretIC#94042480 = null)) THEN null ELSE cast(resretIC#94042480 as float) END AS resretIC#94042712, CASE WHEN ((numcos#94042481 = NA) OR (numcos#94042481 = null)) THEN null ELSE cast(numcos#94042481 as float) END AS numcos#94042713, CASE WHEN ((numdates#94042482 = NA) OR (numdates#94042482 = null)) THEN null ELSE cast(numdates#94042482 as int) END AS numdates#94042714, CASE WHEN ((annual_bmret#94042483 = NA) OR (annual_bmret#94042483 = null)) THEN null ELSE cast(annual_bmret#94042483 as float) END AS annual_bmret#94042715, CASE WHEN ((annual_ret#94042484 = NA) OR (annual_ret#94042484 = null)) THEN null ELSE cast(annual_ret#94042484 as float) END AS annual_ret#94042716, CASE WHEN ((std_ret#94042485 = NA) OR (std_ret#94042485 = null)) THEN null ELSE cast(std_ret#94042485 as float) END AS std_ret#94042717, CASE WHEN ((Sharpe_ret#94042486 = NA) OR (Sharpe_ret#94042486 = null)) THEN null ELSE cast(Sharpe_ret#94042486 as float) END AS Sharpe_ret#94042719, CASE WHEN ((PctPos_ret#94042487 = NA) OR (PctPos_ret#94042487 = null)) THEN null ELSE cast(PctPos_ret#94042487 as float) END AS PctPos_ret#94042721, CASE WHEN ((TR_ret#94042488 = NA) OR (TR_ret#94042488 = null)) THEN null ELSE cast(TR_ret#94042488 as float) END AS TR_ret#94042723, CASE WHEN ((IR_ret#94042489 = NA) OR (IR_ret#94042489 = null)) THEN null ELSE cast(IR_ret#94042489 as float) END AS IR_ret#94042724, CASE WHEN ((annual_resret#94042490 = NA) OR (annual_resret#94042490 = null)) THEN null ELSE cast(annual_resret#94042490 as float) END AS annual_resret#94042726, CASE WHEN ((std_resret#94042491 = NA) OR (std_resret#94042491 = null)) THEN null ELSE cast(std_resret#94042491 as float) END AS std_resret#94042728, CASE WHEN ((Sharpe_resret#94042492 = NA) OR (Sharpe_resret#94042492 = null)) THEN null ELSE cast(Sharpe_resret#94042492 as float) END AS Sharpe_resret#94042730, CASE WHEN ((PctPos_resret#94042493 = NA) OR (PctPos_resret#94042493 = null)) THEN null ELSE cast(PctPos_resret#94042493 as float) END AS PctPos_resret#94042732, CASE WHEN ((TR_resret#94042494 = NA) OR (TR_resret#94042494 = null)) THEN null ELSE cast(TR_resret#94042494 as float) END AS TR_resret#94042733, CASE WHEN ((IR_resret#94042495 = NA) OR (IR_resret#94042495 = null)) THEN null ELSE cast(IR_resret#94042495 as float) END AS IR_resret#94042735, CASE WHEN ((annual_retnet#94042496 = NA) OR (annual_retnet#94042496 = null)) THEN null ELSE cast(annual_retnet#94042496 as float) END AS annual_retnet#94042737, CASE WHEN ((std_retnet#94042497 = NA) OR (std_retnet#94042497 = null)) THEN null ELSE cast(std_retnet#94042497 as float) END AS std_retnet#94042739, CASE WHEN ((Sharpe_retnet#94042498 = NA) OR (Sharpe_retnet#94042498 = null)) THEN null ELSE cast(Sharpe_retnet#94042498 as float) END AS Sharpe_retnet#94042741, CASE WHEN ((PctPos_retnet#94042499 = NA) OR (PctPos_retnet#94042499 = null)) THEN null ELSE cast(PctPos_retnet#94042499 as float) END AS PctPos_retnet#94042743, CASE WHEN ((TR_retnet#94042500 = NA) OR (TR_retnet#94042500 = null)) THEN null ELSE cast(TR_retnet#94042500 as float) END AS TR_retnet#94042744, CASE WHEN ((IR_retnet#94042501 = NA) OR (IR_retnet#94042501 = null)) THEN null ELSE cast(IR_retnet#94042501 as float) END AS IR_retnet#94042746, CASE WHEN ((turnover#94042502 = NA) OR (turnover#94042502 = null)) THEN null ELSE cast(turnover#94042502 as float) END AS turnover#94042749] Input [25]: [year#94042478, retIC#94042479, resretIC#94042480, numcos#94042481, numdates#94042482, annual_bmret#94042483, annual_ret#94042484, std_ret#94042485, Sharpe_ret#94042486, PctPos_ret#94042487, TR_ret#94042488, IR_ret#94042489, annual_resret#94042490, std_resret#94042491, Sharpe_resret#94042492, PctPos_resret#94042493, TR_resret#94042494, IR_resret#94042495, annual_retnet#94042496, std_retnet#94042497, Sharpe_retnet#94042498, PctPos_retnet#94042499, TR_retnet#94042500, IR_retnet#94042501, turnover#94042502] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94042749, year#94042677] (8) Filter [codegen id : 1] Input [2]: [turnover#94042749, year#94042677] Condition : isnotnull(turnover#94042749) (9) Project [codegen id : 1] Output [3]: [year#94042677, turnover#94042749, (1.0 / cast(turnover#94042749 as double)) AS days_hold#94042792] Input [2]: [turnover#94042749, year#94042677] (10) Exchange Input [3]: [year#94042677, turnover#94042749, days_hold#94042792] Arguments: rangepartitioning(year#94042677 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7508514] (11) Sort [codegen id : 2] Input [3]: [year#94042677, turnover#94042749, days_hold#94042792] Arguments: [year#94042677 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94042677, turnover#94042749, days_hold#94042792] (13) CollectLimit Input [3]: [year#94042677, turnover#94042749, days_hold#94042792] Arguments: 1000000