== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94285583, turnover#94285631, days_hold#94285666] Arguments: [year#94285583, turnover#94285631, days_hold#94285666] (2) InMemoryRelation Arguments: [year#94285583, turnover#94285631, days_hold#94285666], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94285583 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94285583 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7527893] +- *(1) Project [year#94285583, turnover#94285631, (1.0 / cast(turnover#94285631 as double)) AS days_hold#94285666] +- *(1) Filter isnotnull(turnover#94285631) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94285631, year#94285583], [isnotnull(turnover#94285631)] +- InMemoryRelation [year#94285583, retIC#94285585, resretIC#94285587, numcos#94285589, numdates#94285591, annual_bmret#94285593, annual_ret#94285595, std_ret#94285597, Sharpe_ret#94285599, PctPos_ret#94285601, TR_ret#94285603, IR_ret#94285605, annual_resret#94285607, std_resret#94285609, Sharpe_resret#94285611, PctPos_resret#94285613, TR_resret#94285615, IR_resret#94285617, annual_retnet#94285619, std_retnet#94285621, Sharpe_retnet#94285623, PctPos_retnet#94285625, TR_retnet#94285627, IR_retnet#94285629, turnover#94285631], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94285352 = NA) OR (year#94285352 = null)) THEN null ELSE cast(year#94285352 as float) END AS year#94285583, CASE WHEN ((retIC#94285354 = NA) OR (retIC#94285354 = null)) THEN null ELSE cast(retIC#94285354 as float) END AS retIC#94285585, CASE WHEN ((resretIC#94285356 = NA) OR (resretIC#94285356 = null)) THEN null ELSE cast(resretIC#94285356 as float) END AS resretIC#94285587, CASE WHEN ((numcos#94285359 = NA) OR (numcos#94285359 = null)) THEN null ELSE cast(numcos#94285359 as float) END AS numcos#94285589, CASE WHEN ((numdates#94285361 = NA) OR (numdates#94285361 = null)) THEN null ELSE cast(numdates#94285361 as int) END AS numdates#94285591, CASE WHEN ((annual_bmret#94285363 = NA) OR (annual_bmret#94285363 = null)) THEN null ELSE cast(annual_bmret#94285363 as float) END AS annual_bmret#94285593, CASE WHEN ((annual_ret#94285365 = NA) OR (annual_ret#94285365 = null)) THEN null ELSE cast(annual_ret#94285365 as float) END AS annual_ret#94285595, CASE WHEN ((std_ret#94285368 = NA) OR (std_ret#94285368 = null)) THEN null ELSE cast(std_ret#94285368 as float) END AS std_ret#94285597, CASE WHEN ((Sharpe_ret#94285370 = NA) OR (Sharpe_ret#94285370 = null)) THEN null ELSE cast(Sharpe_ret#94285370 as float) END AS Sharpe_ret#94285599, CASE WHEN ((PctPos_ret#94285372 = NA) OR (PctPos_ret#94285372 = null)) THEN null ELSE cast(PctPos_ret#94285372 as float) END AS PctPos_ret#94285601, CASE WHEN ((TR_ret#94285374 = NA) OR (TR_ret#94285374 = null)) THEN null ELSE cast(TR_ret#94285374 as float) END AS TR_ret#94285603, CASE WHEN ((IR_ret#94285376 = NA) OR (IR_ret#94285376 = null)) THEN null ELSE cast(IR_ret#94285376 as float) END AS IR_ret#94285605, CASE WHEN ((annual_resret#94285378 = NA) OR (annual_resret#94285378 = null)) THEN null ELSE cast(annual_resret#94285378 as float) END AS annual_resret#94285607, CASE WHEN ((std_resret#94285380 = NA) OR (std_resret#94285380 = null)) THEN null ELSE cast(std_resret#94285380 as float) END AS std_resret#94285609, CASE WHEN ((Sharpe_resret#94285382 = NA) OR (Sharpe_resret#94285382 = null)) THEN null ELSE cast(Sharpe_resret#94285382 as float) END AS Sharpe_resret#94285611, CASE WHEN ((PctPos_resret#94285384 = NA) OR (PctPos_resret#94285384 = null)) THEN null ELSE cast(PctPos_resret#94285384 as float) END AS PctPos_resret#94285613, CASE WHEN ((TR_resret#94285386 = NA) OR (TR_resret#94285386 = null)) THEN null ELSE cast(TR_resret#94285386 as float) END AS TR_resret#94285615, CASE WHEN ((IR_resret#94285388 = NA) OR (IR_resret#94285388 = null)) THEN null ELSE cast(IR_resret#94285388 as float) END AS IR_resret#94285617, CASE WHEN ((annual_retnet#94285390 = NA) OR (annual_retnet#94285390 = null)) THEN null ELSE cast(annual_retnet#94285390 as float) END AS annual_retnet#94285619, CASE WHEN ((std_retnet#94285392 = NA) OR (std_retnet#94285392 = null)) THEN null ELSE cast(std_retnet#94285392 as float) END AS std_retnet#94285621, CASE WHEN ((Sharpe_retnet#94285395 = NA) OR (Sharpe_retnet#94285395 = null)) THEN null ELSE cast(Sharpe_retnet#94285395 as float) END AS Sharpe_retnet#94285623, CASE WHEN ((PctPos_retnet#94285398 = NA) OR (PctPos_retnet#94285398 = null)) THEN null ELSE cast(PctPos_retnet#94285398 as float) END AS PctPos_retnet#94285625, CASE WHEN ((TR_retnet#94285399 = NA) OR (TR_retnet#94285399 = null)) THEN null ELSE cast(TR_retnet#94285399 as float) END AS TR_retnet#94285627, CASE WHEN ((IR_retnet#94285401 = NA) OR (IR_retnet#94285401 = null)) THEN null ELSE cast(IR_retnet#94285401 as float) END AS IR_retnet#94285629, CASE WHEN ((turnover#94285403 = NA) OR (turnover#94285403 = null)) THEN null ELSE cast(turnover#94285403 as float) END AS turnover#94285631] +- FileScan csv [year#94285352,retIC#94285354,resretIC#94285356,numcos#94285359,numdates#94285361,annual_bmret#94285363,annual_ret#94285365,std_ret#94285368,Sharpe_ret#94285370,PctPos_ret#94285372,TR_ret#94285374,IR_ret#94285376,annual_resret#94285378,std_resret#94285380,Sharpe_resret#94285382,PctPos_resret#94285384,TR_resret#94285386,IR_resret#94285388,annual_retnet#94285390,std_retnet#94285392,Sharpe_retnet#94285395,PctPos_retnet#94285398,TR_retnet#94285399,IR_retnet#94285401,turnover#94285403] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/eps_truebeat/surprise_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94285583 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94285631, year#94285583] Arguments: [turnover#94285631, year#94285583], [isnotnull(turnover#94285631)] (4) InMemoryRelation Arguments: [year#94285583, retIC#94285585, resretIC#94285587, numcos#94285589, numdates#94285591, annual_bmret#94285593, annual_ret#94285595, std_ret#94285597, Sharpe_ret#94285599, PctPos_ret#94285601, TR_ret#94285603, IR_ret#94285605, annual_resret#94285607, std_resret#94285609, Sharpe_resret#94285611, PctPos_resret#94285613, TR_resret#94285615, IR_resret#94285617, annual_retnet#94285619, std_retnet#94285621, Sharpe_retnet#94285623, PctPos_retnet#94285625, TR_retnet#94285627, IR_retnet#94285629, turnover#94285631], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94285352 = NA) OR (year#94285352 = null)) THEN null ELSE cast(year#94285352 as float) END AS year#94285583, CASE WHEN ((retIC#94285354 = NA) OR (retIC#94285354 = null)) THEN null ELSE cast(retIC#94285354 as float) END AS retIC#94285585, CASE WHEN ((resretIC#94285356 = NA) OR (resretIC#94285356 = null)) THEN null ELSE cast(resretIC#94285356 as float) END AS resretIC#94285587, CASE WHEN ((numcos#94285359 = NA) OR (numcos#94285359 = null)) THEN null ELSE cast(numcos#94285359 as float) END AS numcos#94285589, CASE WHEN ((numdates#94285361 = NA) OR (numdates#94285361 = null)) THEN null ELSE cast(numdates#94285361 as int) END AS numdates#94285591, CASE WHEN ((annual_bmret#94285363 = NA) OR (annual_bmret#94285363 = null)) THEN null ELSE cast(annual_bmret#94285363 as float) END AS annual_bmret#94285593, CASE WHEN ((annual_ret#94285365 = NA) OR (annual_ret#94285365 = null)) THEN null ELSE cast(annual_ret#94285365 as float) END AS annual_ret#94285595, CASE WHEN ((std_ret#94285368 = NA) OR (std_ret#94285368 = null)) THEN null ELSE cast(std_ret#94285368 as float) END AS std_ret#94285597, CASE WHEN ((Sharpe_ret#94285370 = NA) OR (Sharpe_ret#94285370 = null)) THEN null ELSE cast(Sharpe_ret#94285370 as float) END AS Sharpe_ret#94285599, CASE WHEN ((PctPos_ret#94285372 = NA) OR (PctPos_ret#94285372 = null)) THEN null ELSE cast(PctPos_ret#94285372 as float) END AS PctPos_ret#94285601, CASE WHEN ((TR_ret#94285374 = NA) OR (TR_ret#94285374 = null)) THEN null ELSE cast(TR_ret#94285374 as float) END AS TR_ret#94285603, CASE WHEN ((IR_ret#94285376 = NA) OR (IR_ret#94285376 = null)) THEN null ELSE cast(IR_ret#94285376 as float) END AS IR_ret#94285605, CASE WHEN ((annual_resret#94285378 = NA) OR (annual_resret#94285378 = null)) THEN null ELSE cast(annual_resret#94285378 as float) END AS annual_resret#94285607, CASE WHEN ((std_resret#94285380 = NA) OR (std_resret#94285380 = null)) THEN null ELSE cast(std_resret#94285380 as float) END AS std_resret#94285609, CASE WHEN ((Sharpe_resret#94285382 = NA) OR (Sharpe_resret#94285382 = null)) THEN null ELSE cast(Sharpe_resret#94285382 as float) END AS Sharpe_resret#94285611, CASE WHEN ((PctPos_resret#94285384 = NA) OR (PctPos_resret#94285384 = null)) THEN null ELSE cast(PctPos_resret#94285384 as float) END AS PctPos_resret#94285613, CASE WHEN ((TR_resret#94285386 = NA) OR (TR_resret#94285386 = null)) THEN null ELSE cast(TR_resret#94285386 as float) END AS TR_resret#94285615, CASE WHEN ((IR_resret#94285388 = NA) OR (IR_resret#94285388 = null)) THEN null ELSE cast(IR_resret#94285388 as float) END AS IR_resret#94285617, CASE WHEN ((annual_retnet#94285390 = NA) OR (annual_retnet#94285390 = null)) THEN null ELSE cast(annual_retnet#94285390 as float) END AS annual_retnet#94285619, CASE WHEN ((std_retnet#94285392 = NA) OR (std_retnet#94285392 = null)) THEN null ELSE cast(std_retnet#94285392 as float) END AS std_retnet#94285621, CASE WHEN ((Sharpe_retnet#94285395 = NA) OR (Sharpe_retnet#94285395 = null)) THEN null ELSE cast(Sharpe_retnet#94285395 as float) END AS Sharpe_retnet#94285623, CASE WHEN ((PctPos_retnet#94285398 = NA) OR (PctPos_retnet#94285398 = null)) THEN null ELSE cast(PctPos_retnet#94285398 as float) END AS PctPos_retnet#94285625, CASE WHEN ((TR_retnet#94285399 = NA) OR (TR_retnet#94285399 = null)) THEN null ELSE cast(TR_retnet#94285399 as float) END AS TR_retnet#94285627, CASE WHEN ((IR_retnet#94285401 = NA) OR (IR_retnet#94285401 = null)) THEN null ELSE cast(IR_retnet#94285401 as float) END AS IR_retnet#94285629, CASE WHEN ((turnover#94285403 = NA) OR (turnover#94285403 = null)) THEN null ELSE cast(turnover#94285403 as float) END AS turnover#94285631] +- FileScan csv [year#94285352,retIC#94285354,resretIC#94285356,numcos#94285359,numdates#94285361,annual_bmret#94285363,annual_ret#94285365,std_ret#94285368,Sharpe_ret#94285370,PctPos_ret#94285372,TR_ret#94285374,IR_ret#94285376,annual_resret#94285378,std_resret#94285380,Sharpe_resret#94285382,PctPos_resret#94285384,TR_resret#94285386,IR_resret#94285388,annual_retnet#94285390,std_retnet#94285392,Sharpe_retnet#94285395,PctPos_retnet#94285398,TR_retnet#94285399,IR_retnet#94285401,turnover#94285403] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/eps_truebeat/surprise_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94285352, retIC#94285354, resretIC#94285356, numcos#94285359, numdates#94285361, annual_bmret#94285363, annual_ret#94285365, std_ret#94285368, Sharpe_ret#94285370, PctPos_ret#94285372, TR_ret#94285374, IR_ret#94285376, annual_resret#94285378, std_resret#94285380, Sharpe_resret#94285382, PctPos_resret#94285384, TR_resret#94285386, IR_resret#94285388, annual_retnet#94285390, std_retnet#94285392, Sharpe_retnet#94285395, PctPos_retnet#94285398, TR_retnet#94285399, IR_retnet#94285401, turnover#94285403] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/eps_truebeat/surprise_prediction_eps_trunc/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94285352 = NA) OR (year#94285352 = null)) THEN null ELSE cast(year#94285352 as float) END AS year#94285583, CASE WHEN ((retIC#94285354 = NA) OR (retIC#94285354 = null)) THEN null ELSE cast(retIC#94285354 as float) END AS retIC#94285585, CASE WHEN ((resretIC#94285356 = NA) OR (resretIC#94285356 = null)) THEN null ELSE cast(resretIC#94285356 as float) END AS resretIC#94285587, CASE WHEN ((numcos#94285359 = NA) OR (numcos#94285359 = null)) THEN null ELSE cast(numcos#94285359 as float) END AS numcos#94285589, CASE WHEN ((numdates#94285361 = NA) OR (numdates#94285361 = null)) THEN null ELSE cast(numdates#94285361 as int) END AS numdates#94285591, CASE WHEN ((annual_bmret#94285363 = NA) OR (annual_bmret#94285363 = null)) THEN null ELSE cast(annual_bmret#94285363 as float) END AS annual_bmret#94285593, CASE WHEN ((annual_ret#94285365 = NA) OR (annual_ret#94285365 = null)) THEN null ELSE cast(annual_ret#94285365 as float) END AS annual_ret#94285595, CASE WHEN ((std_ret#94285368 = NA) OR (std_ret#94285368 = null)) THEN null ELSE cast(std_ret#94285368 as float) END AS std_ret#94285597, CASE WHEN ((Sharpe_ret#94285370 = NA) OR (Sharpe_ret#94285370 = null)) THEN null ELSE cast(Sharpe_ret#94285370 as float) END AS Sharpe_ret#94285599, CASE WHEN ((PctPos_ret#94285372 = NA) OR (PctPos_ret#94285372 = null)) THEN null ELSE cast(PctPos_ret#94285372 as float) END AS PctPos_ret#94285601, CASE WHEN ((TR_ret#94285374 = NA) OR (TR_ret#94285374 = null)) THEN null ELSE cast(TR_ret#94285374 as float) END AS TR_ret#94285603, CASE WHEN ((IR_ret#94285376 = NA) OR (IR_ret#94285376 = null)) THEN null ELSE cast(IR_ret#94285376 as float) END AS IR_ret#94285605, CASE WHEN ((annual_resret#94285378 = NA) OR (annual_resret#94285378 = null)) THEN null ELSE cast(annual_resret#94285378 as float) END AS annual_resret#94285607, CASE WHEN ((std_resret#94285380 = NA) OR (std_resret#94285380 = null)) THEN null ELSE cast(std_resret#94285380 as float) END AS std_resret#94285609, CASE WHEN ((Sharpe_resret#94285382 = NA) OR (Sharpe_resret#94285382 = null)) THEN null ELSE cast(Sharpe_resret#94285382 as float) END AS Sharpe_resret#94285611, CASE WHEN ((PctPos_resret#94285384 = NA) OR (PctPos_resret#94285384 = null)) THEN null ELSE cast(PctPos_resret#94285384 as float) END AS PctPos_resret#94285613, CASE WHEN ((TR_resret#94285386 = NA) OR (TR_resret#94285386 = null)) THEN null ELSE cast(TR_resret#94285386 as float) END AS TR_resret#94285615, CASE WHEN ((IR_resret#94285388 = NA) OR (IR_resret#94285388 = null)) THEN null ELSE cast(IR_resret#94285388 as float) END AS IR_resret#94285617, CASE WHEN ((annual_retnet#94285390 = NA) OR (annual_retnet#94285390 = null)) THEN null ELSE cast(annual_retnet#94285390 as float) END AS annual_retnet#94285619, CASE WHEN ((std_retnet#94285392 = NA) OR (std_retnet#94285392 = null)) THEN null ELSE cast(std_retnet#94285392 as float) END AS std_retnet#94285621, CASE WHEN ((Sharpe_retnet#94285395 = NA) OR (Sharpe_retnet#94285395 = null)) THEN null ELSE cast(Sharpe_retnet#94285395 as float) END AS Sharpe_retnet#94285623, CASE WHEN ((PctPos_retnet#94285398 = NA) OR (PctPos_retnet#94285398 = null)) THEN null ELSE cast(PctPos_retnet#94285398 as float) END AS PctPos_retnet#94285625, CASE WHEN ((TR_retnet#94285399 = NA) OR (TR_retnet#94285399 = null)) THEN null ELSE cast(TR_retnet#94285399 as float) END AS TR_retnet#94285627, CASE WHEN ((IR_retnet#94285401 = NA) OR (IR_retnet#94285401 = null)) THEN null ELSE cast(IR_retnet#94285401 as float) END AS IR_retnet#94285629, CASE WHEN ((turnover#94285403 = NA) OR (turnover#94285403 = null)) THEN null ELSE cast(turnover#94285403 as float) END AS turnover#94285631] Input [25]: [year#94285352, retIC#94285354, resretIC#94285356, numcos#94285359, numdates#94285361, annual_bmret#94285363, annual_ret#94285365, std_ret#94285368, Sharpe_ret#94285370, PctPos_ret#94285372, TR_ret#94285374, IR_ret#94285376, annual_resret#94285378, std_resret#94285380, Sharpe_resret#94285382, PctPos_resret#94285384, TR_resret#94285386, IR_resret#94285388, annual_retnet#94285390, std_retnet#94285392, Sharpe_retnet#94285395, PctPos_retnet#94285398, TR_retnet#94285399, IR_retnet#94285401, turnover#94285403] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94285631, year#94285583] (8) Filter [codegen id : 1] Input [2]: [turnover#94285631, year#94285583] Condition : isnotnull(turnover#94285631) (9) Project [codegen id : 1] Output [3]: [year#94285583, turnover#94285631, (1.0 / cast(turnover#94285631 as double)) AS days_hold#94285666] Input [2]: [turnover#94285631, year#94285583] (10) Exchange Input [3]: [year#94285583, turnover#94285631, days_hold#94285666] Arguments: rangepartitioning(year#94285583 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7527893] (11) Sort [codegen id : 2] Input [3]: [year#94285583, turnover#94285631, days_hold#94285666] Arguments: [year#94285583 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94285583, turnover#94285631, days_hold#94285666] (13) CollectLimit Input [3]: [year#94285583, turnover#94285631, days_hold#94285666] Arguments: 1000000