== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94234185, turnover#94234229, days_hold#94234268] Arguments: [year#94234185, turnover#94234229, days_hold#94234268] (2) InMemoryRelation Arguments: [year#94234185, turnover#94234229, days_hold#94234268], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94234185 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94234185 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7523675] +- *(1) Project [year#94234185, turnover#94234229, (1.0 / cast(turnover#94234229 as double)) AS days_hold#94234268] +- *(1) Filter isnotnull(turnover#94234229) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94234229, year#94234185], [isnotnull(turnover#94234229)] +- InMemoryRelation [year#94234185, retIC#94234186, resretIC#94234187, numcos#94234188, numdates#94234190, annual_bmret#94234191, annual_ret#94234194, std_ret#94234196, Sharpe_ret#94234197, PctPos_ret#94234199, TR_ret#94234201, IR_ret#94234203, annual_resret#94234205, std_resret#94234207, Sharpe_resret#94234209, PctPos_resret#94234211, TR_resret#94234214, IR_resret#94234216, annual_retnet#94234218, std_retnet#94234220, Sharpe_retnet#94234222, PctPos_retnet#94234224, TR_retnet#94234225, IR_retnet#94234227, turnover#94234229], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94233953 = NA) OR (year#94233953 = null)) THEN null ELSE cast(year#94233953 as float) END AS year#94234185, CASE WHEN ((retIC#94233954 = NA) OR (retIC#94233954 = null)) THEN null ELSE cast(retIC#94233954 as float) END AS retIC#94234186, CASE WHEN ((resretIC#94233955 = NA) OR (resretIC#94233955 = null)) THEN null ELSE cast(resretIC#94233955 as float) END AS resretIC#94234187, CASE WHEN ((numcos#94233956 = NA) OR (numcos#94233956 = null)) THEN null ELSE cast(numcos#94233956 as float) END AS numcos#94234188, CASE WHEN ((numdates#94233957 = NA) OR (numdates#94233957 = null)) THEN null ELSE cast(numdates#94233957 as int) END AS numdates#94234190, CASE WHEN ((annual_bmret#94233958 = NA) OR (annual_bmret#94233958 = null)) THEN null ELSE cast(annual_bmret#94233958 as float) END AS annual_bmret#94234191, CASE WHEN ((annual_ret#94233959 = NA) OR (annual_ret#94233959 = null)) THEN null ELSE cast(annual_ret#94233959 as float) END AS annual_ret#94234194, CASE WHEN ((std_ret#94233960 = NA) OR (std_ret#94233960 = null)) THEN null ELSE cast(std_ret#94233960 as float) END AS std_ret#94234196, CASE WHEN ((Sharpe_ret#94233961 = NA) OR (Sharpe_ret#94233961 = null)) THEN null ELSE cast(Sharpe_ret#94233961 as float) END AS Sharpe_ret#94234197, CASE WHEN ((PctPos_ret#94233962 = NA) OR (PctPos_ret#94233962 = null)) THEN null ELSE cast(PctPos_ret#94233962 as float) END AS PctPos_ret#94234199, CASE WHEN ((TR_ret#94233963 = NA) OR (TR_ret#94233963 = null)) THEN null ELSE cast(TR_ret#94233963 as float) END AS TR_ret#94234201, CASE WHEN ((IR_ret#94233964 = NA) OR (IR_ret#94233964 = null)) THEN null ELSE cast(IR_ret#94233964 as float) END AS IR_ret#94234203, CASE WHEN ((annual_resret#94233965 = NA) OR (annual_resret#94233965 = null)) THEN null ELSE cast(annual_resret#94233965 as float) END AS annual_resret#94234205, CASE WHEN ((std_resret#94233966 = NA) OR (std_resret#94233966 = null)) THEN null ELSE cast(std_resret#94233966 as float) END AS std_resret#94234207, CASE WHEN ((Sharpe_resret#94233967 = NA) OR (Sharpe_resret#94233967 = null)) THEN null ELSE cast(Sharpe_resret#94233967 as float) END AS Sharpe_resret#94234209, CASE WHEN ((PctPos_resret#94233968 = NA) OR (PctPos_resret#94233968 = null)) THEN null ELSE cast(PctPos_resret#94233968 as float) END AS PctPos_resret#94234211, CASE WHEN ((TR_resret#94233969 = NA) OR (TR_resret#94233969 = null)) THEN null ELSE cast(TR_resret#94233969 as float) END AS TR_resret#94234214, CASE WHEN ((IR_resret#94233970 = NA) OR (IR_resret#94233970 = null)) THEN null ELSE cast(IR_resret#94233970 as float) END AS IR_resret#94234216, CASE WHEN ((annual_retnet#94233971 = NA) OR (annual_retnet#94233971 = null)) THEN null ELSE cast(annual_retnet#94233971 as float) END AS annual_retnet#94234218, CASE WHEN ((std_retnet#94233972 = NA) OR (std_retnet#94233972 = null)) THEN null ELSE cast(std_retnet#94233972 as float) END AS std_retnet#94234220, CASE WHEN ((Sharpe_retnet#94233973 = NA) OR (Sharpe_retnet#94233973 = null)) THEN null ELSE cast(Sharpe_retnet#94233973 as float) END AS Sharpe_retnet#94234222, CASE WHEN ((PctPos_retnet#94233974 = NA) OR (PctPos_retnet#94233974 = null)) THEN null ELSE cast(PctPos_retnet#94233974 as float) END AS PctPos_retnet#94234224, CASE WHEN ((TR_retnet#94233975 = NA) OR (TR_retnet#94233975 = null)) THEN null ELSE cast(TR_retnet#94233975 as float) END AS TR_retnet#94234225, CASE WHEN ((IR_retnet#94233976 = NA) OR (IR_retnet#94233976 = null)) THEN null ELSE cast(IR_retnet#94233976 as float) END AS IR_retnet#94234227, CASE WHEN ((turnover#94233977 = NA) OR (turnover#94233977 = null)) THEN null ELSE cast(turnover#94233977 as float) END AS turnover#94234229] +- FileScan csv [year#94233953,retIC#94233954,resretIC#94233955,numcos#94233956,numdates#94233957,annual_bmret#94233958,annual_ret#94233959,std_ret#94233960,Sharpe_ret#94233961,PctPos_ret#94233962,TR_ret#94233963,IR_ret#94233964,annual_resret#94233965,std_resret#94233966,Sharpe_resret#94233967,PctPos_resret#94233968,TR_resret#94233969,IR_resret#94233970,annual_retnet#94233971,std_retnet#94233972,Sharpe_retnet#94233973,PctPos_retnet#94233974,TR_retnet#94233975,IR_retnet#94233976,turnover#94233977] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94234185 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94234229, year#94234185] Arguments: [turnover#94234229, year#94234185], [isnotnull(turnover#94234229)] (4) InMemoryRelation Arguments: [year#94234185, retIC#94234186, resretIC#94234187, numcos#94234188, numdates#94234190, annual_bmret#94234191, annual_ret#94234194, std_ret#94234196, Sharpe_ret#94234197, PctPos_ret#94234199, TR_ret#94234201, IR_ret#94234203, annual_resret#94234205, std_resret#94234207, Sharpe_resret#94234209, PctPos_resret#94234211, TR_resret#94234214, IR_resret#94234216, annual_retnet#94234218, std_retnet#94234220, Sharpe_retnet#94234222, PctPos_retnet#94234224, TR_retnet#94234225, IR_retnet#94234227, turnover#94234229], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94233953 = NA) OR (year#94233953 = null)) THEN null ELSE cast(year#94233953 as float) END AS year#94234185, CASE WHEN ((retIC#94233954 = NA) OR (retIC#94233954 = null)) THEN null ELSE cast(retIC#94233954 as float) END AS retIC#94234186, CASE WHEN ((resretIC#94233955 = NA) OR (resretIC#94233955 = null)) THEN null ELSE cast(resretIC#94233955 as float) END AS resretIC#94234187, CASE WHEN ((numcos#94233956 = NA) OR (numcos#94233956 = null)) THEN null ELSE cast(numcos#94233956 as float) END AS numcos#94234188, CASE WHEN ((numdates#94233957 = NA) OR (numdates#94233957 = null)) THEN null ELSE cast(numdates#94233957 as int) END AS numdates#94234190, CASE WHEN ((annual_bmret#94233958 = NA) OR (annual_bmret#94233958 = null)) THEN null ELSE cast(annual_bmret#94233958 as float) END AS annual_bmret#94234191, CASE WHEN ((annual_ret#94233959 = NA) OR (annual_ret#94233959 = null)) THEN null ELSE cast(annual_ret#94233959 as float) END AS annual_ret#94234194, CASE WHEN ((std_ret#94233960 = NA) OR (std_ret#94233960 = null)) THEN null ELSE cast(std_ret#94233960 as float) END AS std_ret#94234196, CASE WHEN ((Sharpe_ret#94233961 = NA) OR (Sharpe_ret#94233961 = null)) THEN null ELSE cast(Sharpe_ret#94233961 as float) END AS Sharpe_ret#94234197, CASE WHEN ((PctPos_ret#94233962 = NA) OR (PctPos_ret#94233962 = null)) THEN null ELSE cast(PctPos_ret#94233962 as float) END AS PctPos_ret#94234199, CASE WHEN ((TR_ret#94233963 = NA) OR (TR_ret#94233963 = null)) THEN null ELSE cast(TR_ret#94233963 as float) END AS TR_ret#94234201, CASE WHEN ((IR_ret#94233964 = NA) OR (IR_ret#94233964 = null)) THEN null ELSE cast(IR_ret#94233964 as float) END AS IR_ret#94234203, CASE WHEN ((annual_resret#94233965 = NA) OR (annual_resret#94233965 = null)) THEN null ELSE cast(annual_resret#94233965 as float) END AS annual_resret#94234205, CASE WHEN ((std_resret#94233966 = NA) OR (std_resret#94233966 = null)) THEN null ELSE cast(std_resret#94233966 as float) END AS std_resret#94234207, CASE WHEN ((Sharpe_resret#94233967 = NA) OR (Sharpe_resret#94233967 = null)) THEN null ELSE cast(Sharpe_resret#94233967 as float) END AS Sharpe_resret#94234209, CASE WHEN ((PctPos_resret#94233968 = NA) OR (PctPos_resret#94233968 = null)) THEN null ELSE cast(PctPos_resret#94233968 as float) END AS PctPos_resret#94234211, CASE WHEN ((TR_resret#94233969 = NA) OR (TR_resret#94233969 = null)) THEN null ELSE cast(TR_resret#94233969 as float) END AS TR_resret#94234214, CASE WHEN ((IR_resret#94233970 = NA) OR (IR_resret#94233970 = null)) THEN null ELSE cast(IR_resret#94233970 as float) END AS IR_resret#94234216, CASE WHEN ((annual_retnet#94233971 = NA) OR (annual_retnet#94233971 = null)) THEN null ELSE cast(annual_retnet#94233971 as float) END AS annual_retnet#94234218, CASE WHEN ((std_retnet#94233972 = NA) OR (std_retnet#94233972 = null)) THEN null ELSE cast(std_retnet#94233972 as float) END AS std_retnet#94234220, CASE WHEN ((Sharpe_retnet#94233973 = NA) OR (Sharpe_retnet#94233973 = null)) THEN null ELSE cast(Sharpe_retnet#94233973 as float) END AS Sharpe_retnet#94234222, CASE WHEN ((PctPos_retnet#94233974 = NA) OR (PctPos_retnet#94233974 = null)) THEN null ELSE cast(PctPos_retnet#94233974 as float) END AS PctPos_retnet#94234224, CASE WHEN ((TR_retnet#94233975 = NA) OR (TR_retnet#94233975 = null)) THEN null ELSE cast(TR_retnet#94233975 as float) END AS TR_retnet#94234225, CASE WHEN ((IR_retnet#94233976 = NA) OR (IR_retnet#94233976 = null)) THEN null ELSE cast(IR_retnet#94233976 as float) END AS IR_retnet#94234227, CASE WHEN ((turnover#94233977 = NA) OR (turnover#94233977 = null)) THEN null ELSE cast(turnover#94233977 as float) END AS turnover#94234229] +- FileScan csv [year#94233953,retIC#94233954,resretIC#94233955,numcos#94233956,numdates#94233957,annual_bmret#94233958,annual_ret#94233959,std_ret#94233960,Sharpe_ret#94233961,PctPos_ret#94233962,TR_ret#94233963,IR_ret#94233964,annual_resret#94233965,std_resret#94233966,Sharpe_resret#94233967,PctPos_resret#94233968,TR_resret#94233969,IR_resret#94233970,annual_retnet#94233971,std_retnet#94233972,Sharpe_retnet#94233973,PctPos_retnet#94233974,TR_retnet#94233975,IR_retnet#94233976,turnover#94233977] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94233953, retIC#94233954, resretIC#94233955, numcos#94233956, numdates#94233957, annual_bmret#94233958, annual_ret#94233959, std_ret#94233960, Sharpe_ret#94233961, PctPos_ret#94233962, TR_ret#94233963, IR_ret#94233964, annual_resret#94233965, std_resret#94233966, Sharpe_resret#94233967, PctPos_resret#94233968, TR_resret#94233969, IR_resret#94233970, annual_retnet#94233971, std_retnet#94233972, Sharpe_retnet#94233973, PctPos_retnet#94233974, TR_retnet#94233975, IR_retnet#94233976, turnover#94233977] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/transcripts/transcript_model_residualized/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94233953 = NA) OR (year#94233953 = null)) THEN null ELSE cast(year#94233953 as float) END AS year#94234185, CASE WHEN ((retIC#94233954 = NA) OR (retIC#94233954 = null)) THEN null ELSE cast(retIC#94233954 as float) END AS retIC#94234186, CASE WHEN ((resretIC#94233955 = NA) OR (resretIC#94233955 = null)) THEN null ELSE cast(resretIC#94233955 as float) END AS resretIC#94234187, CASE WHEN ((numcos#94233956 = NA) OR (numcos#94233956 = null)) THEN null ELSE cast(numcos#94233956 as float) END AS numcos#94234188, CASE WHEN ((numdates#94233957 = NA) OR (numdates#94233957 = null)) THEN null ELSE cast(numdates#94233957 as int) END AS numdates#94234190, CASE WHEN ((annual_bmret#94233958 = NA) OR (annual_bmret#94233958 = null)) THEN null ELSE cast(annual_bmret#94233958 as float) END AS annual_bmret#94234191, CASE WHEN ((annual_ret#94233959 = NA) OR (annual_ret#94233959 = null)) THEN null ELSE cast(annual_ret#94233959 as float) END AS annual_ret#94234194, CASE WHEN ((std_ret#94233960 = NA) OR (std_ret#94233960 = null)) THEN null ELSE cast(std_ret#94233960 as float) END AS std_ret#94234196, CASE WHEN ((Sharpe_ret#94233961 = NA) OR (Sharpe_ret#94233961 = null)) THEN null ELSE cast(Sharpe_ret#94233961 as float) END AS Sharpe_ret#94234197, CASE WHEN ((PctPos_ret#94233962 = NA) OR (PctPos_ret#94233962 = null)) THEN null ELSE cast(PctPos_ret#94233962 as float) END AS PctPos_ret#94234199, CASE WHEN ((TR_ret#94233963 = NA) OR (TR_ret#94233963 = null)) THEN null ELSE cast(TR_ret#94233963 as float) END AS TR_ret#94234201, CASE WHEN ((IR_ret#94233964 = NA) OR (IR_ret#94233964 = null)) THEN null ELSE cast(IR_ret#94233964 as float) END AS IR_ret#94234203, CASE WHEN ((annual_resret#94233965 = NA) OR (annual_resret#94233965 = null)) THEN null ELSE cast(annual_resret#94233965 as float) END AS annual_resret#94234205, CASE WHEN ((std_resret#94233966 = NA) OR (std_resret#94233966 = null)) THEN null ELSE cast(std_resret#94233966 as float) END AS std_resret#94234207, CASE WHEN ((Sharpe_resret#94233967 = NA) OR (Sharpe_resret#94233967 = null)) THEN null ELSE cast(Sharpe_resret#94233967 as float) END AS Sharpe_resret#94234209, CASE WHEN ((PctPos_resret#94233968 = NA) OR (PctPos_resret#94233968 = null)) THEN null ELSE cast(PctPos_resret#94233968 as float) END AS PctPos_resret#94234211, CASE WHEN ((TR_resret#94233969 = NA) OR (TR_resret#94233969 = null)) THEN null ELSE cast(TR_resret#94233969 as float) END AS TR_resret#94234214, CASE WHEN ((IR_resret#94233970 = NA) OR (IR_resret#94233970 = null)) THEN null ELSE cast(IR_resret#94233970 as float) END AS IR_resret#94234216, CASE WHEN ((annual_retnet#94233971 = NA) OR (annual_retnet#94233971 = null)) THEN null ELSE cast(annual_retnet#94233971 as float) END AS annual_retnet#94234218, CASE WHEN ((std_retnet#94233972 = NA) OR (std_retnet#94233972 = null)) THEN null ELSE cast(std_retnet#94233972 as float) END AS std_retnet#94234220, CASE WHEN ((Sharpe_retnet#94233973 = NA) OR (Sharpe_retnet#94233973 = null)) THEN null ELSE cast(Sharpe_retnet#94233973 as float) END AS Sharpe_retnet#94234222, CASE WHEN ((PctPos_retnet#94233974 = NA) OR (PctPos_retnet#94233974 = null)) THEN null ELSE cast(PctPos_retnet#94233974 as float) END AS PctPos_retnet#94234224, CASE WHEN ((TR_retnet#94233975 = NA) OR (TR_retnet#94233975 = null)) THEN null ELSE cast(TR_retnet#94233975 as float) END AS TR_retnet#94234225, CASE WHEN ((IR_retnet#94233976 = NA) OR (IR_retnet#94233976 = null)) THEN null ELSE cast(IR_retnet#94233976 as float) END AS IR_retnet#94234227, CASE WHEN ((turnover#94233977 = NA) OR (turnover#94233977 = null)) THEN null ELSE cast(turnover#94233977 as float) END AS turnover#94234229] Input [25]: [year#94233953, retIC#94233954, resretIC#94233955, numcos#94233956, numdates#94233957, annual_bmret#94233958, annual_ret#94233959, std_ret#94233960, Sharpe_ret#94233961, PctPos_ret#94233962, TR_ret#94233963, IR_ret#94233964, annual_resret#94233965, std_resret#94233966, Sharpe_resret#94233967, PctPos_resret#94233968, TR_resret#94233969, IR_resret#94233970, annual_retnet#94233971, std_retnet#94233972, Sharpe_retnet#94233973, PctPos_retnet#94233974, TR_retnet#94233975, IR_retnet#94233976, turnover#94233977] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94234229, year#94234185] (8) Filter [codegen id : 1] Input [2]: [turnover#94234229, year#94234185] Condition : isnotnull(turnover#94234229) (9) Project [codegen id : 1] Output [3]: [year#94234185, turnover#94234229, (1.0 / cast(turnover#94234229 as double)) AS days_hold#94234268] Input [2]: [turnover#94234229, year#94234185] (10) Exchange Input [3]: [year#94234185, turnover#94234229, days_hold#94234268] Arguments: rangepartitioning(year#94234185 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7523675] (11) Sort [codegen id : 2] Input [3]: [year#94234185, turnover#94234229, days_hold#94234268] Arguments: [year#94234185 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94234185, turnover#94234229, days_hold#94234268] (13) CollectLimit Input [3]: [year#94234185, turnover#94234229, days_hold#94234268] Arguments: 1000000