== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94181196, turnover#94181245, days_hold#94181278] Arguments: [year#94181196, turnover#94181245, days_hold#94181278] (2) InMemoryRelation Arguments: [year#94181196, turnover#94181245, days_hold#94181278], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94181196 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94181196 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7519407] +- *(1) Project [year#94181196, turnover#94181245, (1.0 / cast(turnover#94181245 as double)) AS days_hold#94181278] +- *(1) Filter isnotnull(turnover#94181245) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94181245, year#94181196], [isnotnull(turnover#94181245)] +- InMemoryRelation [year#94181196, retIC#94181198, resretIC#94181200, numcos#94181202, numdates#94181204, annual_bmret#94181206, annual_ret#94181208, std_ret#94181210, Sharpe_ret#94181212, PctPos_ret#94181214, TR_ret#94181216, IR_ret#94181218, annual_resret#94181220, std_resret#94181222, Sharpe_resret#94181224, PctPos_resret#94181226, TR_resret#94181229, IR_resret#94181231, annual_retnet#94181233, std_retnet#94181235, Sharpe_retnet#94181237, PctPos_retnet#94181239, TR_retnet#94181241, IR_retnet#94181243, turnover#94181245], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94180963 = NA) OR (year#94180963 = null)) THEN null ELSE cast(year#94180963 as int) END AS year#94181196, CASE WHEN ((retIC#94180964 = NA) OR (retIC#94180964 = null)) THEN null ELSE cast(retIC#94180964 as float) END AS retIC#94181198, CASE WHEN ((resretIC#94180965 = NA) OR (resretIC#94180965 = null)) THEN null ELSE cast(resretIC#94180965 as float) END AS resretIC#94181200, CASE WHEN ((numcos#94180966 = NA) OR (numcos#94180966 = null)) THEN null ELSE cast(numcos#94180966 as float) END AS numcos#94181202, CASE WHEN ((numdates#94180967 = NA) OR (numdates#94180967 = null)) THEN null ELSE cast(numdates#94180967 as int) END AS numdates#94181204, CASE WHEN ((annual_bmret#94180968 = NA) OR (annual_bmret#94180968 = null)) THEN null ELSE cast(annual_bmret#94180968 as float) END AS annual_bmret#94181206, CASE WHEN ((annual_ret#94180969 = NA) OR (annual_ret#94180969 = null)) THEN null ELSE cast(annual_ret#94180969 as float) END AS annual_ret#94181208, CASE WHEN ((std_ret#94180970 = NA) OR (std_ret#94180970 = null)) THEN null ELSE cast(std_ret#94180970 as float) END AS std_ret#94181210, CASE WHEN ((Sharpe_ret#94180971 = NA) OR (Sharpe_ret#94180971 = null)) THEN null ELSE cast(Sharpe_ret#94180971 as float) END AS Sharpe_ret#94181212, CASE WHEN ((PctPos_ret#94180972 = NA) OR (PctPos_ret#94180972 = null)) THEN null ELSE cast(PctPos_ret#94180972 as float) END AS PctPos_ret#94181214, CASE WHEN ((TR_ret#94180973 = NA) OR (TR_ret#94180973 = null)) THEN null ELSE cast(TR_ret#94180973 as float) END AS TR_ret#94181216, CASE WHEN ((IR_ret#94180974 = NA) OR (IR_ret#94180974 = null)) THEN null ELSE cast(IR_ret#94180974 as float) END AS IR_ret#94181218, CASE WHEN ((annual_resret#94180975 = NA) OR (annual_resret#94180975 = null)) THEN null ELSE cast(annual_resret#94180975 as float) END AS annual_resret#94181220, CASE WHEN ((std_resret#94180976 = NA) OR (std_resret#94180976 = null)) THEN null ELSE cast(std_resret#94180976 as float) END AS std_resret#94181222, CASE WHEN ((Sharpe_resret#94180977 = NA) OR (Sharpe_resret#94180977 = null)) THEN null ELSE cast(Sharpe_resret#94180977 as float) END AS Sharpe_resret#94181224, CASE WHEN ((PctPos_resret#94180978 = NA) OR (PctPos_resret#94180978 = null)) THEN null ELSE cast(PctPos_resret#94180978 as float) END AS PctPos_resret#94181226, CASE WHEN ((TR_resret#94180979 = NA) OR (TR_resret#94180979 = null)) THEN null ELSE cast(TR_resret#94180979 as float) END AS TR_resret#94181229, CASE WHEN ((IR_resret#94180980 = NA) OR (IR_resret#94180980 = null)) THEN null ELSE cast(IR_resret#94180980 as float) END AS IR_resret#94181231, CASE WHEN ((annual_retnet#94180981 = NA) OR (annual_retnet#94180981 = null)) THEN null ELSE cast(annual_retnet#94180981 as float) END AS annual_retnet#94181233, CASE WHEN ((std_retnet#94180982 = NA) OR (std_retnet#94180982 = null)) THEN null ELSE cast(std_retnet#94180982 as float) END AS std_retnet#94181235, CASE WHEN ((Sharpe_retnet#94180983 = NA) OR (Sharpe_retnet#94180983 = null)) THEN null ELSE cast(Sharpe_retnet#94180983 as float) END AS Sharpe_retnet#94181237, CASE WHEN ((PctPos_retnet#94180984 = NA) OR (PctPos_retnet#94180984 = null)) THEN null ELSE cast(PctPos_retnet#94180984 as float) END AS PctPos_retnet#94181239, CASE WHEN ((TR_retnet#94180985 = NA) OR (TR_retnet#94180985 = null)) THEN null ELSE cast(TR_retnet#94180985 as float) END AS TR_retnet#94181241, CASE WHEN ((IR_retnet#94180986 = NA) OR (IR_retnet#94180986 = null)) THEN null ELSE cast(IR_retnet#94180986 as float) END AS IR_retnet#94181243, CASE WHEN ((turnover#94180987 = NA) OR (turnover#94180987 = null)) THEN null ELSE cast(turnover#94180987 as float) END AS turnover#94181245] +- FileScan csv [year#94180963,retIC#94180964,resretIC#94180965,numcos#94180966,numdates#94180967,annual_bmret#94180968,annual_ret#94180969,std_ret#94180970,Sharpe_ret#94180971,PctPos_ret#94180972,TR_ret#94180973,IR_ret#94180974,annual_resret#94180975,std_resret#94180976,Sharpe_resret#94180977,PctPos_resret#94180978,TR_resret#94180979,IR_resret#94180980,annual_retnet#94180981,std_retnet#94180982,Sharpe_retnet#94180983,PctPos_retnet#94180984,TR_retnet#94180985,IR_retnet#94180986,turnover#94180987] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatilit..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94181196 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94181245, year#94181196] Arguments: [turnover#94181245, year#94181196], [isnotnull(turnover#94181245)] (4) InMemoryRelation Arguments: [year#94181196, retIC#94181198, resretIC#94181200, numcos#94181202, numdates#94181204, annual_bmret#94181206, annual_ret#94181208, std_ret#94181210, Sharpe_ret#94181212, PctPos_ret#94181214, TR_ret#94181216, IR_ret#94181218, annual_resret#94181220, std_resret#94181222, Sharpe_resret#94181224, PctPos_resret#94181226, TR_resret#94181229, IR_resret#94181231, annual_retnet#94181233, std_retnet#94181235, Sharpe_retnet#94181237, PctPos_retnet#94181239, TR_retnet#94181241, IR_retnet#94181243, turnover#94181245], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94180963 = NA) OR (year#94180963 = null)) THEN null ELSE cast(year#94180963 as int) END AS year#94181196, CASE WHEN ((retIC#94180964 = NA) OR (retIC#94180964 = null)) THEN null ELSE cast(retIC#94180964 as float) END AS retIC#94181198, CASE WHEN ((resretIC#94180965 = NA) OR (resretIC#94180965 = null)) THEN null ELSE cast(resretIC#94180965 as float) END AS resretIC#94181200, CASE WHEN ((numcos#94180966 = NA) OR (numcos#94180966 = null)) THEN null ELSE cast(numcos#94180966 as float) END AS numcos#94181202, CASE WHEN ((numdates#94180967 = NA) OR (numdates#94180967 = null)) THEN null ELSE cast(numdates#94180967 as int) END AS numdates#94181204, CASE WHEN ((annual_bmret#94180968 = NA) OR (annual_bmret#94180968 = null)) THEN null ELSE cast(annual_bmret#94180968 as float) END AS annual_bmret#94181206, CASE WHEN ((annual_ret#94180969 = NA) OR (annual_ret#94180969 = null)) THEN null ELSE cast(annual_ret#94180969 as float) END AS annual_ret#94181208, CASE WHEN ((std_ret#94180970 = NA) OR (std_ret#94180970 = null)) THEN null ELSE cast(std_ret#94180970 as float) END AS std_ret#94181210, CASE WHEN ((Sharpe_ret#94180971 = NA) OR (Sharpe_ret#94180971 = null)) THEN null ELSE cast(Sharpe_ret#94180971 as float) END AS Sharpe_ret#94181212, CASE WHEN ((PctPos_ret#94180972 = NA) OR (PctPos_ret#94180972 = null)) THEN null ELSE cast(PctPos_ret#94180972 as float) END AS PctPos_ret#94181214, CASE WHEN ((TR_ret#94180973 = NA) OR (TR_ret#94180973 = null)) THEN null ELSE cast(TR_ret#94180973 as float) END AS TR_ret#94181216, CASE WHEN ((IR_ret#94180974 = NA) OR (IR_ret#94180974 = null)) THEN null ELSE cast(IR_ret#94180974 as float) END AS IR_ret#94181218, CASE WHEN ((annual_resret#94180975 = NA) OR (annual_resret#94180975 = null)) THEN null ELSE cast(annual_resret#94180975 as float) END AS annual_resret#94181220, CASE WHEN ((std_resret#94180976 = NA) OR (std_resret#94180976 = null)) THEN null ELSE cast(std_resret#94180976 as float) END AS std_resret#94181222, CASE WHEN ((Sharpe_resret#94180977 = NA) OR (Sharpe_resret#94180977 = null)) THEN null ELSE cast(Sharpe_resret#94180977 as float) END AS Sharpe_resret#94181224, CASE WHEN ((PctPos_resret#94180978 = NA) OR (PctPos_resret#94180978 = null)) THEN null ELSE cast(PctPos_resret#94180978 as float) END AS PctPos_resret#94181226, CASE WHEN ((TR_resret#94180979 = NA) OR (TR_resret#94180979 = null)) THEN null ELSE cast(TR_resret#94180979 as float) END AS TR_resret#94181229, CASE WHEN ((IR_resret#94180980 = NA) OR (IR_resret#94180980 = null)) THEN null ELSE cast(IR_resret#94180980 as float) END AS IR_resret#94181231, CASE WHEN ((annual_retnet#94180981 = NA) OR (annual_retnet#94180981 = null)) THEN null ELSE cast(annual_retnet#94180981 as float) END AS annual_retnet#94181233, CASE WHEN ((std_retnet#94180982 = NA) OR (std_retnet#94180982 = null)) THEN null ELSE cast(std_retnet#94180982 as float) END AS std_retnet#94181235, CASE WHEN ((Sharpe_retnet#94180983 = NA) OR (Sharpe_retnet#94180983 = null)) THEN null ELSE cast(Sharpe_retnet#94180983 as float) END AS Sharpe_retnet#94181237, CASE WHEN ((PctPos_retnet#94180984 = NA) OR (PctPos_retnet#94180984 = null)) THEN null ELSE cast(PctPos_retnet#94180984 as float) END AS PctPos_retnet#94181239, CASE WHEN ((TR_retnet#94180985 = NA) OR (TR_retnet#94180985 = null)) THEN null ELSE cast(TR_retnet#94180985 as float) END AS TR_retnet#94181241, CASE WHEN ((IR_retnet#94180986 = NA) OR (IR_retnet#94180986 = null)) THEN null ELSE cast(IR_retnet#94180986 as float) END AS IR_retnet#94181243, CASE WHEN ((turnover#94180987 = NA) OR (turnover#94180987 = null)) THEN null ELSE cast(turnover#94180987 as float) END AS turnover#94181245] +- FileScan csv [year#94180963,retIC#94180964,resretIC#94180965,numcos#94180966,numdates#94180967,annual_bmret#94180968,annual_ret#94180969,std_ret#94180970,Sharpe_ret#94180971,PctPos_ret#94180972,TR_ret#94180973,IR_ret#94180974,annual_resret#94180975,std_resret#94180976,Sharpe_resret#94180977,PctPos_resret#94180978,TR_resret#94180979,IR_resret#94180980,annual_retnet#94180981,std_retnet#94180982,Sharpe_retnet#94180983,PctPos_retnet#94180984,TR_retnet#94180985,IR_retnet#94180986,turnover#94180987] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatilit..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94180963, retIC#94180964, resretIC#94180965, numcos#94180966, numdates#94180967, annual_bmret#94180968, annual_ret#94180969, std_ret#94180970, Sharpe_ret#94180971, PctPos_ret#94180972, TR_ret#94180973, IR_ret#94180974, annual_resret#94180975, std_resret#94180976, Sharpe_resret#94180977, PctPos_resret#94180978, TR_resret#94180979, IR_resret#94180980, annual_retnet#94180981, std_retnet#94180982, Sharpe_retnet#94180983, PctPos_retnet#94180984, TR_retnet#94180985, IR_retnet#94180986, turnover#94180987] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/volatility/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94180963 = NA) OR (year#94180963 = null)) THEN null ELSE cast(year#94180963 as int) END AS year#94181196, CASE WHEN ((retIC#94180964 = NA) OR (retIC#94180964 = null)) THEN null ELSE cast(retIC#94180964 as float) END AS retIC#94181198, CASE WHEN ((resretIC#94180965 = NA) OR (resretIC#94180965 = null)) THEN null ELSE cast(resretIC#94180965 as float) END AS resretIC#94181200, CASE WHEN ((numcos#94180966 = NA) OR (numcos#94180966 = null)) THEN null ELSE cast(numcos#94180966 as float) END AS numcos#94181202, CASE WHEN ((numdates#94180967 = NA) OR (numdates#94180967 = null)) THEN null ELSE cast(numdates#94180967 as int) END AS numdates#94181204, CASE WHEN ((annual_bmret#94180968 = NA) OR (annual_bmret#94180968 = null)) THEN null ELSE cast(annual_bmret#94180968 as float) END AS annual_bmret#94181206, CASE WHEN ((annual_ret#94180969 = NA) OR (annual_ret#94180969 = null)) THEN null ELSE cast(annual_ret#94180969 as float) END AS annual_ret#94181208, CASE WHEN ((std_ret#94180970 = NA) OR (std_ret#94180970 = null)) THEN null ELSE cast(std_ret#94180970 as float) END AS std_ret#94181210, CASE WHEN ((Sharpe_ret#94180971 = NA) OR (Sharpe_ret#94180971 = null)) THEN null ELSE cast(Sharpe_ret#94180971 as float) END AS Sharpe_ret#94181212, CASE WHEN ((PctPos_ret#94180972 = NA) OR (PctPos_ret#94180972 = null)) THEN null ELSE cast(PctPos_ret#94180972 as float) END AS PctPos_ret#94181214, CASE WHEN ((TR_ret#94180973 = NA) OR (TR_ret#94180973 = null)) THEN null ELSE cast(TR_ret#94180973 as float) END AS TR_ret#94181216, CASE WHEN ((IR_ret#94180974 = NA) OR (IR_ret#94180974 = null)) THEN null ELSE cast(IR_ret#94180974 as float) END AS IR_ret#94181218, CASE WHEN ((annual_resret#94180975 = NA) OR (annual_resret#94180975 = null)) THEN null ELSE cast(annual_resret#94180975 as float) END AS annual_resret#94181220, CASE WHEN ((std_resret#94180976 = NA) OR (std_resret#94180976 = null)) THEN null ELSE cast(std_resret#94180976 as float) END AS std_resret#94181222, CASE WHEN ((Sharpe_resret#94180977 = NA) OR (Sharpe_resret#94180977 = null)) THEN null ELSE cast(Sharpe_resret#94180977 as float) END AS Sharpe_resret#94181224, CASE WHEN ((PctPos_resret#94180978 = NA) OR (PctPos_resret#94180978 = null)) THEN null ELSE cast(PctPos_resret#94180978 as float) END AS PctPos_resret#94181226, CASE WHEN ((TR_resret#94180979 = NA) OR (TR_resret#94180979 = null)) THEN null ELSE cast(TR_resret#94180979 as float) END AS TR_resret#94181229, CASE WHEN ((IR_resret#94180980 = NA) OR (IR_resret#94180980 = null)) THEN null ELSE cast(IR_resret#94180980 as float) END AS IR_resret#94181231, CASE WHEN ((annual_retnet#94180981 = NA) OR (annual_retnet#94180981 = null)) THEN null ELSE cast(annual_retnet#94180981 as float) END AS annual_retnet#94181233, CASE WHEN ((std_retnet#94180982 = NA) OR (std_retnet#94180982 = null)) THEN null ELSE cast(std_retnet#94180982 as float) END AS std_retnet#94181235, CASE WHEN ((Sharpe_retnet#94180983 = NA) OR (Sharpe_retnet#94180983 = null)) THEN null ELSE cast(Sharpe_retnet#94180983 as float) END AS Sharpe_retnet#94181237, CASE WHEN ((PctPos_retnet#94180984 = NA) OR (PctPos_retnet#94180984 = null)) THEN null ELSE cast(PctPos_retnet#94180984 as float) END AS PctPos_retnet#94181239, CASE WHEN ((TR_retnet#94180985 = NA) OR (TR_retnet#94180985 = null)) THEN null ELSE cast(TR_retnet#94180985 as float) END AS TR_retnet#94181241, CASE WHEN ((IR_retnet#94180986 = NA) OR (IR_retnet#94180986 = null)) THEN null ELSE cast(IR_retnet#94180986 as float) END AS IR_retnet#94181243, CASE WHEN ((turnover#94180987 = NA) OR (turnover#94180987 = null)) THEN null ELSE cast(turnover#94180987 as float) END AS turnover#94181245] Input [25]: [year#94180963, retIC#94180964, resretIC#94180965, numcos#94180966, numdates#94180967, annual_bmret#94180968, annual_ret#94180969, std_ret#94180970, Sharpe_ret#94180971, PctPos_ret#94180972, TR_ret#94180973, IR_ret#94180974, annual_resret#94180975, std_resret#94180976, Sharpe_resret#94180977, PctPos_resret#94180978, TR_resret#94180979, IR_resret#94180980, annual_retnet#94180981, std_retnet#94180982, Sharpe_retnet#94180983, PctPos_retnet#94180984, TR_retnet#94180985, IR_retnet#94180986, turnover#94180987] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94181245, year#94181196] (8) Filter [codegen id : 1] Input [2]: [turnover#94181245, year#94181196] Condition : isnotnull(turnover#94181245) (9) Project [codegen id : 1] Output [3]: [year#94181196, turnover#94181245, (1.0 / cast(turnover#94181245 as double)) AS days_hold#94181278] Input [2]: [turnover#94181245, year#94181196] (10) Exchange Input [3]: [year#94181196, turnover#94181245, days_hold#94181278] Arguments: rangepartitioning(year#94181196 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7519407] (11) Sort [codegen id : 2] Input [3]: [year#94181196, turnover#94181245, days_hold#94181278] Arguments: [year#94181196 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94181196, turnover#94181245, days_hold#94181278] (13) CollectLimit Input [3]: [year#94181196, turnover#94181245, days_hold#94181278] Arguments: 1000000