== Physical Plan == CollectLimit (11) +- * ColumnarToRow (10) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [7]: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] Arguments: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] (2) InMemoryRelation Arguments: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] +- *(1) Filter (isnotnull(cap#94100197) AND (cap#94100197 = 0.0)) +- *(1) ColumnarToRow +- InMemoryTableScan [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210], [isnotnull(cap#94100197), (cap#94100197 = 0.0)] +- InMemoryRelation [overall#94100196, cap#94100197, retIC#94100198, resretIC#94100199, size#94100200, value#94100202, growth#94100203, leverage#94100205, volatility#94100207, momentum#94100209, yield#94100210, numcos#94100212, numdates#94100214, annual_bmret#94100216, annual_ret#94100218, std_ret#94100220, Sharpe_ret#94100221, PctPos_ret#94100223, TR_ret#94100225, IR_ret#94100226, annual_resret#94100228, std_resret#94100230, Sharpe_resret#94100232, PctPos_resret#94100234, ... 9 more fields], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((overall#94099964 = NA) OR (overall#94099964 = null)) THEN null ELSE cast(overall#94099964 as int) END AS overall#94100196, CASE WHEN ((cap#94099965 = NA) OR (cap#94099965 = null)) THEN null ELSE cast(cap#94099965 as float) END AS cap#94100197, CASE WHEN ((retIC#94099966 = NA) OR (retIC#94099966 = null)) THEN null ELSE cast(retIC#94099966 as float) END AS retIC#94100198, CASE WHEN ((resretIC#94099967 = NA) OR (resretIC#94099967 = null)) THEN null ELSE cast(resretIC#94099967 as float) END AS resretIC#94100199, CASE WHEN ((size#94099968 = NA) OR (size#94099968 = null)) THEN null ELSE cast(size#94099968 as float) END AS size#94100200, CASE WHEN ((value#94099969 = NA) OR (value#94099969 = null)) THEN null ELSE cast(value#94099969 as float) END AS value#94100202, CASE WHEN ((growth#94099970 = NA) OR (growth#94099970 = null)) THEN null ELSE cast(growth#94099970 as float) END AS growth#94100203, CASE WHEN ((leverage#94099971 = NA) OR (leverage#94099971 = null)) THEN null ELSE cast(leverage#94099971 as float) END AS leverage#94100205, CASE WHEN ((volatility#94099972 = NA) OR (volatility#94099972 = null)) THEN null ELSE cast(volatility#94099972 as float) END AS volatility#94100207, CASE WHEN ((momentum#94099973 = NA) OR (momentum#94099973 = null)) THEN null ELSE cast(momentum#94099973 as float) END AS momentum#94100209, CASE WHEN ((yield#94099974 = NA) OR (yield#94099974 = null)) THEN null ELSE cast(yield#94099974 as float) END AS yield#94100210, CASE WHEN ((numcos#94099975 = NA) OR (numcos#94099975 = null)) THEN null ELSE cast(numcos#94099975 as float) END AS numcos#94100212, CASE WHEN ((numdates#94099976 = NA) OR (numdates#94099976 = null)) THEN null ELSE cast(numdates#94099976 as float) END AS numdates#94100214, CASE WHEN ((annual_bmret#94099977 = NA) OR (annual_bmret#94099977 = null)) THEN null ELSE cast(annual_bmret#94099977 as float) END AS annual_bmret#94100216, CASE WHEN ((annual_ret#94099978 = NA) OR (annual_ret#94099978 = null)) THEN null ELSE cast(annual_ret#94099978 as float) END AS annual_ret#94100218, CASE WHEN ((std_ret#94099979 = NA) OR (std_ret#94099979 = null)) THEN null ELSE cast(std_ret#94099979 as float) END AS std_ret#94100220, CASE WHEN ((Sharpe_ret#94099980 = NA) OR (Sharpe_ret#94099980 = null)) THEN null ELSE cast(Sharpe_ret#94099980 as float) END AS Sharpe_ret#94100221, CASE WHEN ((PctPos_ret#94099981 = NA) OR (PctPos_ret#94099981 = null)) THEN null ELSE cast(PctPos_ret#94099981 as float) END AS PctPos_ret#94100223, CASE WHEN ((TR_ret#94099982 = NA) OR (TR_ret#94099982 = null)) THEN null ELSE cast(TR_ret#94099982 as float) END AS TR_ret#94100225, CASE WHEN ((IR_ret#94099983 = NA) OR (IR_ret#94099983 = null)) THEN null ELSE cast(IR_ret#94099983 as float) END AS IR_ret#94100226, CASE WHEN ((annual_resret#94099984 = NA) OR (annual_resret#94099984 = null)) THEN null ELSE cast(annual_resret#94099984 as float) END AS annual_resret#94100228, CASE WHEN ((std_resret#94099985 = NA) OR (std_resret#94099985 = null)) THEN null ELSE cast(std_resret#94099985 as float) END AS std_resret#94100230, CASE WHEN ((Sharpe_resret#94099986 = NA) OR (Sharpe_resret#94099986 = null)) THEN null ELSE cast(Sharpe_resret#94099986 as float) END AS Sharpe_resret#94100232, CASE WHEN ((PctPos_resret#94099987 = NA) OR (PctPos_resret#94099987 = null)) THEN null ELSE cast(PctPos_resret#94099987 as float) END AS PctPos_resret#94100234, ... 9 more fields] +- FileScan csv [overall#94099964,cap#94099965,retIC#94099966,resretIC#94099967,size#94099968,value#94099969,growth#94099970,leverage#94099971,volatility#94099972,momentum#94099973,yield#94099974,numcos#94099975,numdates#94099976,annual_bmret#94099977,annual_ret#94099978,std_ret#94099979,Sharpe_ret#94099980,PctPos_ret#94099981,TR_ret#94099982,IR_ret#94099983,annual_resret#94099984,std_resret#94099985,Sharpe_resret#94099986,PctPos_resret#94099987,... 9 more fields] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<overall:string,cap:string,retIC:string,resretIC:string,size:string,value:string,growth:str... ,None) (3) InMemoryTableScan Output [8]: [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210] Arguments: [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210], [isnotnull(cap#94100197), (cap#94100197 = 0.0)] (4) InMemoryRelation Arguments: [overall#94100196, cap#94100197, retIC#94100198, resretIC#94100199, size#94100200, value#94100202, growth#94100203, leverage#94100205, volatility#94100207, momentum#94100209, yield#94100210, numcos#94100212, numdates#94100214, annual_bmret#94100216, annual_ret#94100218, std_ret#94100220, Sharpe_ret#94100221, PctPos_ret#94100223, TR_ret#94100225, IR_ret#94100226, annual_resret#94100228, std_resret#94100230, Sharpe_resret#94100232, PctPos_resret#94100234, ... 9 more fields], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((overall#94099964 = NA) OR (overall#94099964 = null)) THEN null ELSE cast(overall#94099964 as int) END AS overall#94100196, CASE WHEN ((cap#94099965 = NA) OR (cap#94099965 = null)) THEN null ELSE cast(cap#94099965 as float) END AS cap#94100197, CASE WHEN ((retIC#94099966 = NA) OR (retIC#94099966 = null)) THEN null ELSE cast(retIC#94099966 as float) END AS retIC#94100198, CASE WHEN ((resretIC#94099967 = NA) OR (resretIC#94099967 = null)) THEN null ELSE cast(resretIC#94099967 as float) END AS resretIC#94100199, CASE WHEN ((size#94099968 = NA) OR (size#94099968 = null)) THEN null ELSE cast(size#94099968 as float) END AS size#94100200, CASE WHEN ((value#94099969 = NA) OR (value#94099969 = null)) THEN null ELSE cast(value#94099969 as float) END AS value#94100202, CASE WHEN ((growth#94099970 = NA) OR (growth#94099970 = null)) THEN null ELSE cast(growth#94099970 as float) END AS growth#94100203, CASE WHEN ((leverage#94099971 = NA) OR (leverage#94099971 = null)) THEN null ELSE cast(leverage#94099971 as float) END AS leverage#94100205, CASE WHEN ((volatility#94099972 = NA) OR (volatility#94099972 = null)) THEN null ELSE cast(volatility#94099972 as float) END AS volatility#94100207, CASE WHEN ((momentum#94099973 = NA) OR (momentum#94099973 = null)) THEN null ELSE cast(momentum#94099973 as float) END AS momentum#94100209, CASE WHEN ((yield#94099974 = NA) OR (yield#94099974 = null)) THEN null ELSE cast(yield#94099974 as float) END AS yield#94100210, CASE WHEN ((numcos#94099975 = NA) OR (numcos#94099975 = null)) THEN null ELSE cast(numcos#94099975 as float) END AS numcos#94100212, CASE WHEN ((numdates#94099976 = NA) OR (numdates#94099976 = null)) THEN null ELSE cast(numdates#94099976 as float) END AS numdates#94100214, CASE WHEN ((annual_bmret#94099977 = NA) OR (annual_bmret#94099977 = null)) THEN null ELSE cast(annual_bmret#94099977 as float) END AS annual_bmret#94100216, CASE WHEN ((annual_ret#94099978 = NA) OR (annual_ret#94099978 = null)) THEN null ELSE cast(annual_ret#94099978 as float) END AS annual_ret#94100218, CASE WHEN ((std_ret#94099979 = NA) OR (std_ret#94099979 = null)) THEN null ELSE cast(std_ret#94099979 as float) END AS std_ret#94100220, CASE WHEN ((Sharpe_ret#94099980 = NA) OR (Sharpe_ret#94099980 = null)) THEN null ELSE cast(Sharpe_ret#94099980 as float) END AS Sharpe_ret#94100221, CASE WHEN ((PctPos_ret#94099981 = NA) OR (PctPos_ret#94099981 = null)) THEN null ELSE cast(PctPos_ret#94099981 as float) END AS PctPos_ret#94100223, CASE WHEN ((TR_ret#94099982 = NA) OR (TR_ret#94099982 = null)) THEN null ELSE cast(TR_ret#94099982 as float) END AS TR_ret#94100225, CASE WHEN ((IR_ret#94099983 = NA) OR (IR_ret#94099983 = null)) THEN null ELSE cast(IR_ret#94099983 as float) END AS IR_ret#94100226, CASE WHEN ((annual_resret#94099984 = NA) OR (annual_resret#94099984 = null)) THEN null ELSE cast(annual_resret#94099984 as float) END AS annual_resret#94100228, CASE WHEN ((std_resret#94099985 = NA) OR (std_resret#94099985 = null)) THEN null ELSE cast(std_resret#94099985 as float) END AS std_resret#94100230, CASE WHEN ((Sharpe_resret#94099986 = NA) OR (Sharpe_resret#94099986 = null)) THEN null ELSE cast(Sharpe_resret#94099986 as float) END AS Sharpe_resret#94100232, CASE WHEN ((PctPos_resret#94099987 = NA) OR (PctPos_resret#94099987 = null)) THEN null ELSE cast(PctPos_resret#94099987 as float) END AS PctPos_resret#94100234, ... 9 more fields] +- FileScan csv [overall#94099964,cap#94099965,retIC#94099966,resretIC#94099967,size#94099968,value#94099969,growth#94099970,leverage#94099971,volatility#94099972,momentum#94099973,yield#94099974,numcos#94099975,numdates#94099976,annual_bmret#94099977,annual_ret#94099978,std_ret#94099979,Sharpe_ret#94099980,PctPos_ret#94099981,TR_ret#94099982,IR_ret#94099983,annual_resret#94099984,std_resret#94099985,Sharpe_resret#94099986,PctPos_resret#94099987,... 9 more fields] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/sta..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<overall:string,cap:string,retIC:string,resretIC:string,size:string,value:string,growth:str... ,None) (5) Scan csv Output [33]: [overall#94099964, cap#94099965, retIC#94099966, resretIC#94099967, size#94099968, value#94099969, growth#94099970, leverage#94099971, volatility#94099972, momentum#94099973, yield#94099974, numcos#94099975, numdates#94099976, annual_bmret#94099977, annual_ret#94099978, std_ret#94099979, Sharpe_ret#94099980, PctPos_ret#94099981, TR_ret#94099982, IR_ret#94099983, annual_resret#94099984, std_resret#94099985, Sharpe_resret#94099986, PctPos_resret#94099987, TR_resret#94099988, IR_resret#94099989, annual_retnet#94099990, std_retnet#94099991, Sharpe_retnet#94099992, PctPos_retnet#94099993, TR_retnet#94099994, IR_retnet#94099995, turnover#94099996] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/risk_factors/yield/stats_overall.csv] ReadSchema: struct<overall:string,cap:string,retIC:string,resretIC:string,size:string,value:string,growth:string,leverage:string,volatility:string,momentum:string,yield:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [33]: [CASE WHEN ((overall#94099964 = NA) OR (overall#94099964 = null)) THEN null ELSE cast(overall#94099964 as int) END AS overall#94100196, CASE WHEN ((cap#94099965 = NA) OR (cap#94099965 = null)) THEN null ELSE cast(cap#94099965 as float) END AS cap#94100197, CASE WHEN ((retIC#94099966 = NA) OR (retIC#94099966 = null)) THEN null ELSE cast(retIC#94099966 as float) END AS retIC#94100198, CASE WHEN ((resretIC#94099967 = NA) OR (resretIC#94099967 = null)) THEN null ELSE cast(resretIC#94099967 as float) END AS resretIC#94100199, CASE WHEN ((size#94099968 = NA) OR (size#94099968 = null)) THEN null ELSE cast(size#94099968 as float) END AS size#94100200, CASE WHEN ((value#94099969 = NA) OR (value#94099969 = null)) THEN null ELSE cast(value#94099969 as float) END AS value#94100202, CASE WHEN ((growth#94099970 = NA) OR (growth#94099970 = null)) THEN null ELSE cast(growth#94099970 as float) END AS growth#94100203, CASE WHEN ((leverage#94099971 = NA) OR (leverage#94099971 = null)) THEN null ELSE cast(leverage#94099971 as float) END AS leverage#94100205, CASE WHEN ((volatility#94099972 = NA) OR (volatility#94099972 = null)) THEN null ELSE cast(volatility#94099972 as float) END AS volatility#94100207, CASE WHEN ((momentum#94099973 = NA) OR (momentum#94099973 = null)) THEN null ELSE cast(momentum#94099973 as float) END AS momentum#94100209, CASE WHEN ((yield#94099974 = NA) OR (yield#94099974 = null)) THEN null ELSE cast(yield#94099974 as float) END AS yield#94100210, CASE WHEN ((numcos#94099975 = NA) OR (numcos#94099975 = null)) THEN null ELSE cast(numcos#94099975 as float) END AS numcos#94100212, CASE WHEN ((numdates#94099976 = NA) OR (numdates#94099976 = null)) THEN null ELSE cast(numdates#94099976 as float) END AS numdates#94100214, CASE WHEN ((annual_bmret#94099977 = NA) OR (annual_bmret#94099977 = null)) THEN null ELSE cast(annual_bmret#94099977 as float) END AS annual_bmret#94100216, CASE WHEN ((annual_ret#94099978 = NA) OR (annual_ret#94099978 = null)) THEN null ELSE cast(annual_ret#94099978 as float) END AS annual_ret#94100218, CASE WHEN ((std_ret#94099979 = NA) OR (std_ret#94099979 = null)) THEN null ELSE cast(std_ret#94099979 as float) END AS std_ret#94100220, CASE WHEN ((Sharpe_ret#94099980 = NA) OR (Sharpe_ret#94099980 = null)) THEN null ELSE cast(Sharpe_ret#94099980 as float) END AS Sharpe_ret#94100221, CASE WHEN ((PctPos_ret#94099981 = NA) OR (PctPos_ret#94099981 = null)) THEN null ELSE cast(PctPos_ret#94099981 as float) END AS PctPos_ret#94100223, CASE WHEN ((TR_ret#94099982 = NA) OR (TR_ret#94099982 = null)) THEN null ELSE cast(TR_ret#94099982 as float) END AS TR_ret#94100225, CASE WHEN ((IR_ret#94099983 = NA) OR (IR_ret#94099983 = null)) THEN null ELSE cast(IR_ret#94099983 as float) END AS IR_ret#94100226, CASE WHEN ((annual_resret#94099984 = NA) OR (annual_resret#94099984 = null)) THEN null ELSE cast(annual_resret#94099984 as float) END AS annual_resret#94100228, CASE WHEN ((std_resret#94099985 = NA) OR (std_resret#94099985 = null)) THEN null ELSE cast(std_resret#94099985 as float) END AS std_resret#94100230, CASE WHEN ((Sharpe_resret#94099986 = NA) OR (Sharpe_resret#94099986 = null)) THEN null ELSE cast(Sharpe_resret#94099986 as float) END AS Sharpe_resret#94100232, CASE WHEN ((PctPos_resret#94099987 = NA) OR (PctPos_resret#94099987 = null)) THEN null ELSE cast(PctPos_resret#94099987 as float) END AS PctPos_resret#94100234, CASE WHEN ((TR_resret#94099988 = NA) OR (TR_resret#94099988 = null)) THEN null ELSE cast(TR_resret#94099988 as float) END AS TR_resret#94100235, CASE WHEN ((IR_resret#94099989 = NA) OR (IR_resret#94099989 = null)) THEN null ELSE cast(IR_resret#94099989 as float) END AS IR_resret#94100237, CASE WHEN ((annual_retnet#94099990 = NA) OR (annual_retnet#94099990 = null)) THEN null ELSE cast(annual_retnet#94099990 as float) END AS annual_retnet#94100239, CASE WHEN ((std_retnet#94099991 = NA) OR (std_retnet#94099991 = null)) THEN null ELSE cast(std_retnet#94099991 as float) END AS std_retnet#94100241, CASE WHEN ((Sharpe_retnet#94099992 = NA) OR (Sharpe_retnet#94099992 = null)) THEN null ELSE cast(Sharpe_retnet#94099992 as float) END AS Sharpe_retnet#94100242, CASE WHEN ((PctPos_retnet#94099993 = NA) OR (PctPos_retnet#94099993 = null)) THEN null ELSE cast(PctPos_retnet#94099993 as float) END AS PctPos_retnet#94100244, CASE WHEN ((TR_retnet#94099994 = NA) OR (TR_retnet#94099994 = null)) THEN null ELSE cast(TR_retnet#94099994 as float) END AS TR_retnet#94100246, CASE WHEN ((IR_retnet#94099995 = NA) OR (IR_retnet#94099995 = null)) THEN null ELSE cast(IR_retnet#94099995 as float) END AS IR_retnet#94100247, CASE WHEN ((turnover#94099996 = NA) OR (turnover#94099996 = null)) THEN null ELSE cast(turnover#94099996 as float) END AS turnover#94100249] Input [33]: [overall#94099964, cap#94099965, retIC#94099966, resretIC#94099967, size#94099968, value#94099969, growth#94099970, leverage#94099971, volatility#94099972, momentum#94099973, yield#94099974, numcos#94099975, numdates#94099976, annual_bmret#94099977, annual_ret#94099978, std_ret#94099979, Sharpe_ret#94099980, PctPos_ret#94099981, TR_ret#94099982, IR_ret#94099983, annual_resret#94099984, std_resret#94099985, Sharpe_resret#94099986, PctPos_resret#94099987, TR_resret#94099988, IR_resret#94099989, annual_retnet#94099990, std_retnet#94099991, Sharpe_retnet#94099992, PctPos_retnet#94099993, TR_retnet#94099994, IR_retnet#94099995, turnover#94099996] (7) ColumnarToRow [codegen id : 1] Input [8]: [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210] (8) Filter [codegen id : 1] Input [8]: [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210] Condition : (isnotnull(cap#94100197) AND (cap#94100197 = 0.0)) (9) Project [codegen id : 1] Output [7]: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] Input [8]: [cap#94100197, growth#94100203, leverage#94100205, momentum#94100209, size#94100200, value#94100202, volatility#94100207, yield#94100210] (10) ColumnarToRow [codegen id : 1] Input [7]: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] (11) CollectLimit Input [7]: [yield#94100210, volatility#94100207, momentum#94100209, size#94100200, value#94100202, growth#94100203, leverage#94100205] Arguments: 1000000