== Physical Plan == CollectLimit (13) +- * ColumnarToRow (12) +- InMemoryTableScan (1) +- InMemoryRelation (2) +- * Sort (11) +- Exchange (10) +- * Project (9) +- * Filter (8) +- * ColumnarToRow (7) +- InMemoryTableScan (3) +- InMemoryRelation (4) +- * Project (6) +- Scan csv (5) (1) InMemoryTableScan Output [3]: [year#94300152, turnover#94300201, days_hold#94300233] Arguments: [year#94300152, turnover#94300201, days_hold#94300233] (2) InMemoryRelation Arguments: [year#94300152, turnover#94300201, days_hold#94300233], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(2) Sort [year#94300152 ASC NULLS FIRST], true, 0 +- Exchange rangepartitioning(year#94300152 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7529050] +- *(1) Project [year#94300152, turnover#94300201, (1.0 / cast(turnover#94300201 as double)) AS days_hold#94300233] +- *(1) Filter isnotnull(turnover#94300201) +- *(1) ColumnarToRow +- InMemoryTableScan [turnover#94300201, year#94300152], [isnotnull(turnover#94300201)] +- InMemoryRelation [year#94300152, retIC#94300154, resretIC#94300156, numcos#94300158, numdates#94300160, annual_bmret#94300162, annual_ret#94300164, std_ret#94300166, Sharpe_ret#94300168, PctPos_ret#94300171, TR_ret#94300173, IR_ret#94300175, annual_resret#94300177, std_resret#94300179, Sharpe_resret#94300181, PctPos_resret#94300183, TR_resret#94300185, IR_resret#94300187, annual_retnet#94300189, std_retnet#94300190, Sharpe_retnet#94300193, PctPos_retnet#94300194, TR_retnet#94300197, IR_retnet#94300199, turnover#94300201], StorageLevel(disk, memory, deserialized, 1 replicas) +- *(1) Project [CASE WHEN ((year#94299951 = NA) OR (year#94299951 = null)) THEN null ELSE cast(year#94299951 as float) END AS year#94300152, CASE WHEN ((retIC#94299952 = NA) OR (retIC#94299952 = null)) THEN null ELSE cast(retIC#94299952 as float) END AS retIC#94300154, CASE WHEN ((resretIC#94299953 = NA) OR (resretIC#94299953 = null)) THEN null ELSE cast(resretIC#94299953 as float) END AS resretIC#94300156, CASE WHEN ((numcos#94299954 = NA) OR (numcos#94299954 = null)) THEN null ELSE cast(numcos#94299954 as float) END AS numcos#94300158, CASE WHEN ((numdates#94299955 = NA) OR (numdates#94299955 = null)) THEN null ELSE cast(numdates#94299955 as int) END AS numdates#94300160, CASE WHEN ((annual_bmret#94299956 = NA) OR (annual_bmret#94299956 = null)) THEN null ELSE cast(annual_bmret#94299956 as float) END AS annual_bmret#94300162, CASE WHEN ((annual_ret#94299957 = NA) OR (annual_ret#94299957 = null)) THEN null ELSE cast(annual_ret#94299957 as float) END AS annual_ret#94300164, CASE WHEN ((std_ret#94299958 = NA) OR (std_ret#94299958 = null)) THEN null ELSE cast(std_ret#94299958 as float) END AS std_ret#94300166, CASE WHEN ((Sharpe_ret#94299959 = NA) OR (Sharpe_ret#94299959 = null)) THEN null ELSE cast(Sharpe_ret#94299959 as float) END AS Sharpe_ret#94300168, CASE WHEN ((PctPos_ret#94299960 = NA) OR (PctPos_ret#94299960 = null)) THEN null ELSE cast(PctPos_ret#94299960 as float) END AS PctPos_ret#94300171, CASE WHEN ((TR_ret#94299961 = NA) OR (TR_ret#94299961 = null)) THEN null ELSE cast(TR_ret#94299961 as float) END AS TR_ret#94300173, CASE WHEN ((IR_ret#94299962 = NA) OR (IR_ret#94299962 = null)) THEN null ELSE cast(IR_ret#94299962 as float) END AS IR_ret#94300175, CASE WHEN ((annual_resret#94299963 = NA) OR (annual_resret#94299963 = null)) THEN null ELSE cast(annual_resret#94299963 as float) END AS annual_resret#94300177, CASE WHEN ((std_resret#94299964 = NA) OR (std_resret#94299964 = null)) THEN null ELSE cast(std_resret#94299964 as float) END AS std_resret#94300179, CASE WHEN ((Sharpe_resret#94299965 = NA) OR (Sharpe_resret#94299965 = null)) THEN null ELSE cast(Sharpe_resret#94299965 as float) END AS Sharpe_resret#94300181, CASE WHEN ((PctPos_resret#94299966 = NA) OR (PctPos_resret#94299966 = null)) THEN null ELSE cast(PctPos_resret#94299966 as float) END AS PctPos_resret#94300183, CASE WHEN ((TR_resret#94299967 = NA) OR (TR_resret#94299967 = null)) THEN null ELSE cast(TR_resret#94299967 as float) END AS TR_resret#94300185, CASE WHEN ((IR_resret#94299968 = NA) OR (IR_resret#94299968 = null)) THEN null ELSE cast(IR_resret#94299968 as float) END AS IR_resret#94300187, CASE WHEN ((annual_retnet#94299969 = NA) OR (annual_retnet#94299969 = null)) THEN null ELSE cast(annual_retnet#94299969 as float) END AS annual_retnet#94300189, CASE WHEN ((std_retnet#94299970 = NA) OR (std_retnet#94299970 = null)) THEN null ELSE cast(std_retnet#94299970 as float) END AS std_retnet#94300190, CASE WHEN ((Sharpe_retnet#94299971 = NA) OR (Sharpe_retnet#94299971 = null)) THEN null ELSE cast(Sharpe_retnet#94299971 as float) END AS Sharpe_retnet#94300193, CASE WHEN ((PctPos_retnet#94299972 = NA) OR (PctPos_retnet#94299972 = null)) THEN null ELSE cast(PctPos_retnet#94299972 as float) END AS PctPos_retnet#94300194, CASE WHEN ((TR_retnet#94299973 = NA) OR (TR_retnet#94299973 = null)) THEN null ELSE cast(TR_retnet#94299973 as float) END AS TR_retnet#94300197, CASE WHEN ((IR_retnet#94299974 = NA) OR (IR_retnet#94299974 = null)) THEN null ELSE cast(IR_retnet#94299974 as float) END AS IR_retnet#94300199, CASE WHEN ((turnover#94299975 = NA) OR (turnover#94299975 = null)) THEN null ELSE cast(turnover#94299975 as float) END AS turnover#94300201] +- FileScan csv [year#94299951,retIC#94299952,resretIC#94299953,numcos#94299954,numdates#94299955,annual_bmret#94299956,annual_ret#94299957,std_ret#94299958,Sharpe_ret#94299959,PctPos_ret#94299960,TR_ret#94299961,IR_ret#94299962,annual_resret#94299963,std_resret#94299964,Sharpe_resret#94299965,PctPos_resret#94299966,TR_resret#94299967,IR_resret#94299968,annual_retnet#94299969,std_retnet#94299970,Sharpe_retnet#94299971,PctPos_retnet#94299972,TR_retnet#94299973,IR_retnet#94299974,turnover#94299975] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None), [year#94300152 ASC NULLS FIRST] (3) InMemoryTableScan Output [2]: [turnover#94300201, year#94300152] Arguments: [turnover#94300201, year#94300152], [isnotnull(turnover#94300201)] (4) InMemoryRelation Arguments: [year#94300152, retIC#94300154, resretIC#94300156, numcos#94300158, numdates#94300160, annual_bmret#94300162, annual_ret#94300164, std_ret#94300166, Sharpe_ret#94300168, PctPos_ret#94300171, TR_ret#94300173, IR_ret#94300175, annual_resret#94300177, std_resret#94300179, Sharpe_resret#94300181, PctPos_resret#94300183, TR_resret#94300185, IR_resret#94300187, annual_retnet#94300189, std_retnet#94300190, Sharpe_retnet#94300193, PctPos_retnet#94300194, TR_retnet#94300197, IR_retnet#94300199, turnover#94300201], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@208e3fd9,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [CASE WHEN ((year#94299951 = NA) OR (year#94299951 = null)) THEN null ELSE cast(year#94299951 as float) END AS year#94300152, CASE WHEN ((retIC#94299952 = NA) OR (retIC#94299952 = null)) THEN null ELSE cast(retIC#94299952 as float) END AS retIC#94300154, CASE WHEN ((resretIC#94299953 = NA) OR (resretIC#94299953 = null)) THEN null ELSE cast(resretIC#94299953 as float) END AS resretIC#94300156, CASE WHEN ((numcos#94299954 = NA) OR (numcos#94299954 = null)) THEN null ELSE cast(numcos#94299954 as float) END AS numcos#94300158, CASE WHEN ((numdates#94299955 = NA) OR (numdates#94299955 = null)) THEN null ELSE cast(numdates#94299955 as int) END AS numdates#94300160, CASE WHEN ((annual_bmret#94299956 = NA) OR (annual_bmret#94299956 = null)) THEN null ELSE cast(annual_bmret#94299956 as float) END AS annual_bmret#94300162, CASE WHEN ((annual_ret#94299957 = NA) OR (annual_ret#94299957 = null)) THEN null ELSE cast(annual_ret#94299957 as float) END AS annual_ret#94300164, CASE WHEN ((std_ret#94299958 = NA) OR (std_ret#94299958 = null)) THEN null ELSE cast(std_ret#94299958 as float) END AS std_ret#94300166, CASE WHEN ((Sharpe_ret#94299959 = NA) OR (Sharpe_ret#94299959 = null)) THEN null ELSE cast(Sharpe_ret#94299959 as float) END AS Sharpe_ret#94300168, CASE WHEN ((PctPos_ret#94299960 = NA) OR (PctPos_ret#94299960 = null)) THEN null ELSE cast(PctPos_ret#94299960 as float) END AS PctPos_ret#94300171, CASE WHEN ((TR_ret#94299961 = NA) OR (TR_ret#94299961 = null)) THEN null ELSE cast(TR_ret#94299961 as float) END AS TR_ret#94300173, CASE WHEN ((IR_ret#94299962 = NA) OR (IR_ret#94299962 = null)) THEN null ELSE cast(IR_ret#94299962 as float) END AS IR_ret#94300175, CASE WHEN ((annual_resret#94299963 = NA) OR (annual_resret#94299963 = null)) THEN null ELSE cast(annual_resret#94299963 as float) END AS annual_resret#94300177, CASE WHEN ((std_resret#94299964 = NA) OR (std_resret#94299964 = null)) THEN null ELSE cast(std_resret#94299964 as float) END AS std_resret#94300179, CASE WHEN ((Sharpe_resret#94299965 = NA) OR (Sharpe_resret#94299965 = null)) THEN null ELSE cast(Sharpe_resret#94299965 as float) END AS Sharpe_resret#94300181, CASE WHEN ((PctPos_resret#94299966 = NA) OR (PctPos_resret#94299966 = null)) THEN null ELSE cast(PctPos_resret#94299966 as float) END AS PctPos_resret#94300183, CASE WHEN ((TR_resret#94299967 = NA) OR (TR_resret#94299967 = null)) THEN null ELSE cast(TR_resret#94299967 as float) END AS TR_resret#94300185, CASE WHEN ((IR_resret#94299968 = NA) OR (IR_resret#94299968 = null)) THEN null ELSE cast(IR_resret#94299968 as float) END AS IR_resret#94300187, CASE WHEN ((annual_retnet#94299969 = NA) OR (annual_retnet#94299969 = null)) THEN null ELSE cast(annual_retnet#94299969 as float) END AS annual_retnet#94300189, CASE WHEN ((std_retnet#94299970 = NA) OR (std_retnet#94299970 = null)) THEN null ELSE cast(std_retnet#94299970 as float) END AS std_retnet#94300190, CASE WHEN ((Sharpe_retnet#94299971 = NA) OR (Sharpe_retnet#94299971 = null)) THEN null ELSE cast(Sharpe_retnet#94299971 as float) END AS Sharpe_retnet#94300193, CASE WHEN ((PctPos_retnet#94299972 = NA) OR (PctPos_retnet#94299972 = null)) THEN null ELSE cast(PctPos_retnet#94299972 as float) END AS PctPos_retnet#94300194, CASE WHEN ((TR_retnet#94299973 = NA) OR (TR_retnet#94299973 = null)) THEN null ELSE cast(TR_retnet#94299973 as float) END AS TR_retnet#94300197, CASE WHEN ((IR_retnet#94299974 = NA) OR (IR_retnet#94299974 = null)) THEN null ELSE cast(IR_retnet#94299974 as float) END AS IR_retnet#94300199, CASE WHEN ((turnover#94299975 = NA) OR (turnover#94299975 = null)) THEN null ELSE cast(turnover#94299975 as float) END AS turnover#94300201] +- FileScan csv [year#94299951,retIC#94299952,resretIC#94299953,numcos#94299954,numdates#94299955,annual_bmret#94299956,annual_ret#94299957,std_ret#94299958,Sharpe_ret#94299959,PctPos_ret#94299960,TR_ret#94299961,IR_ret#94299962,annual_resret#94299963,std_resret#94299964,Sharpe_resret#94299965,PctPos_resret#94299966,TR_resret#94299967,IR_resret#94299968,annual_retnet#94299969,std_retnet#94299970,Sharpe_retnet#94299971,PctPos_retnet#94299972,TR_retnet#94299973,IR_retnet#94299974,turnover#94299975] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string... ,None) (5) Scan csv Output [25]: [year#94299951, retIC#94299952, resretIC#94299953, numcos#94299954, numdates#94299955, annual_bmret#94299956, annual_ret#94299957, std_ret#94299958, Sharpe_ret#94299959, PctPos_ret#94299960, TR_ret#94299961, IR_ret#94299962, annual_resret#94299963, std_resret#94299964, Sharpe_resret#94299965, PctPos_resret#94299966, TR_resret#94299967, IR_resret#94299968, annual_retnet#94299969, std_retnet#94299970, Sharpe_retnet#94299971, PctPos_retnet#94299972, TR_retnet#94299973, IR_retnet#94299974, turnover#94299975] Batched: false Location: InMemoryFileIndex [file:/srv/plusamp/data/default/ea-market/output/esg_innovation/innovation/stats_year.csv] ReadSchema: struct<year:string,retIC:string,resretIC:string,numcos:string,numdates:string,annual_bmret:string,annual_ret:string,std_ret:string,Sharpe_ret:string,PctPos_ret:string,TR_ret:string,IR_ret:string,annual_resret:string,std_resret:string,Sharpe_resret:string,PctPos_resret:string,TR_resret:string,IR_resret:string,annual_retnet:string,std_retnet:string,Sharpe_retnet:string,PctPos_retnet:string,TR_retnet:string,IR_retnet:string,turnover:string> (6) Project [codegen id : 1] Output [25]: [CASE WHEN ((year#94299951 = NA) OR (year#94299951 = null)) THEN null ELSE cast(year#94299951 as float) END AS year#94300152, CASE WHEN ((retIC#94299952 = NA) OR (retIC#94299952 = null)) THEN null ELSE cast(retIC#94299952 as float) END AS retIC#94300154, CASE WHEN ((resretIC#94299953 = NA) OR (resretIC#94299953 = null)) THEN null ELSE cast(resretIC#94299953 as float) END AS resretIC#94300156, CASE WHEN ((numcos#94299954 = NA) OR (numcos#94299954 = null)) THEN null ELSE cast(numcos#94299954 as float) END AS numcos#94300158, CASE WHEN ((numdates#94299955 = NA) OR (numdates#94299955 = null)) THEN null ELSE cast(numdates#94299955 as int) END AS numdates#94300160, CASE WHEN ((annual_bmret#94299956 = NA) OR (annual_bmret#94299956 = null)) THEN null ELSE cast(annual_bmret#94299956 as float) END AS annual_bmret#94300162, CASE WHEN ((annual_ret#94299957 = NA) OR (annual_ret#94299957 = null)) THEN null ELSE cast(annual_ret#94299957 as float) END AS annual_ret#94300164, CASE WHEN ((std_ret#94299958 = NA) OR (std_ret#94299958 = null)) THEN null ELSE cast(std_ret#94299958 as float) END AS std_ret#94300166, CASE WHEN ((Sharpe_ret#94299959 = NA) OR (Sharpe_ret#94299959 = null)) THEN null ELSE cast(Sharpe_ret#94299959 as float) END AS Sharpe_ret#94300168, CASE WHEN ((PctPos_ret#94299960 = NA) OR (PctPos_ret#94299960 = null)) THEN null ELSE cast(PctPos_ret#94299960 as float) END AS PctPos_ret#94300171, CASE WHEN ((TR_ret#94299961 = NA) OR (TR_ret#94299961 = null)) THEN null ELSE cast(TR_ret#94299961 as float) END AS TR_ret#94300173, CASE WHEN ((IR_ret#94299962 = NA) OR (IR_ret#94299962 = null)) THEN null ELSE cast(IR_ret#94299962 as float) END AS IR_ret#94300175, CASE WHEN ((annual_resret#94299963 = NA) OR (annual_resret#94299963 = null)) THEN null ELSE cast(annual_resret#94299963 as float) END AS annual_resret#94300177, CASE WHEN ((std_resret#94299964 = NA) OR (std_resret#94299964 = null)) THEN null ELSE cast(std_resret#94299964 as float) END AS std_resret#94300179, CASE WHEN ((Sharpe_resret#94299965 = NA) OR (Sharpe_resret#94299965 = null)) THEN null ELSE cast(Sharpe_resret#94299965 as float) END AS Sharpe_resret#94300181, CASE WHEN ((PctPos_resret#94299966 = NA) OR (PctPos_resret#94299966 = null)) THEN null ELSE cast(PctPos_resret#94299966 as float) END AS PctPos_resret#94300183, CASE WHEN ((TR_resret#94299967 = NA) OR (TR_resret#94299967 = null)) THEN null ELSE cast(TR_resret#94299967 as float) END AS TR_resret#94300185, CASE WHEN ((IR_resret#94299968 = NA) OR (IR_resret#94299968 = null)) THEN null ELSE cast(IR_resret#94299968 as float) END AS IR_resret#94300187, CASE WHEN ((annual_retnet#94299969 = NA) OR (annual_retnet#94299969 = null)) THEN null ELSE cast(annual_retnet#94299969 as float) END AS annual_retnet#94300189, CASE WHEN ((std_retnet#94299970 = NA) OR (std_retnet#94299970 = null)) THEN null ELSE cast(std_retnet#94299970 as float) END AS std_retnet#94300190, CASE WHEN ((Sharpe_retnet#94299971 = NA) OR (Sharpe_retnet#94299971 = null)) THEN null ELSE cast(Sharpe_retnet#94299971 as float) END AS Sharpe_retnet#94300193, CASE WHEN ((PctPos_retnet#94299972 = NA) OR (PctPos_retnet#94299972 = null)) THEN null ELSE cast(PctPos_retnet#94299972 as float) END AS PctPos_retnet#94300194, CASE WHEN ((TR_retnet#94299973 = NA) OR (TR_retnet#94299973 = null)) THEN null ELSE cast(TR_retnet#94299973 as float) END AS TR_retnet#94300197, CASE WHEN ((IR_retnet#94299974 = NA) OR (IR_retnet#94299974 = null)) THEN null ELSE cast(IR_retnet#94299974 as float) END AS IR_retnet#94300199, CASE WHEN ((turnover#94299975 = NA) OR (turnover#94299975 = null)) THEN null ELSE cast(turnover#94299975 as float) END AS turnover#94300201] Input [25]: [year#94299951, retIC#94299952, resretIC#94299953, numcos#94299954, numdates#94299955, annual_bmret#94299956, annual_ret#94299957, std_ret#94299958, Sharpe_ret#94299959, PctPos_ret#94299960, TR_ret#94299961, IR_ret#94299962, annual_resret#94299963, std_resret#94299964, Sharpe_resret#94299965, PctPos_resret#94299966, TR_resret#94299967, IR_resret#94299968, annual_retnet#94299969, std_retnet#94299970, Sharpe_retnet#94299971, PctPos_retnet#94299972, TR_retnet#94299973, IR_retnet#94299974, turnover#94299975] (7) ColumnarToRow [codegen id : 1] Input [2]: [turnover#94300201, year#94300152] (8) Filter [codegen id : 1] Input [2]: [turnover#94300201, year#94300152] Condition : isnotnull(turnover#94300201) (9) Project [codegen id : 1] Output [3]: [year#94300152, turnover#94300201, (1.0 / cast(turnover#94300201 as double)) AS days_hold#94300233] Input [2]: [turnover#94300201, year#94300152] (10) Exchange Input [3]: [year#94300152, turnover#94300201, days_hold#94300233] Arguments: rangepartitioning(year#94300152 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [id=#7529050] (11) Sort [codegen id : 2] Input [3]: [year#94300152, turnover#94300201, days_hold#94300233] Arguments: [year#94300152 ASC NULLS FIRST], true, 0 (12) ColumnarToRow [codegen id : 1] Input [3]: [year#94300152, turnover#94300201, days_hold#94300233] (13) CollectLimit Input [3]: [year#94300152, turnover#94300201, days_hold#94300233] Arguments: 1000000