We need the final column list of the joined DataFrame to contain the join column only once.
The solution is to rename the join column in the second DataFrame before joining.
/**
 * Joins two DataFrames on a single, identically named key column and returns a
 * result that contains that key column only once.
 *
 * The key column of `df2` is renamed to a temporary name (`"tmp_" + joinKey`)
 * before the join so that both the join condition and the final projection can
 * refer to each side's key unambiguously.
 *
 * @param df1      left DataFrame; must contain a column named `joinKey`
 * @param df2      right DataFrame; must contain a column named `joinKey`
 * @param joinKey  name of the column to join on
 * @param joinType join type string, passed through unchanged to `DataFrame.join`
 * @return a DataFrame with `joinKey` first, followed by the remaining columns of
 *         `df1` and then the remaining columns of `df2`
 */
def joinTwoDFs(df1: DataFrame, df2: DataFrame, joinKey: String, joinType: String): DataFrame = {
  // Every column except the join key, left side first, then right side.
  val nonKeyColumns =
    df1.columns.toList.filterNot(_ == joinKey) ++ df2.columns.toList.filterNot(_ == joinKey)

  // Rename the right-hand key so the two key columns have distinct names.
  val tmpJoinKey = s"tmp_$joinKey"
  val renamedDf2 = df2.withColumnRenamed(joinKey, tmpJoinKey)

  // NOTE(review): the selected key is df1's column, so for right/full outer joins
  // rows with no match in df1 presumably end up with a null key — confirm whether
  // that is the intended behaviour.
  df1
    .join(renamedDf2, df1(joinKey) === renamedDf2(tmpJoinKey), joinType)
    .select(joinKey, nonKeyColumns: _*)
}
No comments:
Post a Comment