We need to generate the final column list so that it contains the join column only once.
The solution is to rename the join column in the second DataFrame before joining.
/**
 * Joins two DataFrames on a key column that has the same name in both, and
 * returns a result in which that key column appears only once.
 *
 * The key column of `df2` is renamed to a temporary name before the join so
 * that the join condition and the final projection can refer to each side's
 * key unambiguously. The projection keeps `df1`'s key column first, followed
 * by the non-key columns of `df1` and then the non-key columns of `df2`.
 *
 * NOTE(review): the returned key is taken from `df1`; for join types that keep
 * unmatched `df2` rows it is presumably null on those rows — confirm this is
 * the desired behavior.
 * NOTE(review): non-key column names shared by both inputs are selected by
 * name and are not disambiguated here — verify callers pass disjoint schemas.
 *
 * @param df1      left-hand DataFrame; must contain the column `joinKey`
 * @param df2      right-hand DataFrame; must contain the column `joinKey`
 * @param joinKey  name of the column to join on, present in both inputs
 * @param joinType join type string passed straight through to `join`
 * @return the joined DataFrame with a single `joinKey` column
 */
def joinTwoDFs(df1: DataFrame, df2: DataFrame, joinKey: String, joinType: String): DataFrame = {
  // All non-key columns of both inputs, df1's first, in their original order.
  val targetColList =
    df1.columns.toList.filterNot(_ == joinKey) ++ df2.columns.toList.filterNot(_ == joinKey)

  // Rename df2's key so the two key columns have distinct names after the join.
  val joinKey2 = "tmp_" + joinKey
  val tmpDf2 = df2.withColumnRenamed(joinKey, joinKey2)

  // The left-hand side of the condition is df1's key (the original referenced
  // an undefined `dataset1` here).
  df1
    .join(tmpDf2, df1(joinKey) === tmpDf2(joinKey2), joinType)
    .select(joinKey, targetColList: _*)
}
No comments:
Post a Comment