# Inner join (no `how` given).
# Works for one-to-one and one-to-many relationships; the suffixes are
# passed so overlapping column names from each frame can be told apart.
df1.merge(
    df2,
    on=['col1', 'col2'],
    suffixes=('_df1', '_df2'),
)

# Multiple merges: chain the calls, each one joining onto the previous result.
(
    df1
    .merge(df2, on='col1')
    .merge(df3, on='col1')
)

# Left join.
df1.merge(df2, how='left', on='col1')

# Key columns with different names in each frame.
df1.merge(
    df2,
    left_on='col1',
    right_on='col2',
)

# Self join: merge a frame with itself, for example to show hierarchical
# or sequential relationships between its own rows.
df1.merge(
    df1,
    how='left',
    left_on='col1',
    right_on='col2',
    suffixes=('_str1', '_str2'),
)
# Merging on keys that live in the index (including MultiIndex levels).
# `left_on` / `right_on` accept index level names as well as column names,
# so they are enough on their own. Do NOT combine them with
# `left_index=True` / `right_index=True`: pandas raises MergeError when both
# are given for the same side ("Can only pass argument "left_on" OR
# "left_index" not both").
# For a pure index-to-index join use instead:
#   df1.merge(df2, left_index=True, right_index=True)
df1.merge(df2, left_on='id', right_on='id2')
# Semi-join: filter df1 down to the rows whose 'id' also appears in df2,
# keeping only df1's columns.
# Step 1: an inner merge yields the 'id' values present in both frames.
# (The name uses an underscore: `semi-join` is not a valid identifier and
# is a SyntaxError.)
semi_join = df1.merge(df2, on='id')
# Step 2: use those ids as a boolean mask on df1. Because this is a mask,
# each df1 row appears at most once even if df2 has repeated ids.
df1[df1['id'].isin(semi_join['id'])]
# validating
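# pass validate='one_to_one' to make merge raise a MergeError when the keys are not unique in both frames
# also 'one_to_many', 'many_to_one', 'many_to_many' (shorthands: '1:1', '1:m', 'm:1', 'm:m'); note the underscores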
# Comments