From b6386b665ba4f82d9b88bc72c5d14f5395e8ab6c Mon Sep 17 00:00:00 2001
From: Roque Lopez
Date: Mon, 16 Sep 2024 17:17:50 -0400
Subject: [PATCH] test: Add test_top_value_matches()

---
 tests/test_api.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/test_api.py b/tests/test_api.py
index 6982ae0..952de09 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -341,3 +341,46 @@ def test_top_matches_and_match_values_integration():
     assert "similarity" in df.columns
     assert df.attrs["source"] == "fruits"
     assert df.attrs["target"] in ["fruit_types", "fruit_names", "fruit_id"]
+
+
+def test_top_value_matches():
+    """Exercise bdi.top_value_matches() on a small fruit-name example.
+
+    The call is made once for the ("fruits", "fruit_names") column pair. The
+    test expects four result tables and checks the first three for their row
+    count and for the "source", "target" and "similarity" columns.
+    """
+    # given
+    source_values = ["Applee", "Bananaa", "Oorange", "Strawberry"]
+    target_names = [
+        "apple",
+        "red apple",
+        "banana",
+        "mx banana",
+        "melon",
+        "kiwi",
+        "grapes",
+    ]
+    df_source = pd.DataFrame({"fruits": source_values})
+    df_target = pd.DataFrame(
+        {
+            "fruit_names": target_names,
+            "fruit_id": ["1", "2", "3", "4", "5", "6", "7"],
+        }
+    )
+    column_mapping = ("fruits", "fruit_names")
+    # Expected row count of the first three result tables, in order:
+    # top matches for apple -> 2, for banana -> 2, for orange -> 1.
+    expected_row_counts = (2, 2, 1)
+    required_columns = ("source", "target", "similarity")
+
+    # when
+    matches = bdi.top_value_matches(df_source, df_target, column_mapping)
+
+    # then
+    assert len(matches) == 4  # number of dataframes in the list
+    for position, expected_rows in enumerate(expected_row_counts):
+        df_match = matches[position]
+        assert len(df_match) == expected_rows
+        for column_name in required_columns:
+            assert column_name in df_match.columns