From 68f1d9519f4ec50ebc5922360f33a8647c335b50 Mon Sep 17 00:00:00 2001 From: Pushpam Date: Thu, 6 Nov 2025 20:14:40 +0530 Subject: [PATCH] feat: remove duplicates --- app/etl/transform.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/etl/transform.py b/app/etl/transform.py index f2d2b33..ed7b936 100644 --- a/app/etl/transform.py +++ b/app/etl/transform.py @@ -22,11 +22,14 @@ def transform(df: pd.DataFrame) -> pd.DataFrame: # Handle duplicates initial_rows = len(df_transformed) - # TODO (Find & Fix): Duplicates are not removed + # Removing duplicates + df_transformed=df_transformed.drop_duplicates() + duplicates_removed = initial_rows - len(df_transformed) if duplicates_removed > 0: - # TODO (Find & Fix): Should log how many duplicates were removed - pass + # Number of duplicates removed + print(f"✅ Removed {duplicates_removed} duplicate rows.") + # Handle null values in numeric columns numeric_columns = df_transformed.select_dtypes(include=['number']).columns