From 405f3b160e5b7083f3a2ba6e3a1589a11fe65728 Mon Sep 17 00:00:00 2001
From: Naved Ansari <naved001@gmail.com>
Date: Mon, 2 Mar 2026 15:16:06 -0500
Subject: [PATCH 1/5] Move loading metrics metadata and data into separate
 functions.

Use MetricsMetadata dataclass to pass metrics metrics data.
---
 openshift_metrics/merge.py | 163 +++++++++++++++++++++++--------------
 1 file changed, 104 insertions(+), 59 deletions(-)

diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py
index ea01980..67f1a4c 100644
--- a/openshift_metrics/merge.py
+++ b/openshift_metrics/merge.py
@@ -7,8 +7,9 @@
 import argparse
 from datetime import datetime, UTC, timedelta
 import json
-from typing import Tuple
+from typing import List, Tuple
 from decimal import Decimal
+from dataclasses import dataclass
 from nerc_rates import rates, outages
 
 from openshift_metrics import utils, invoice
@@ -19,6 +20,102 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class MetricsMetadata:
+    cluster_name: str
+    report_start_date: str
+    report_end_date: str
+    interval_minutes: int
+
+    @property
+    def report_month(self) -> str:
+        return datetime.strftime(
+            datetime.strptime(self.report_start_date, "%Y-%m-%d"), "%Y-%m"
+        )
+
+def load_metrics_metadata(files: List[str]) -> MetricsMetadata:
+    """
+    Load only the metadata from the metrics files.
+
+    We need to load the metadata before the actual data because we need to know
+    interval_minutes and ensure that it's the same for all the files.
+    """
+    cluster_name = None
+    report_start_date = None
+    report_end_date = None
+    interval_minutes = None
+
+    for file in files:
+        with open(file, "r") as jsonfile:
+            metrics_from_file = json.load(jsonfile)
+            if cluster_name is None:
+                cluster_name = metrics_from_file.get("cluster_name")
+
+            if interval_minutes is None:
+                interval_minutes = metrics_from_file.get("interval_minutes")
+            else:
+                interval_minutes_from_file = metrics_from_file["interval_minutes"]
+                if interval_minutes != interval_minutes_from_file:
+                    sys.exit(
+                        f"Cannot process files with different intervals {interval_minutes} != {interval_minutes_from_file}"
+                    )
+
+            if report_start_date is None:
+                report_start_date = metrics_from_file["start_date"]
+            elif compare_dates(metrics_from_file["start_date"], report_start_date):
+                report_start_date = metrics_from_file["start_date"]
+
+            if report_end_date is None:
+                report_end_date = metrics_from_file["end_date"]
+            elif compare_dates(report_end_date, metrics_from_file["end_date"]):
+                report_end_date = metrics_from_file["end_date"]
+
+    if cluster_name is None:
+        cluster_name = "Unknown Cluster"
+
+    if interval_minutes is None:
+        logger.info(
+            f"No prometheus query interval minutes found in the given set of files. Using the provided interval: {PROM_QUERY_INTERVAL_MINUTES} minute(s)"
+        )
+        interval_minutes = PROM_QUERY_INTERVAL_MINUTES
+    else:
+        logger.info(
+            f"Prometheus Query interval set to {interval_minutes} minute(s) from file"
+        )
+
+    if report_start_date is None or report_end_date is None:
+        sys.exit(
+            "Could not find report start date or end date in the given set of files."
+        )
+
+    return MetricsMetadata(
+        cluster_name=cluster_name,
+        report_start_date=report_start_date,
+        report_end_date=report_end_date,
+        interval_minutes=interval_minutes,
+    )
+
+
+def load_and_merge_metrics(interval_minutes, files: List[str]) -> MetricsProcessor:
+    """Load and merge metrics
+
+    Loads metrics from provided json files and then returns a processor
+    that has all the merged data.
+    """
+    processor = MetricsProcessor(interval_minutes)
+    for file in files:
+        with open(file, "r") as jsonfile:
+            metrics_from_file = json.load(jsonfile)
+            cpu_request_metrics = metrics_from_file["cpu_metrics"]
+            memory_request_metrics = metrics_from_file["memory_metrics"]
+            gpu_request_metrics = metrics_from_file.get("gpu_metrics", None)
+            processor.merge_metrics("cpu_request", cpu_request_metrics)
+            processor.merge_metrics("memory_request", memory_request_metrics)
+            if gpu_request_metrics is not None:
+                processor.merge_metrics("gpu_request", gpu_request_metrics)
+    logger.info(f"Total metric files read: {len(files)}")
+    return processor
+
 def compare_dates(date_str1, date_str2):
     """Returns true is date1 is earlier than date2"""
     date1 = datetime.strptime(date_str1, "%Y-%m-%d")
@@ -104,66 +201,14 @@ def main():
     args = parser.parse_args()
     files = args.files
 
-    report_start_date = None
-    report_end_date = None
-    cluster_name = None
-    interval_minutes = None
+    metrics_metadata = load_metrics_metadata(files)
 
-    for file in files:
-        with open(file, "r") as jsonfile:
-            metrics_from_file = json.load(jsonfile)
-            if interval_minutes is None:
-                interval_minutes = metrics_from_file.get("interval_minutes")
-            else:
-                interval_minutes_from_file = metrics_from_file["interval_minutes"]
-                if interval_minutes != interval_minutes_from_file:
-                    sys.exit(
-                        f"Cannot process files with different intervals {interval_minutes} != {interval_minutes_from_file}"
-                    )
+    report_start_date = metrics_metadata.report_start_date
+    report_end_date = metrics_metadata.report_end_date
+    cluster_name = metrics_metadata.cluster_name
+    report_month = metrics_metadata.report_month
 
-    if interval_minutes is None:
-        logger.info(
-            f"No prometheus query interval minutes found in the given set of files. Using the provided interval: {PROM_QUERY_INTERVAL_MINUTES} minute(s)"
-        )
-        interval_minutes = PROM_QUERY_INTERVAL_MINUTES
-    else:
-        logger.info(
-            f"Prometheus Query interval set to {interval_minutes} minute(s) from file"
-        )
-
-    processor = MetricsProcessor(interval_minutes)
-
-    for file in files:
-        with open(file, "r") as jsonfile:
-            metrics_from_file = json.load(jsonfile)
-            if cluster_name is None:
-                cluster_name = metrics_from_file.get("cluster_name")
-            cpu_request_metrics = metrics_from_file["cpu_metrics"]
-            memory_request_metrics = metrics_from_file["memory_metrics"]
-            gpu_request_metrics = metrics_from_file.get("gpu_metrics", None)
-            processor.merge_metrics("cpu_request", cpu_request_metrics)
-            processor.merge_metrics("memory_request", memory_request_metrics)
-            if gpu_request_metrics is not None:
-                processor.merge_metrics("gpu_request", gpu_request_metrics)
-
-            if report_start_date is None:
-                report_start_date = metrics_from_file["start_date"]
-            elif compare_dates(metrics_from_file["start_date"], report_start_date):
-                report_start_date = metrics_from_file["start_date"]
-
-            if report_end_date is None:
-                report_end_date = metrics_from_file["end_date"]
-            elif compare_dates(report_end_date, metrics_from_file["end_date"]):
-                report_end_date = metrics_from_file["end_date"]
-
-    if cluster_name is None:
-        cluster_name = "Unknown Cluster"
-
-    logger.info(f"Total metric files read: {len(files)}")
-
-    report_month = datetime.strftime(
-        datetime.strptime(report_start_date, "%Y-%m-%d"), "%Y-%m"
-    )
+    processor = load_and_merge_metrics(metrics_metadata.interval_minutes, files)
 
     if args.use_nerc_rates:
         logger.info("Using nerc rates for rates and outages")

From b3a480a116b9eb8b3b24cbb8700d21779db7eecb Mon Sep 17 00:00:00 2001
From: Naved Ansari <naved001@gmail.com>
Date: Mon, 2 Mar 2026 15:17:19 -0500
Subject: [PATCH 2/5] Use cluster names for local files

---
 openshift_metrics/merge.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py
index 67f1a4c..6ba5168 100644
--- a/openshift_metrics/merge.py
+++ b/openshift_metrics/merge.py
@@ -243,17 +243,17 @@ def main():
     if args.invoice_file:
         invoice_file = args.invoice_file
     else:
-        invoice_file = f"NERC OpenShift {report_month}.csv"
+        invoice_file = f"{cluster_name} {report_month}.csv"
 
     if args.class_invoice_file:
         class_invoice_file = args.class_invoice_file
     else:
-        class_invoice_file = f"NERC OpenShift Classes {report_month}.csv"
+        class_invoice_file = f"Classes-{cluster_name} {report_month}.csv"
 
     if args.pod_report_file:
         pod_report_file = args.pod_report_file
     else:
-        pod_report_file = f"Pod NERC OpenShift {report_month}.csv"
+        pod_report_file = f"Pod-{cluster_name} {report_month}.csv"
 
     report_start_date = datetime.strptime(report_start_date, "%Y-%m-%d").replace(
         tzinfo=UTC

From 94f16852565ce63bad594d13008a7ebe566ddd3f Mon Sep 17 00:00:00 2001
From: Naved Ansari <naved001@gmail.com>
Date: Tue, 3 Mar 2026 10:34:46 -0500
Subject: [PATCH 3/5] Some more refactoring

Move tasks out of main into their own functions.

Slightly rearrange the order of operations in main.
---
 openshift_metrics/merge.py | 177 ++++++++++++++++++-------------------
 1 file changed, 88 insertions(+), 89 deletions(-)

diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py
index 6ba5168..6eebfd0 100644
--- a/openshift_metrics/merge.py
+++ b/openshift_metrics/merge.py
@@ -33,6 +33,14 @@ def report_month(self) -> str:
             datetime.strptime(self.report_start_date, "%Y-%m-%d"), "%Y-%m"
         )
 
+    @property
+    def start_time_utc(self) -> datetime:
+        return datetime.strptime(self.report_start_date, "%Y-%m-%d").replace(tzinfo=UTC)
+
+    @property
+    def end_time_utc(self) -> datetime:
+        return datetime.strptime(self.report_end_date, "%Y-%m-%d").replace(tzinfo=UTC)
+
 def load_metrics_metadata(files: List[str]) -> MetricsMetadata:
     """
     Load only the metadata from the metrics files.
@@ -164,21 +172,80 @@ def get_su_definitions(report_month) -> dict:
     return su_definitions
 
 
+def get_rates_and_outages(args: argparse.Namespace, meta: MetricsMetadata) -> Tuple[invoice.Rates, list]:
+    if args.use_nerc_rates:
+        logger.info("Using nerc rates for rates and outages")
+        rates_data = rates.load_from_url()
+        invoice_rates = invoice.Rates(
+            cpu=rates_data.get_value_at("CPU SU Rate", meta.report_month, Decimal),
+            gpu_a100=rates_data.get_value_at("GPUA100 SU Rate", meta.report_month, Decimal),
+            gpu_a100sxm4=rates_data.get_value_at(
+                "GPUA100SXM4 SU Rate", meta.report_month, Decimal
+            ),
+            gpu_v100=rates_data.get_value_at("GPUV100 SU Rate", meta.report_month, Decimal),
+            gpu_h100=rates_data.get_value_at("GPUH100 SU Rate", meta.report_month, Decimal),
+        )
+        outage_data = outages.load_from_url()
+        ignore_hours = outage_data.get_outages_during(
+            meta.report_start_date, meta.report_end_date, meta.cluster_name
+        )
+    else:
+        invoice_rates = invoice.Rates(
+            cpu=Decimal(args.rate_cpu_su),
+            gpu_a100=Decimal(args.rate_gpu_a100_su),
+            gpu_a100sxm4=Decimal(args.rate_gpu_a100sxm4_su),
+            gpu_v100=Decimal(args.rate_gpu_v100_su),
+            gpu_h100=Decimal(args.rate_gpu_h100_su),
+        )
+        ignore_hours = args.ignore_hours
+
+    if bool(ignore_hours):
+        for start_time, end_time in ignore_hours:
+            logger.info(f"{start_time} to {end_time} will be excluded from the invoice")
+
+    return invoice_rates, ignore_hours
+
+
+def upload_reports_to_s3(report_metadata: invoice.ReportMetadata, invoice_file: str, pod_report_file: str, class_invoice_file: str):
+    report_month = report_metadata.report_month
+    cluster_name = report_metadata.cluster_name
+    report_date = report_metadata.report_end_time.strftime("%Y-%m-%d")
+    timestamp = report_metadata.generated_at.strftime("%Y%m%dT%H%M%SZ")
+
+    archive_root = f"Invoices/{report_month}/Archive"
+    main_root = f"Invoices/{report_month}/Service Invoices"
+
+    primary_location = f"{main_root}/{cluster_name} {report_month}.csv"
+    daily_report_location = f"{main_root}/{cluster_name} {report_date}.csv"
+    secondary_location = f"{archive_root}/{cluster_name} {report_month} {timestamp}.csv"
+    pod_report_location = f"{archive_root}/Pod-{cluster_name} {report_month} {timestamp}.csv"
+    class_invoice_location = f"{archive_root}/Class-{cluster_name} {report_month} {timestamp}.csv"
+
+    for file, location in [
+        (invoice_file, primary_location),
+        (invoice_file, daily_report_location),
+        (invoice_file, secondary_location),
+        (pod_report_file, pod_report_location),
+        (class_invoice_file, class_invoice_location),
+    ]:
+        utils.upload_to_s3(file, S3_INVOICE_BUCKET, location)
+
+
 def main():
     """Reads the metrics from files and generates the reports"""
     parser = argparse.ArgumentParser()
     parser.add_argument("files", nargs="+")
     parser.add_argument(
         "--invoice-file",
-        help="Name of the invoice file. Defaults to NERC OpenShift <report_month>.csv",
+        help="Name of the invoice file. Defaults to <cluster_name> <report_month>.csv",
     )
     parser.add_argument(
         "--pod-report-file",
-        help="Name of the pod report file. Defaults to Pod NERC OpenShift <report_month>.csv",
+        help="Name of the pod report file. Defaults to Pod-<cluster_name> <report_month>.csv",
     )
     parser.add_argument(
         "--class-invoice-file",
-        help="Name of the class report file. Defaults to NERC OpenShift Class <report_month>.csv",
+        help="Name of the class report file. Defaults to Class-<cluster_name> <report_month>.csv",
     )
     parser.add_argument("--upload-to-s3", action="store_true")
     parser.add_argument(
@@ -199,85 +266,41 @@ def main():
     parser.add_argument("--rate-gpu-h100-su", type=Decimal)
 
     args = parser.parse_args()
-    files = args.files
 
+    # load metadata and set frequencly used variables.
+    files = args.files
     metrics_metadata = load_metrics_metadata(files)
 
-    report_start_date = metrics_metadata.report_start_date
-    report_end_date = metrics_metadata.report_end_date
     cluster_name = metrics_metadata.cluster_name
     report_month = metrics_metadata.report_month
 
-    processor = load_and_merge_metrics(metrics_metadata.interval_minutes, files)
-
-    if args.use_nerc_rates:
-        logger.info("Using nerc rates for rates and outages")
-        rates_data = rates.load_from_url()
-        invoice_rates = invoice.Rates(
-            cpu=rates_data.get_value_at("CPU SU Rate", report_month, Decimal),
-            gpu_a100=rates_data.get_value_at("GPUA100 SU Rate", report_month, Decimal),
-            gpu_a100sxm4=rates_data.get_value_at(
-                "GPUA100SXM4 SU Rate", report_month, Decimal
-            ),
-            gpu_v100=rates_data.get_value_at("GPUV100 SU Rate", report_month, Decimal),
-            gpu_h100=rates_data.get_value_at("GPUH100 SU Rate", report_month, Decimal),
-        )
-        outage_data = outages.load_from_url()
-        ignore_hours = outage_data.get_outages_during(
-            report_start_date, report_end_date, cluster_name
-        )
-    else:
-        invoice_rates = invoice.Rates(
-            cpu=Decimal(args.rate_cpu_su),
-            gpu_a100=Decimal(args.rate_gpu_a100_su),
-            gpu_a100sxm4=Decimal(args.rate_gpu_a100sxm4_su),
-            gpu_v100=Decimal(args.rate_gpu_v100_su),
-            gpu_h100=Decimal(args.rate_gpu_h100_su),
-        )
-        ignore_hours = args.ignore_hours
-
-    if bool(ignore_hours):  # could be None or []
-        for start_time, end_time in ignore_hours:
-            logger.info(f"{start_time} to {end_time} will be excluded from the invoice")
-
-    if args.invoice_file:
-        invoice_file = args.invoice_file
-    else:
-        invoice_file = f"{cluster_name} {report_month}.csv"
-
-    if args.class_invoice_file:
-        class_invoice_file = args.class_invoice_file
-    else:
-        class_invoice_file = f"Classes-{cluster_name} {report_month}.csv"
-
-    if args.pod_report_file:
-        pod_report_file = args.pod_report_file
-    else:
-        pod_report_file = f"Pod-{cluster_name} {report_month}.csv"
-
-    report_start_date = datetime.strptime(report_start_date, "%Y-%m-%d").replace(
-        tzinfo=UTC
-    )
-    report_end_date = datetime.strptime(report_end_date, "%Y-%m-%d").replace(tzinfo=UTC)
+    invoice_file = args.invoice_file or f"{cluster_name} {report_month}.csv"
+    class_invoice_file = args.class_invoice_file or f"Classes-{cluster_name} {report_month}.csv"
+    pod_report_file = args.pod_report_file or f"Pod-{cluster_name} {report_month}.csv"
 
     logger.info(
-        f"Generating report from {report_start_date} to {report_end_date + timedelta(days=1)} for {cluster_name}"
+        f"Generating report from {metrics_metadata.start_time_utc} to {metrics_metadata.end_time_utc + timedelta(days=1)} for {cluster_name}"
     )
 
+    # load and merge the metrics from the files, followed by condensing the metrics.
+    processor = load_and_merge_metrics(metrics_metadata.interval_minutes, files)
     condensed_metrics_dict = processor.condense_metrics(
         ["cpu_request", "memory_request", "gpu_request", "gpu_type"]
     )
 
+    # gather invoice rates and su defitions.
+    invoice_rates, ignore_hours = get_rates_and_outages(args, metrics_metadata)
     su_definitions = get_su_definitions(report_month)
+
+    # generate the reports
     current_time = datetime.now(UTC)
     report_metadata = invoice.ReportMetadata(
         report_month=report_month,
         cluster_name=cluster_name,
-        report_start_time=report_start_date,
-        report_end_time=report_end_date + timedelta(days=1),
+        report_start_time=metrics_metadata.start_time_utc,
+        report_end_time=metrics_metadata.end_time_utc + timedelta(days=1),
         generated_at=current_time,
     )
-
     utils.write_metrics_by_namespace(
         condensed_metrics_dict=condensed_metrics_dict,
         file_name=invoice_file,
@@ -303,34 +326,10 @@ def main():
     )
 
     if args.upload_to_s3:
-        primary_location = (
-            f"Invoices/{report_month}/"
-            f"Service Invoices/{cluster_name} {report_month}.csv"
-        )
-        utils.upload_to_s3(invoice_file, S3_INVOICE_BUCKET, primary_location)
-        report_date = report_end_date.strftime("%Y-%m-%d")
-        daily_report_location = (
-            f"Invoices/{report_month}/Service Invoices/{cluster_name} {report_date}.csv"
-        )
-        utils.upload_to_s3(invoice_file, S3_INVOICE_BUCKET, daily_report_location)
-
-        timestamp = current_time.strftime("%Y%m%dT%H%M%SZ")
-        secondary_location = (
-            f"Invoices/{report_month}/"
-            f"Archive/{cluster_name} {report_month} {timestamp}.csv"
-        )
-        utils.upload_to_s3(invoice_file, S3_INVOICE_BUCKET, secondary_location)
-        pod_report_location = (
-            f"Invoices/{report_month}/"
-            f"Archive/Pod-{cluster_name} {report_month} {timestamp}.csv"
-        )
-        utils.upload_to_s3(pod_report_file, S3_INVOICE_BUCKET, pod_report_location)
-        class_invoice_location = (
-            f"Invoices/{report_month}/"
-            f"Archive/Class-{cluster_name} {report_month} {timestamp}.csv"
-        )
-        utils.upload_to_s3(
-            class_invoice_file, S3_INVOICE_BUCKET, class_invoice_location
+        upload_reports_to_s3(report_metadata=report_metadata,
+                             invoice_file=invoice_file,
+                             pod_report_file=pod_report_file,
+                             class_invoice_file=class_invoice_file
         )
 
 

From 38e6c9f6991caa31cbfad2e0af2572ed24c43abb Mon Sep 17 00:00:00 2001
From: Naved Ansari <naved001@gmail.com>
Date: Tue, 3 Mar 2026 12:26:36 -0500
Subject: [PATCH 4/5] Add tests for merge.py

And switch to pytests as the test runner.
---
 .github/workflows/unit-tests.yaml     |   2 +-
 openshift_metrics/tests/conftest.py   | 143 ++++++++++++++++++++++++++
 openshift_metrics/tests/test_merge.py | 113 ++++++++++++++++++++
 requirements.txt                      |   2 +
 4 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 openshift_metrics/tests/conftest.py
 create mode 100644 openshift_metrics/tests/test_merge.py

diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
index 1d1632a..4241c0d 100644
--- a/.github/workflows/unit-tests.yaml
+++ b/.github/workflows/unit-tests.yaml
@@ -22,4 +22,4 @@ jobs:
 
       - name: Run unit tests
         run: |
-          python -m unittest openshift_metrics/tests/test_*
+          pytest openshift_metrics/tests/
diff --git a/openshift_metrics/tests/conftest.py b/openshift_metrics/tests/conftest.py
new file mode 100644
index 0000000..9c64f21
--- /dev/null
+++ b/openshift_metrics/tests/conftest.py
@@ -0,0 +1,143 @@
+import pytest
+
+
+@pytest.fixture
+def mock_metrics_file1():
+    cpu_metrics = [
+        {
+            "metric": {
+                "pod": "pod1",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [0, 10],
+                [60, 15],
+                [120, 20],
+            ],
+        },
+        {
+            "metric": {
+                "pod": "pod2",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [0, 30],
+                [60, 35],
+                [120, 40],
+            ],
+        },
+    ]
+    memory_metrics = [
+        {
+            "metric": {
+                "pod": "pod1",
+                "namespace": "namespace1",
+                "resource": "memory",
+            },
+            "values": [
+                [0, 10],
+                [60, 15],
+                [120, 20],
+            ],
+        },
+        {
+            "metric": {
+                "pod": "pod2",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [0, 30],
+                [60, 35],
+                [120, 40],
+            ],
+        },
+    ]
+    return {
+        "cluster_name": "ocp-prod",
+        "start_date": "2025-09-20",
+        "end_date": "2025-09-20",
+        "interval_minutes": 15,
+        "cpu_metrics": cpu_metrics,
+        "memory_metrics": memory_metrics,
+    }
+
+
+@pytest.fixture
+def mock_metrics_file2():
+    cpu_metrics = [
+        {
+            "metric": {
+                "pod": "pod1",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [180, 10],
+                [240, 15],
+                [300, 20],
+            ],
+        },
+        {
+            "metric": {
+                "pod": "pod2",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [180, 30],
+                [240, 35],
+                [300, 40],
+            ],
+        },
+    ]
+    memory_metrics = [
+        {
+            "metric": {
+                "pod": "pod1",
+                "namespace": "namespace1",
+                "resource": "memory",
+            },
+            "values": [
+                [180, 10],
+                [240, 15],
+                [300, 20],
+            ],
+        },
+        {
+            "metric": {
+                "pod": "pod2",
+                "namespace": "namespace1",
+                "resource": "cpu",
+            },
+            "values": [
+                [180, 30],
+                [240, 35],
+                [300, 40],
+            ],
+        },
+    ]
+    return {
+        "cluster_name": "ocp-prod",
+        "start_date": "2025-09-21",
+        "end_date": "2025-09-21",
+        "cpu_metrics": cpu_metrics,
+        "memory_metrics": memory_metrics,
+        "interval_minutes": 15,
+    }
+
+
+@pytest.fixture
+def mock_metrics_file3():
+    cpu_metrics = []
+    memory_metrics = []
+    return {
+        "cluster_name": "ocp-prod",
+        "start_date": "2025-09-21",
+        "end_date": "2025-09-21",
+        "interval_minutes": 3,  # file1 and file2 have 15 minutes
+        "cpu_metrics": cpu_metrics,
+        "memory_metrics": memory_metrics,
+    }
diff --git a/openshift_metrics/tests/test_merge.py b/openshift_metrics/tests/test_merge.py
new file mode 100644
index 0000000..b12d6da
--- /dev/null
+++ b/openshift_metrics/tests/test_merge.py
@@ -0,0 +1,113 @@
+import pytest
+import json
+from decimal import Decimal
+
+from openshift_metrics.merge import (
+    compare_dates,
+    get_su_definitions,
+    load_and_merge_metrics,
+    load_metrics_metadata,
+)
+
+
+@pytest.mark.parametrize(
+    "date1, date2, expected_result",
+    [
+        ("2025-01-18", "2025-01-20", True),
+        ("2025-01-18", "2025-01-16", False),
+        ("2025-01-18", "2025-01-18", False),
+    ],
+)
+def test_compare_dates(date1, date2, expected_result):
+    assert compare_dates(date1, date2) is expected_result
+
+
+def test_get_su_definitions(mocker):
+    mock_rates = {
+        "vCPUs in GPUV100 SU": Decimal("20"),
+        "RAM in GPUV100 SU": Decimal("8192"),
+        "GPUs in GPUV100 SU": Decimal("1"),
+        "vCPUs in CPU SU": Decimal("5"),
+        "RAM in CPU SU": Decimal("1024"),
+        "GPUs in CPU SU": Decimal("0"),
+    }
+    mock_rates_data = mocker.MagicMock()
+
+    def mock_get_value_at(key, month, value_type):
+        return mock_rates.get(key, Decimal("67"))
+
+    mock_rates_data.get_value_at.side_effect = mock_get_value_at
+    mocker.patch(
+        "openshift_metrics.merge.rates.load_from_url", return_value=mock_rates_data
+    )
+    report_month = "2025-10"
+    su_definitions = get_su_definitions(report_month)
+
+    assert "OpenShift GPUV100" in su_definitions
+    assert su_definitions["OpenShift GPUV100"]["vCPUs"] == Decimal("20")
+    assert su_definitions["OpenShift GPUV100"]["RAM"] == Decimal("8192")
+    assert su_definitions["OpenShift GPUV100"]["GPUs"] == Decimal("1")
+
+    assert "OpenShift CPU" in su_definitions
+    assert su_definitions["OpenShift CPU"]["vCPUs"] == Decimal("5")
+    assert su_definitions["OpenShift CPU"]["RAM"] == Decimal("1024")
+    assert su_definitions["OpenShift CPU"]["GPUs"] == Decimal("0")
+
+    # This should get the default test value
+    assert su_definitions["OpenShift GPUH100"]["GPUs"] == Decimal("67")
+
+
+def test_load_and_merge_data(tmp_path, mock_metrics_file1, mock_metrics_file2):
+    """
+    Test that we can load metrics from the 2 files and merge the metrics from those.
+
+    Note that we already have tests that test the merging of the data, this mostly
+    focuses on the loading part.
+    """
+    p1 = tmp_path / "file1.json"
+    p2 = tmp_path / "file2.json"
+
+    p1.write_text(json.dumps(mock_metrics_file1))
+    p2.write_text(json.dumps(mock_metrics_file2))
+
+    processor = load_and_merge_metrics(2, [p1, p2])
+
+    pod1_metrics = processor.merged_data["namespace1"]["pod1"]["metrics"]
+
+    # check values from file1.json are in the merged_data
+    assert 60 in pod1_metrics  # 60 is the epoch time stamp
+    assert pod1_metrics[60]["cpu_request"] == 15
+    assert pod1_metrics[60]["memory_request"] == 15
+
+    # check values from file2.json are in the merged_data
+    assert 180 in pod1_metrics
+    assert pod1_metrics[180]["cpu_request"] == 10
+    assert pod1_metrics[180]["memory_request"] == 10
+
+
+def test_load_metrics_metadata(tmp_path, mock_metrics_file1, mock_metrics_file2):
+    """Test we can load metadata from the metrics files."""
+
+    p1 = tmp_path / "file1.json"
+    p2 = tmp_path / "file2.json"
+    p1.write_text(json.dumps(mock_metrics_file1))
+    p2.write_text(json.dumps(mock_metrics_file2))
+    metadata = load_metrics_metadata([p1, p2])
+    assert metadata.cluster_name == "ocp-prod"
+    assert metadata.report_start_date == "2025-09-20"
+    assert metadata.report_end_date == "2025-09-21"
+    assert metadata.interval_minutes == 15
+
+
+def test_load_metrics_metadata_failure(
+    tmp_path, mock_metrics_file2, mock_metrics_file3
+):
+    """Test that loading metadata fails when files have different interval_minutes."""
+
+    p2 = tmp_path / "file2.json"
+    p3 = tmp_path / "file3.json"
+    p2.write_text(json.dumps(mock_metrics_file2))
+    p3.write_text(json.dumps(mock_metrics_file3))
+
+    with pytest.raises(SystemExit):
+        load_metrics_metadata([p2, p3])
diff --git a/requirements.txt b/requirements.txt
index 8dc9c90..4c632f6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 requests>=2.18.4
 boto3>=1.42.6,<2.0
 nerc-rates>=1.0.1
+pytest
+pytest-mock

From c00aec53591cc31d41a27ac791eefe11ba45ead9 Mon Sep 17 00:00:00 2001
From: Naved Ansari <naved001@gmail.com>
Date: Tue, 3 Mar 2026 12:40:00 -0500
Subject: [PATCH 5/5] Formatting fixes

---
 openshift_metrics/merge.py | 46 ++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py
index 6eebfd0..ecd052d 100644
--- a/openshift_metrics/merge.py
+++ b/openshift_metrics/merge.py
@@ -41,6 +41,7 @@ def start_time_utc(self) -> datetime:
     def end_time_utc(self) -> datetime:
         return datetime.strptime(self.report_end_date, "%Y-%m-%d").replace(tzinfo=UTC)
 
+
 def load_metrics_metadata(files: List[str]) -> MetricsMetadata:
     """
     Load only the metadata from the metrics files.
@@ -124,6 +125,7 @@ def load_and_merge_metrics(interval_minutes, files: List[str]) -> MetricsProcess
     logger.info(f"Total metric files read: {len(files)}")
     return processor
 
+
 def compare_dates(date_str1, date_str2):
     """Returns true is date1 is earlier than date2"""
     date1 = datetime.strptime(date_str1, "%Y-%m-%d")
@@ -172,18 +174,26 @@ def get_su_definitions(report_month) -> dict:
     return su_definitions
 
 
-def get_rates_and_outages(args: argparse.Namespace, meta: MetricsMetadata) -> Tuple[invoice.Rates, list]:
+def get_rates_and_outages(
+    args: argparse.Namespace, meta: MetricsMetadata
+) -> Tuple[invoice.Rates, list]:
     if args.use_nerc_rates:
         logger.info("Using nerc rates for rates and outages")
         rates_data = rates.load_from_url()
         invoice_rates = invoice.Rates(
             cpu=rates_data.get_value_at("CPU SU Rate", meta.report_month, Decimal),
-            gpu_a100=rates_data.get_value_at("GPUA100 SU Rate", meta.report_month, Decimal),
+            gpu_a100=rates_data.get_value_at(
+                "GPUA100 SU Rate", meta.report_month, Decimal
+            ),
             gpu_a100sxm4=rates_data.get_value_at(
                 "GPUA100SXM4 SU Rate", meta.report_month, Decimal
             ),
-            gpu_v100=rates_data.get_value_at("GPUV100 SU Rate", meta.report_month, Decimal),
-            gpu_h100=rates_data.get_value_at("GPUH100 SU Rate", meta.report_month, Decimal),
+            gpu_v100=rates_data.get_value_at(
+                "GPUV100 SU Rate", meta.report_month, Decimal
+            ),
+            gpu_h100=rates_data.get_value_at(
+                "GPUH100 SU Rate", meta.report_month, Decimal
+            ),
         )
         outage_data = outages.load_from_url()
         ignore_hours = outage_data.get_outages_during(
@@ -206,7 +216,12 @@ def get_rates_and_outages(args: argparse.Namespace, meta: MetricsMetadata) -> Tu
     return invoice_rates, ignore_hours
 
 
-def upload_reports_to_s3(report_metadata: invoice.ReportMetadata, invoice_file: str, pod_report_file: str, class_invoice_file: str):
+def upload_reports_to_s3(
+    report_metadata: invoice.ReportMetadata,
+    invoice_file: str,
+    pod_report_file: str,
+    class_invoice_file: str,
+):
     report_month = report_metadata.report_month
     cluster_name = report_metadata.cluster_name
     report_date = report_metadata.report_end_time.strftime("%Y-%m-%d")
@@ -218,8 +233,12 @@ def upload_reports_to_s3(report_metadata: invoice.ReportMetadata, invoice_file:
     primary_location = f"{main_root}/{cluster_name} {report_month}.csv"
     daily_report_location = f"{main_root}/{cluster_name} {report_date}.csv"
     secondary_location = f"{archive_root}/{cluster_name} {report_month} {timestamp}.csv"
-    pod_report_location = f"{archive_root}/Pod-{cluster_name} {report_month} {timestamp}.csv"
-    class_invoice_location = f"{archive_root}/Class-{cluster_name} {report_month} {timestamp}.csv"
+    pod_report_location = (
+        f"{archive_root}/Pod-{cluster_name} {report_month} {timestamp}.csv"
+    )
+    class_invoice_location = (
+        f"{archive_root}/Class-{cluster_name} {report_month} {timestamp}.csv"
+    )
 
     for file, location in [
         (invoice_file, primary_location),
@@ -275,7 +294,9 @@ def main():
     report_month = metrics_metadata.report_month
 
     invoice_file = args.invoice_file or f"{cluster_name} {report_month}.csv"
-    class_invoice_file = args.class_invoice_file or f"Classes-{cluster_name} {report_month}.csv"
+    class_invoice_file = (
+        args.class_invoice_file or f"Classes-{cluster_name} {report_month}.csv"
+    )
     pod_report_file = args.pod_report_file or f"Pod-{cluster_name} {report_month}.csv"
 
     logger.info(
@@ -326,10 +347,11 @@ def main():
     )
 
     if args.upload_to_s3:
-        upload_reports_to_s3(report_metadata=report_metadata,
-                             invoice_file=invoice_file,
-                             pod_report_file=pod_report_file,
-                             class_invoice_file=class_invoice_file
+        upload_reports_to_s3(
+            report_metadata=report_metadata,
+            invoice_file=invoice_file,
+            pod_report_file=pod_report_file,
+            class_invoice_file=class_invoice_file,
         )