Skip to content

Commit 391bc11

Browse files
Committed: "merging latest changes from main"
Merge commit 391bc11 with 2 parents: abf512f and a789b91

File tree

6 files changed

+132
-51
lines changed

6 files changed

+132
-51
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ config/make_request.env
1515
config/pp_daily_test.env
1616
config/restoreunstable.env
1717
config/sendimagereq.env
18+
config/slack-webhook-preprocessors.env
1819
*.sh
1920
docker-compose.override.yml

handlers/map-tactile-svg/map-svg.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import time
2323
import drawSvg as draw
2424
from datetime import datetime
25+
import math
2526
from config.logging_utils import configure_logging
2627

2728
configure_logging()
@@ -115,22 +116,25 @@ def handle():
115116
return response
116117

117118
dimensions = 700, 700
119+
data = preprocessor["ca.mcgill.a11y.image.preprocessor.openstreetmap"]
120+
lat = data["bounds"]["latitude"]
121+
lon = data["bounds"]["longitude"]
122+
coords = getMidpoint(contents)
118123

119124
renderingDescription = ("Tactile rendering of map centered at latitude " +
120-
str(contents["coordinates"]["latitude"]) +
125+
str(coords["latitude"]) +
121126
" and longitude " +
122-
str(contents["coordinates"]["longitude"]))
127+
str(coords["longitude"]))
123128
caption = ("Map centered at latitude " +
124-
str(contents["coordinates"]["latitude"]) +
129+
str(coords["latitude"]) +
125130
" and longitude " +
126-
str(contents["coordinates"]["longitude"]))
131+
str(coords["longitude"]))
127132
# List of minor street types ('footway', 'crossing' and 'steps')
128133
# to be filtered out to simplify the resulting rendering
129134
remove_streets = ["footway", "crossing", "steps", "elevator"]
130135
svg = draw.Drawing(dimensions[0], dimensions[1],
131136
origin=(0, -dimensions[1]))
132137

133-
data = preprocessor["ca.mcgill.a11y.image.preprocessor.openstreetmap"]
134138
if "streets" in data:
135139
streets = data["streets"]
136140
lat = data["bounds"]["latitude"]
@@ -438,14 +442,12 @@ def getNodeCategoryData(POI):
438442
category = POI["cat"]
439443
match category:
440444
case "crossing":
441-
if POI["crossing"] == "marked":
442-
tag += "Marked crossing, "
443-
elif POI["crossing"] == "unmarked":
444-
tag += "Unmarked crossing, "
445-
elif POI["crossing"] == "traffic_signals":
446-
tag += "Crossing with traffic signal, "
447-
else:
448-
tag += "Crossing, "
445+
crossing_types = {
446+
"marked": "Marked crossing, ",
447+
"unmarked": "Unmarked crossing, ",
448+
"traffic_signals": "Crossing with traffic signal, "
449+
}
450+
tag += crossing_types.get(POI.get("crossing"), "Crossing, ")
449451
case "traffic_signals":
450452
tag += "Traffic lights present, "
451453
case _:
@@ -491,5 +493,42 @@ def health():
491493
}), 200
492494

493495

496+
def getMidpoint(contents):
497+
if "coordinates" in contents:
498+
logging.debug("Coordinates found in request")
499+
return {"latitude": contents["coordinates"]["latitude"],
500+
"longitude": contents["coordinates"]["longitude"]}
501+
502+
logging.debug("Coordinates not found in request. "
503+
"Calculating midpoint from bounds.")
504+
data = contents["preprocessors"][
505+
"ca.mcgill.a11y.image.preprocessor.openstreetmap"]
506+
lat = data["bounds"]["latitude"]
507+
lon = data["bounds"]["longitude"]
508+
509+
# Convert degrees to radians
510+
lat1, lon1, lat2, lon2 = map(math.radians,
511+
[lat["min"], lon["min"],
512+
lat["max"], lon["max"]])
513+
514+
# Convert to Cartesian coordinates
515+
x1, y1, z1 = (math.cos(lat1) * math.cos(lon1),
516+
math.cos(lat1) * math.sin(lon1), math.sin(lat1))
517+
x2, y2, z2 = (math.cos(lat2) * math.cos(lon2),
518+
math.cos(lat2) * math.sin(lon2), math.sin(lat2))
519+
520+
# Compute the midpoint in Cartesian coordinates
521+
x_m, y_m, z_m = (x1 + x2) / 2, (y1 + y2) / 2, (z1 + z2) / 2
522+
523+
# Convert back to latitude and longitude
524+
lon_m = math.atan2(y_m, x_m)
525+
hyp = math.sqrt(x_m**2 + y_m**2)
526+
lat_m = math.atan2(z_m, hyp)
527+
528+
# Convert radians back to degrees
529+
return {"latitude": round(math.degrees(lat_m), 6),
530+
"longitude": round(math.degrees(lon_m), 6)}
531+
532+
494533
if __name__ == "__main__":
495534
app.run(host="0.0.0.0", port=80, debug=True)

handlers/photo-audio-handler/src/server.ts

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,6 @@ app.post("/handler", async (req, res) => {
135135
}
136136
}
137137

138-
// Generate rendering title
139-
const renderingTitle = utils.renderingTitle(semseg, objDet, objGroup);
140-
141138
// Handle language if targetLanguage is not English
142139
if (targetLanguage != "en") {
143140
console.debug(`Translating ttsData values to ${targetLanguage}"`);
@@ -163,7 +160,7 @@ app.post("/handler", async (req, res) => {
163160
const textString = ttsData.map(x => x["value"]).join(" ");
164161
const rendering = {
165162
"type_id": "ca.mcgill.a11y.image.renderer.Text",
166-
"description": renderingTitle + " (text only)",
163+
"description": "Text description",
167164
"data": { "text": textString }
168165
};
169166
if (ajv.validate("https://image.a11y.mcgill.ca/renderers/text.schema.json", rendering["data"])) {
@@ -237,7 +234,7 @@ app.post("/handler", async (req, res) => {
237234
console.debug("Constructing segment audio rendering")
238235
const rendering = {
239236
"type_id": "ca.mcgill.a11y.image.renderer.SegmentAudio",
240-
"description": renderingTitle,
237+
"description": "Rich audio description",
241238
"data": {
242239
"audioFile": dataURL,
243240
"audioInfo": segArray
@@ -256,7 +253,7 @@ app.post("/handler", async (req, res) => {
256253
console.debug("Constructing simple audio rendering")
257254
const rendering = {
258255
"type_id": "ca.mcgill.a11y.image.renderer.SimpleAudio",
259-
"description": renderingTitle,
256+
"description": "Rich audio description",
260257
"data": {
261258
"audio": dataURL
262259
},
@@ -291,22 +288,22 @@ app.post("/handler", async (req, res) => {
291288
}
292289

293290
// Translate renderings' description before sending response
294-
if (targetLanguage !== "en") {
295-
try {
296-
console.debug("Translating renderings description to " + targetLanguage);
297-
const translatedDesc = await utils.getTranslationSegments(
291+
if (targetLanguage !== "en") {
292+
try {
293+
console.debug("Translating renderings description to " + targetLanguage);
294+
const translatedDesc = await utils.getTranslationSegments(
298295
renderings.map(x => x["description"]),
299296
targetLanguage
300297
);
301298

302-
for (let i = 0; i < renderings.length; i++) {
303-
renderings[i]["description"] = translatedDesc[i];
304-
}
305-
} catch(e) {
306-
console.error("Failed to generate audio!");
299+
for (let i = 0; i < renderings.length; i++) {
300+
renderings[i]["description"] = translatedDesc[i];
301+
}
302+
} catch(e) {
303+
console.error("Failed to translate rendering descriptions to " + targetLanguage);
307304
piiLogger.pii(`Error: ${(e as Error).message}`);
308-
}
309-
}
305+
}
306+
}
310307
// Send response
311308

312309
const response = utils.generateEmptyResponse(req.body["request_uuid"]);

handlers/photo-audio-handler/src/utils.ts

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ export function generateActions(objs: Obj[], group: number[], actionRec: Action)
143143
const act = actionRec["actions"].find((x: { "personID": number }) => x["personID"] === id);
144144
if (act !== undefined) {
145145
const label = act["action"].trim();
146-
if (act["confidence"] < ACT_THRES) {
146+
if (act["confidence"] < ACT_THRES) {
147147
if (maybeActions[label]) {
148148
maybeActions[label].push(id);
149149
}
@@ -158,9 +158,9 @@ export function generateActions(objs: Obj[], group: number[], actionRec: Action)
158158
}
159159
else {
160160
other.push(id);
161-
}
161+
}
162162
}
163-
163+
164164
for (const label in actions) {
165165
const len = actions[label].length;
166166
const pType = len > 1 ? "people" : "person";
@@ -172,7 +172,7 @@ export function generateActions(objs: Obj[], group: number[], actionRec: Action)
172172
"label": pType + " " + actionTxt,
173173
"value": len.toString() + " " + pType + " " + actionTxt + ","
174174
};
175-
objects.push(object);
175+
objects.push(object);
176176
}
177177
for (const label in maybeActions) {
178178
const len = maybeActions[label].length;
@@ -184,7 +184,7 @@ export function generateActions(objs: Obj[], group: number[], actionRec: Action)
184184
"objects": acts,
185185
"value": len.toString() + " " + pType + " who might be " + actionTxt + ","
186186
};
187-
objects.push(object);
187+
objects.push(object);
188188
}
189189
if (other.length > 0) {
190190
const len = other.length;
@@ -307,7 +307,7 @@ export async function getTTS(text: string[], language: string): Promise<TTSRespo
307307
console.error(`photo-audio-handler doesn't support '${language}' language`);
308308
throw new Error("Unable to send segment to TTS");
309309
}
310-
310+
311311
return fetch(serviceURL, {
312312
method: "POST",
313313
headers: {
@@ -375,18 +375,3 @@ export async function sendOSC(jsonFile: string, outFile: string, server: string,
375375
})
376376
]);
377377
}
378-
379-
export function renderingTitle(semseg: { "segments": Record<string, unknown>[] }, objDet: ObjDet, objGroup: ObjGroup): string {
380-
console.debug("Rendering title")
381-
const hasSemseg = (semseg !== undefined) && (semseg["segments"].length > 0);
382-
const hasObj = (objDet !== undefined) && (objGroup !== undefined) && (objDet["objects"].length > 0);
383-
if (hasSemseg && hasObj) {
384-
return "Regions, things, and people";
385-
}
386-
else if (hasSemseg) {
387-
return "Outlines of regions";
388-
}
389-
else {
390-
return "Things and people";
391-
}
392-
}

preprocessors/mmsemseg/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,6 @@ ENV FLASK_APP=segment.py
5353
USER python
5454

5555
HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 CMD curl -f http://localhost:5000/health || exit 1
56+
HEALTHCHECK --interval=3600s --timeout=30s --start-period=120s --retries=3 CMD curl -f http://localhost:5000/health/gpu || exit 1
5657

5758
CMD [ "gunicorn", "segment:app", "-b", "0.0.0.0:5000", "--capture-output", "--log-level=debug" ]

preprocessors/mmsemseg/segment.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import logging
3737
from config.logging_utils import configure_logging
3838
from datetime import datetime
39+
import subprocess
3940

4041
configure_logging()
4142
# configuration and checkpoint files
@@ -262,5 +263,62 @@ def health():
262263
}), 200
263264

264265

266+
@app.route("/health/gpu", methods=["GET"])
267+
def gpu_driver_health_check():
268+
"""
269+
Enhanced health check:
270+
- Verifies CUDA & NVIDIA drivers are working
271+
- Detects if the loaded NVIDIA driver matches `nvidia-smi`
272+
- Ensures the container is using the correct GPU runtime
273+
"""
274+
275+
# Check if CUDA is available
276+
if not torch.cuda.is_available():
277+
return jsonify({
278+
"status": "unhealthy",
279+
"message": "CUDA not available inside the container",
280+
"recommendation": "Check if the container is running with GPU \
281+
access (--gpus all)"
282+
}), 500
283+
284+
try:
285+
# Get installed NVIDIA driver version from nvidia-smi
286+
nvidia_smi_version = subprocess.check_output(
287+
["nvidia-smi", "--query-gpu=driver_version",
288+
"--format=csv,noheader"],
289+
text=True
290+
).strip()
291+
292+
# Get loaded driver version from /proc/driver/nvidia/version
293+
loaded_driver_version = subprocess.check_output(
294+
["cat", "/proc/driver/nvidia/version"], text=True
295+
).split("\n")[0]
296+
297+
# Ensure they match
298+
if nvidia_smi_version not in loaded_driver_version:
299+
return jsonify({
300+
"status": "unhealthy",
301+
"message": "NVIDIA driver mismatch detected",
302+
"nvidia_smi_version": nvidia_smi_version,
303+
"loaded_driver_version": loaded_driver_version,
304+
"recommendation": "Reboot the system to ensure the correct \
305+
driver is loaded?"
306+
}), 500
307+
308+
return jsonify({
309+
"status": "healthy",
310+
"message": "NVIDIA drivers and CUDA are working correctly",
311+
"nvidia_smi_version": nvidia_smi_version,
312+
"loaded_driver_version": loaded_driver_version
313+
}), 200
314+
315+
except Exception as e:
316+
return jsonify({
317+
"status": "unhealthy",
318+
"message": f"NVIDIA driver check failed: {str(e)}",
319+
"recommendation": "Check driver installation and restart system"
320+
}), 500
321+
322+
265323
if __name__ == "__main__":
266324
app.run(host='0.0.0.0', port=5000, debug=True)

Comments (0)