3 changes: 3 additions & 0 deletions .env.example
@@ -27,3 +27,6 @@ GUARDRAILS_HUB_API_KEY="<ADD-KEY>"
AUTH_TOKEN="<ADD-HASH-TOKEN>"
KAAPI_AUTH_URL="<ADD-KAAPI-AUTH-URL>"
KAAPI_AUTH_TIMEOUT=5

# URL for the guardrails API — required for the multiple_validators evaluation script
GUARDRAILS_API_URL="http://localhost:8001/api/v1/guardrails/"
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

```bash
#!/bin/bash
# Description: Verify GUARDRAILS_API_URL usage and port configuration

# Check how GUARDRAILS_API_URL is used in the evaluation script
echo "=== Checking GUARDRAILS_API_URL usage ==="
rg -n "GUARDRAILS_API_URL" --type=py -C3

# Check if port 8001 is configured elsewhere or if 8000 should be used
echo -e "\n=== Checking port configurations ==="
rg -n "8001|8000" --type=py --type=md -C2
```

Repository: ProjectTech4DevAI/kaapi-guardrails

Remove quotes for consistency with other environment variables.

The value should not be wrapped in double quotes. For consistency with API_BASE_URL on line 9 and to address the static analysis warning, remove the quotes:

```diff
-GUARDRAILS_API_URL="http://localhost:8001/api/v1/guardrails/"
+GUARDRAILS_API_URL=http://localhost:8001/api/v1/guardrails/
```
🧰 Tools
🪛 dotenv-linter (4.0.0)

[warning] 32-32: [QuoteCharacter] The value has quote characters (', ")

(QuoteCharacter)


22 changes: 21 additions & 1 deletion backend/README.md
@@ -143,7 +143,27 @@ This script runs the evaluators in sequence:
- `app/evaluation/gender_assumption_bias/run.py`
- `app/evaluation/ban_list/run.py`

To evaluate any specific evaluator, run the offline evaluation script: `python <validator's eval script path>`

## Multiple validators evaluation

To run an end-to-end evaluation combining multiple validators against a dataset via the live API:

1. Download the multi-validator dataset from [Google Drive](https://drive.google.com/drive/u/0/folders/1Rd1LH-oEwCkU0pBDRrYYedExorwmXA89) and place it in `backend/app/evaluation/datasets/` as `multi_validator_whatsapp_dataset.csv`.

2. Edit `backend/app/evaluation/multiple_validators/config.json` to configure which validators to run, their parameters, and the dataset/output paths.

   For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to:
   `backend/app/core/validators/README.md`

3. Ensure `GUARDRAILS_API_URL` is set in your `.env` file (see `.env.example`). Optionally set `GUARDRAILS_TIMEOUT_SECONDS` to override the default request timeout of 60s.

4. Run the script from the `backend` directory:

   ```bash
   python -m app.evaluation.multiple_validators.run --auth_token <your-token>
   ```

Output is written to the `out_path` configured in `config.json`, by default `backend/app/evaluation/outputs/multi_validator_whatsapp/predictions.csv`.
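Once the run completes, the predictions file can be inspected with pandas. The sketch below assumes the column schema written by `run.py` (`ID`, `text`, `validators_present`, `response`) and uses a small synthetic DataFrame in place of the real CSV; failed requests are the rows `run.py` tags with an error prefix.

```python
import pandas as pd

def summarize_predictions(df: pd.DataFrame) -> tuple[int, int]:
    """Count rows whose response is an error marker written by run.py."""
    resp = df["response"].astype(str)
    failed = resp.str.startswith("REQUEST_ERROR") | resp.str.startswith("JSON_ERROR")
    return int(failed.sum()), len(df)

# Synthetic stand-in for outputs/multi_validator_whatsapp/predictions.csv
df = pd.DataFrame(
    {
        "ID": [1, 2],
        "validators_present": ["pii_remover", "ban_list"],
        "response": ["clean text", "REQUEST_ERROR: timeout"],
    }
)
print(summarize_predictions(df))  # → (1, 2)
```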

## Validator configuration guide

Expand Down
23 changes: 23 additions & 0 deletions backend/app/evaluation/multiple_validators/config.json
@@ -0,0 +1,23 @@
{
  "_comment": "Edit this file to configure the evaluation run. All paths are relative to the 'evaluation' directory (i.e. backend/app/evaluation). Add or remove entries in 'validators' to control which validators run and with what settings.",
  "dataset_path": "datasets/multi_validator_whatsapp_dataset.csv",
  "out_path": "outputs/multi_validator_whatsapp/predictions.csv",
  "organization_id": 1,
  "project_id": 1,
  "validators": [
    {
      "type": "uli_slur_match",
      "severity": "all",
      "on_fail": "fix"
    },
    {
      "type": "pii_remover",
      "on_fail": "fix"
    },
    {
      "type": "ban_list",
      "banned_words": ["sonography"],
      "on_fail": "fix"
    }
  ]
}
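A config like the one above can be sanity-checked before a run. The helper below is hypothetical (not part of the repo), but the required keys mirror what `run.py` actually reads: `dataset_path`, `out_path`, `organization_id`, `project_id`, and a non-empty `validators` list where each entry carries a `type`.

```python
import json

REQUIRED_KEYS = {"dataset_path", "out_path", "organization_id", "project_id", "validators"}

def check_config(config: dict) -> list[str]:
    """Return a list of problems; an empty list means the config looks usable."""
    problems = [f"missing key: {k}" for k in sorted(REQUIRED_KEYS - config.keys())]
    if not config.get("validators"):
        problems.append("'validators' must be a non-empty list")
    for i, v in enumerate(config.get("validators") or []):
        if "type" not in v:
            problems.append(f"validators[{i}] has no 'type'")
    return problems

config = json.loads('{"dataset_path": "d.csv", "validators": [{"type": "ban_list"}]}')
print(check_config(config))
# → ['missing key: organization_id', 'missing key: out_path', 'missing key: project_id']
```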
115 changes: 115 additions & 0 deletions backend/app/evaluation/multiple_validators/run.py
@@ -0,0 +1,115 @@
import json
from pathlib import Path
import argparse
import os
from uuid import uuid4

import httpx
import pandas as pd

from app.evaluation.common.helper import write_csv
from app.load_env import load_environment

load_environment()

BASE_DIR = Path(__file__).resolve().parent.parent

API_URL = os.getenv("GUARDRAILS_API_URL")
if not API_URL:
    raise ValueError("GUARDRAILS_API_URL environment variable must be set.")
TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60"))


def load_config(config_path: Path) -> dict:
    with open(config_path) as f:
        return json.load(f)


def call_guardrails(
    text: str,
    validators_payload: list[dict],
    organization_id: int,
    project_id: int,
    auth_token: str,
) -> str:
    headers = {"Content-Type": "application/json"}
    if auth_token:
        headers["Authorization"] = f"Bearer {auth_token}"

    payload = {
        "request_id": str(uuid4()),
        "organization_id": organization_id,
        "project_id": project_id,
        "input": text,
        "validators": validators_payload,
    }

    try:
        response = httpx.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        body = response.json()
        safe_text = body.get("data", {}).get("safe_text")
        if safe_text is None:
            return ""
        return str(safe_text)
    except httpx.HTTPError as exc:
        return f"REQUEST_ERROR: {exc}"
    except ValueError as exc:
        return f"JSON_ERROR: {exc}"


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--auth_token",
        required=True,
        help="Bearer token value (without the 'Bearer ' prefix).",
    )
    args = parser.parse_args()

    config = load_config(Path(__file__).resolve().parent / "config.json")

    dataset_path = BASE_DIR / config["dataset_path"]
    out_path = BASE_DIR / config["out_path"]
    organization_id = config["organization_id"]
    project_id = config["project_id"]
    validators_payload = config["validators"]

    if not validators_payload:
        raise ValueError("No validators defined in config.")

    df = pd.read_csv(dataset_path)

    rows = []
    for _, row in df.iterrows():
        source_text = str(row.get("Text", ""))
        safe_text = call_guardrails(
            source_text,
            validators_payload,
            organization_id,
            project_id,
            args.auth_token,
        )

        rows.append(
            {
                "ID": row.get("ID"),
                "text": source_text,
                "validators_present": row.get("Validators_present", ""),
                "response": safe_text,
            }
        )

    out_df = pd.DataFrame(
        rows, columns=["ID", "text", "validators_present", "response"]
    )
    write_csv(out_df, out_path)


if __name__ == "__main__":
    main()