From 0bd8545e5e3fddcc4fb1806c88755c1469328730 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Tue, 24 Feb 2026 15:18:47 +0530 Subject: [PATCH 1/4] Added evaluation of multiple validators together --- backend/app/api/routes/guardrails.py | 7 +- .../multi_validator_whatsapp/run.py | 120 ++++++++++++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 backend/app/evaluation/multi_validator_whatsapp/run.py diff --git a/backend/app/api/routes/guardrails.py b/backend/app/api/routes/guardrails.py index def2e61..4eb3abe 100644 --- a/backend/app/api/routes/guardrails.py +++ b/backend/app/api/routes/guardrails.py @@ -45,7 +45,12 @@ def run_guardrails( except ValueError: return APIResponse.failure_response(error="Invalid request_id") - _resolve_ban_list_banned_words(payload, session) + if any( + isinstance(validator, BanListSafetyValidatorConfig) + and validator.banned_words is None + for validator in payload.validators + ): + _resolve_ban_list_banned_words(payload, session) return _validate_with_guard( payload, request_log_crud, diff --git a/backend/app/evaluation/multi_validator_whatsapp/run.py b/backend/app/evaluation/multi_validator_whatsapp/run.py new file mode 100644 index 0000000..efa318b --- /dev/null +++ b/backend/app/evaluation/multi_validator_whatsapp/run.py @@ -0,0 +1,120 @@ +from pathlib import Path +import argparse +import os +from uuid import uuid4 + +import httpx +import pandas as pd + +from app.evaluation.common.helper import write_csv + +BASE_DIR = Path(__file__).resolve().parent.parent +DATASET_PATH = BASE_DIR / "datasets" / "multi_validator_whatsapp_dataset.csv" +OUT_PATH = BASE_DIR / "outputs" / "multi_validator_whatsapp" / "predictions.csv" + +API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") +TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60")) + +VALIDATOR_TEMPLATES = { + "uli_slur_match": { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "fix", + }, + 
"pii_remover": { + "type": "pii_remover", + "on_fail": "fix", + }, + "ban_list": { + "type": "ban_list", + "banned_words": ["sonography"], + "on_fail": "fix", + }, +} + + +def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) -> str: + headers = {"Content-Type": "application/json"} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + + payload = { + "request_id": str(uuid4()), + "organization_id": 1, + "project_id": 1, + "input": text, + "validators": validators_payload, + } + + try: + response = httpx.post( + API_URL, + headers=headers, + json=payload, + timeout=TIMEOUT_SECONDS, + ) + response.raise_for_status() + body = response.json() + safe_text = body.get("data", {}).get("safe_text") + if safe_text is None: + return "" + return str(safe_text) + except httpx.HTTPError as exc: + return f"REQUEST_ERROR: {exc}" + except ValueError as exc: + return f"JSON_ERROR: {exc}" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--validators_payload", + required=True, + help="Comma-separated validators, e.g. uli_slur_match or uli_slur_match,pii_remover", + ) + parser.add_argument( + "--auth_token", + required=True, + help="Bearer token value (without the 'Bearer ' prefix).", + ) + args = parser.parse_args() + + selected_validators = [ + value.strip() for value in args.validators_payload.split(",") if value.strip() + ] + unknown = [name for name in selected_validators if name not in VALIDATOR_TEMPLATES] + if not selected_validators or unknown: + raise ValueError( + "Invalid validators_payload. Supported values: " + f"{', '.join(VALIDATOR_TEMPLATES.keys())}" + ) + + validators_payload = [ + dict(VALIDATOR_TEMPLATES[name]) for name in selected_validators + ] + + df = pd.read_csv(DATASET_PATH) + + # Keep output names exactly as requested. 
+ rows = [] + for _, row in df.iterrows(): + source_text = str(row.get("Text", "")) + safe_text = call_guardrails(source_text, validators_payload, args.auth_token) + + rows.append( + { + "ID": row.get("ID"), + "text": source_text, + "validators_present": row.get("Validators_present", ""), + "response": safe_text, + } + ) + + out_df = pd.DataFrame( + rows, columns=["ID", "text", "validators_present", "response"] + ) + write_csv(out_df, OUT_PATH) + + +if __name__ == "__main__": + main() From 900510be6c742a47c9cfc1300aa77d88bc9112d1 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Thu, 19 Mar 2026 12:10:12 +0530 Subject: [PATCH 2/4] resolved comments --- .../multiple_validators/config.json | 23 ++++++ .../run.py | 79 +++++++++---------- 2 files changed, 60 insertions(+), 42 deletions(-) create mode 100644 backend/app/evaluation/multiple_validators/config.json rename backend/app/evaluation/{multi_validator_whatsapp => multiple_validators}/run.py (55%) diff --git a/backend/app/evaluation/multiple_validators/config.json b/backend/app/evaluation/multiple_validators/config.json new file mode 100644 index 0000000..8bc1fc8 --- /dev/null +++ b/backend/app/evaluation/multiple_validators/config.json @@ -0,0 +1,23 @@ +{ + "_comment": "Edit this file to configure the evaluation run. All paths are relative to the 'evaluation' directory (i.e. backend/app/evaluation). 
Add or remove entries in 'validators' to control which validators run and with what settings.", + "dataset_path": "datasets/multi_validator_whatsapp_dataset.csv", + "out_path": "outputs/multi_validator_whatsapp/predictions.csv", + "organization_id": 1, + "project_id": 1, + "validators": [ + { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "fix" + }, + { + "type": "pii_remover", + "on_fail": "fix" + }, + { + "type": "ban_list", + "banned_words": ["sonography"], + "on_fail": "fix" + } + ] +} diff --git a/backend/app/evaluation/multi_validator_whatsapp/run.py b/backend/app/evaluation/multiple_validators/run.py similarity index 55% rename from backend/app/evaluation/multi_validator_whatsapp/run.py rename to backend/app/evaluation/multiple_validators/run.py index efa318b..b9952a9 100644 --- a/backend/app/evaluation/multi_validator_whatsapp/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -1,3 +1,4 @@ +import json from pathlib import Path import argparse import os @@ -9,39 +10,31 @@ from app.evaluation.common.helper import write_csv BASE_DIR = Path(__file__).resolve().parent.parent -DATASET_PATH = BASE_DIR / "datasets" / "multi_validator_whatsapp_dataset.csv" -OUT_PATH = BASE_DIR / "outputs" / "multi_validator_whatsapp" / "predictions.csv" API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60")) -VALIDATOR_TEMPLATES = { - "uli_slur_match": { - "type": "uli_slur_match", - "severity": "all", - "on_fail": "fix", - }, - "pii_remover": { - "type": "pii_remover", - "on_fail": "fix", - }, - "ban_list": { - "type": "ban_list", - "banned_words": ["sonography"], - "on_fail": "fix", - }, -} - - -def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) -> str: + +def load_config(config_path: Path) -> dict: + with open(config_path) as f: + return json.load(f) + + +def call_guardrails( + text: str, + validators_payload: list[dict], 
+ organization_id: int, + project_id: int, + auth_token: str, +) -> str: headers = {"Content-Type": "application/json"} if auth_token: headers["Authorization"] = f"Bearer {auth_token}" payload = { "request_id": str(uuid4()), - "organization_id": 1, - "project_id": 1, + "organization_id": organization_id, + "project_id": project_id, "input": text, "validators": validators_payload, } @@ -68,9 +61,9 @@ def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--validators_payload", - required=True, - help="Comma-separated validators, e.g. uli_slur_match or uli_slur_match,pii_remover", + "--config", + default=str(Path(__file__).resolve().parent / "config.json"), + help="Path to the JSON config file (default: config.json next to this script).", ) parser.add_argument( "--auth_token", @@ -79,27 +72,29 @@ def main(): ) args = parser.parse_args() - selected_validators = [ - value.strip() for value in args.validators_payload.split(",") if value.strip() - ] - unknown = [name for name in selected_validators if name not in VALIDATOR_TEMPLATES] - if not selected_validators or unknown: - raise ValueError( - "Invalid validators_payload. Supported values: " - f"{', '.join(VALIDATOR_TEMPLATES.keys())}" - ) + config = load_config(Path(args.config)) + + dataset_path = BASE_DIR / config["dataset_path"] + out_path = BASE_DIR / config["out_path"] + organization_id = config["organization_id"] + project_id = config["project_id"] + validators_payload = config["validators"] - validators_payload = [ - dict(VALIDATOR_TEMPLATES[name]) for name in selected_validators - ] + if not validators_payload: + raise ValueError("No validators defined in config.") - df = pd.read_csv(DATASET_PATH) + df = pd.read_csv(dataset_path) - # Keep output names exactly as requested. 
rows = [] for _, row in df.iterrows(): source_text = str(row.get("Text", "")) - safe_text = call_guardrails(source_text, validators_payload, args.auth_token) + safe_text = call_guardrails( + source_text, + validators_payload, + organization_id, + project_id, + args.auth_token, + ) rows.append( { @@ -113,7 +108,7 @@ def main(): out_df = pd.DataFrame( rows, columns=["ID", "text", "validators_present", "response"] ) - write_csv(out_df, OUT_PATH) + write_csv(out_df, out_path) if __name__ == "__main__": From 366df52eff7b248b2486e57e9c0b391db4339b45 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 20 Mar 2026 17:03:28 +0530 Subject: [PATCH 3/4] resolved comments --- backend/README.md | 20 ++++++++++++++++++- .../app/evaluation/multiple_validators/run.py | 7 +------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/backend/README.md b/backend/README.md index 37c01c5..5be53d2 100644 --- a/backend/README.md +++ b/backend/README.md @@ -143,7 +143,25 @@ This script runs the evaluators in sequence: - `app/evaluation/gender_assumption_bias/run.py` - `app/evaluation/ban_list/run.py` -To evaluate any specific evaluator, run the offline evaluation script: `python ` +To evaluate any specific evaluator, run the offline evaluation script: `python ` + +## Multiple validators evaluation + +To run an end-to-end evaluation combining multiple validators against a dataset via the live API: + +1. Download the multi-validator dataset from [Google Drive](https://drive.google.com/drive/u/0/folders/1Rd1LH-oEwCkU0pBDRrYYedExorwmXA89) and place it in `backend/app/evaluation/datasets/` as `multi_validator_whatsapp_dataset.csv`. + +2. Edit `backend/app/evaluation/multiple_validators/config.json` to configure which validators to run, their parameters, and the dataset/output paths. + + For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to: + `backend/app/core/validators/README.md` + +3. 
Run the script from the `backend` directory: +```bash +python -m app.evaluation.multiple_validators.run --auth_token <token> +``` + +Output is written to `backend/app/evaluation/outputs/multi_validator_whatsapp/predictions.csv` (as set by `out_path` in `config.json`). ## Validator configuration guide diff --git a/backend/app/evaluation/multiple_validators/run.py b/backend/app/evaluation/multiple_validators/run.py index b9952a9..903b14e 100644 --- a/backend/app/evaluation/multiple_validators/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -60,11 +60,6 @@ def call_guardrails( def main(): parser = argparse.ArgumentParser() - parser.add_argument( - "--config", - default=str(Path(__file__).resolve().parent / "config.json"), - help="Path to the JSON config file (default: config.json next to this script).", - ) parser.add_argument( "--auth_token", required=True, @@ -72,7 +67,7 @@ ) args = parser.parse_args() - config = load_config(Path(args.config)) + config = load_config(Path(__file__).resolve().parent / "config.json") dataset_path = BASE_DIR / config["dataset_path"] out_path = BASE_DIR / config["out_path"] From 63045013d4e7df9ee6592a4a491d4a2967be1931 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 20 Mar 2026 17:20:07 +0530 Subject: [PATCH 4/4] resolved comment --- .env.example | 3 +++ backend/README.md | 4 +++- backend/app/evaluation/multiple_validators/run.py | 7 ++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 9156e8f..e8baa84 100644 --- a/.env.example +++ b/.env.example @@ -27,3 +27,6 @@ GUARDRAILS_HUB_API_KEY="" AUTH_TOKEN="" KAAPI_AUTH_URL="" KAAPI_AUTH_TIMEOUT=5 + +# URL for the guardrails API — required for the multiple_validators evaluation script +GUARDRAILS_API_URL="http://localhost:8001/api/v1/guardrails/" diff --git a/backend/README.md b/backend/README.md index 5be53d2..77aa89d 100644 --- a/backend/README.md +++ b/backend/README.md @@ -156,7 +156,9 @@ To run an end-to-end evaluation combining multiple validators 
against a dataset For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to: `backend/app/core/validators/README.md` -3. Run the script from the `backend` directory: +3. Ensure `GUARDRAILS_API_URL` is set in your `.env` file (see `.env.example`). Optionally set `GUARDRAILS_TIMEOUT_SECONDS` to override the default request timeout of 60s. + +4. Run the script from the `backend` directory: ```bash python -m app.evaluation.multiple_validators.run --auth_token ``` diff --git a/backend/app/evaluation/multiple_validators/run.py b/backend/app/evaluation/multiple_validators/run.py index 903b14e..99661c9 100644 --- a/backend/app/evaluation/multiple_validators/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -8,10 +8,15 @@ import pandas as pd from app.evaluation.common.helper import write_csv +from app.load_env import load_environment + +load_environment() BASE_DIR = Path(__file__).resolve().parent.parent -API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") +API_URL = os.getenv("GUARDRAILS_API_URL") +if not API_URL: + raise ValueError("GUARDRAILS_API_URL environment variable must be set.") TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60"))