From 0bd8545e5e3fddcc4fb1806c88755c1469328730 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Tue, 24 Feb 2026 15:18:47 +0530 Subject: [PATCH 1/4] Added evaluation of multiple validators together --- backend/app/api/routes/guardrails.py | 7 +- .../multi_validator_whatsapp/run.py | 120 ++++++++++++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 backend/app/evaluation/multi_validator_whatsapp/run.py diff --git a/backend/app/api/routes/guardrails.py b/backend/app/api/routes/guardrails.py index def2e61..4eb3abe 100644 --- a/backend/app/api/routes/guardrails.py +++ b/backend/app/api/routes/guardrails.py @@ -45,7 +45,12 @@ def run_guardrails( except ValueError: return APIResponse.failure_response(error="Invalid request_id") - _resolve_ban_list_banned_words(payload, session) + if any( + isinstance(validator, BanListSafetyValidatorConfig) + and validator.banned_words is None + for validator in payload.validators + ): + _resolve_ban_list_banned_words(payload, session) return _validate_with_guard( payload, request_log_crud, diff --git a/backend/app/evaluation/multi_validator_whatsapp/run.py b/backend/app/evaluation/multi_validator_whatsapp/run.py new file mode 100644 index 0000000..efa318b --- /dev/null +++ b/backend/app/evaluation/multi_validator_whatsapp/run.py @@ -0,0 +1,120 @@ +from pathlib import Path +import argparse +import os +from uuid import uuid4 + +import httpx +import pandas as pd + +from app.evaluation.common.helper import write_csv + +BASE_DIR = Path(__file__).resolve().parent.parent +DATASET_PATH = BASE_DIR / "datasets" / "multi_validator_whatsapp_dataset.csv" +OUT_PATH = BASE_DIR / "outputs" / "multi_validator_whatsapp" / "predictions.csv" + +API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") +TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60")) + +VALIDATOR_TEMPLATES = { + "uli_slur_match": { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "fix", + }, + 
"pii_remover": { + "type": "pii_remover", + "on_fail": "fix", + }, + "ban_list": { + "type": "ban_list", + "banned_words": ["sonography"], + "on_fail": "fix", + }, +} + + +def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) -> str: + headers = {"Content-Type": "application/json"} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + + payload = { + "request_id": str(uuid4()), + "organization_id": 1, + "project_id": 1, + "input": text, + "validators": validators_payload, + } + + try: + response = httpx.post( + API_URL, + headers=headers, + json=payload, + timeout=TIMEOUT_SECONDS, + ) + response.raise_for_status() + body = response.json() + safe_text = body.get("data", {}).get("safe_text") + if safe_text is None: + return "" + return str(safe_text) + except httpx.HTTPError as exc: + return f"REQUEST_ERROR: {exc}" + except ValueError as exc: + return f"JSON_ERROR: {exc}" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--validators_payload", + required=True, + help="Comma-separated validators, e.g. uli_slur_match or uli_slur_match,pii_remover", + ) + parser.add_argument( + "--auth_token", + required=True, + help="Bearer token value (without the 'Bearer ' prefix).", + ) + args = parser.parse_args() + + selected_validators = [ + value.strip() for value in args.validators_payload.split(",") if value.strip() + ] + unknown = [name for name in selected_validators if name not in VALIDATOR_TEMPLATES] + if not selected_validators or unknown: + raise ValueError( + "Invalid validators_payload. Supported values: " + f"{', '.join(VALIDATOR_TEMPLATES.keys())}" + ) + + validators_payload = [ + dict(VALIDATOR_TEMPLATES[name]) for name in selected_validators + ] + + df = pd.read_csv(DATASET_PATH) + + # Keep output names exactly as requested. 
+ rows = [] + for _, row in df.iterrows(): + source_text = str(row.get("Text", "")) + safe_text = call_guardrails(source_text, validators_payload, args.auth_token) + + rows.append( + { + "ID": row.get("ID"), + "text": source_text, + "validators_present": row.get("Validators_present", ""), + "response": safe_text, + } + ) + + out_df = pd.DataFrame( + rows, columns=["ID", "text", "validators_present", "response"] + ) + write_csv(out_df, OUT_PATH) + + +if __name__ == "__main__": + main() From 900510be6c742a47c9cfc1300aa77d88bc9112d1 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Thu, 19 Mar 2026 12:10:12 +0530 Subject: [PATCH 2/4] resolved comments --- .../multiple_validators/config.json | 23 ++++++ .../run.py | 79 +++++++++---------- 2 files changed, 60 insertions(+), 42 deletions(-) create mode 100644 backend/app/evaluation/multiple_validators/config.json rename backend/app/evaluation/{multi_validator_whatsapp => multiple_validators}/run.py (55%) diff --git a/backend/app/evaluation/multiple_validators/config.json b/backend/app/evaluation/multiple_validators/config.json new file mode 100644 index 0000000..8bc1fc8 --- /dev/null +++ b/backend/app/evaluation/multiple_validators/config.json @@ -0,0 +1,23 @@ +{ + "_comment": "Edit this file to configure the evaluation run. All paths are relative to the 'evaluation' directory (i.e. backend/app/evaluation). 
Add or remove entries in 'validators' to control which validators run and with what settings.", + "dataset_path": "datasets/multi_validator_whatsapp_dataset.csv", + "out_path": "outputs/multi_validator_whatsapp/predictions.csv", + "organization_id": 1, + "project_id": 1, + "validators": [ + { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "fix" + }, + { + "type": "pii_remover", + "on_fail": "fix" + }, + { + "type": "ban_list", + "banned_words": ["sonography"], + "on_fail": "fix" + } + ] +} diff --git a/backend/app/evaluation/multi_validator_whatsapp/run.py b/backend/app/evaluation/multiple_validators/run.py similarity index 55% rename from backend/app/evaluation/multi_validator_whatsapp/run.py rename to backend/app/evaluation/multiple_validators/run.py index efa318b..b9952a9 100644 --- a/backend/app/evaluation/multi_validator_whatsapp/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -1,3 +1,4 @@ +import json from pathlib import Path import argparse import os @@ -9,39 +10,31 @@ from app.evaluation.common.helper import write_csv BASE_DIR = Path(__file__).resolve().parent.parent -DATASET_PATH = BASE_DIR / "datasets" / "multi_validator_whatsapp_dataset.csv" -OUT_PATH = BASE_DIR / "outputs" / "multi_validator_whatsapp" / "predictions.csv" API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60")) -VALIDATOR_TEMPLATES = { - "uli_slur_match": { - "type": "uli_slur_match", - "severity": "all", - "on_fail": "fix", - }, - "pii_remover": { - "type": "pii_remover", - "on_fail": "fix", - }, - "ban_list": { - "type": "ban_list", - "banned_words": ["sonography"], - "on_fail": "fix", - }, -} - - -def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) -> str: + +def load_config(config_path: Path) -> dict: + with open(config_path) as f: + return json.load(f) + + +def call_guardrails( + text: str, + validators_payload: list[dict], 
+ organization_id: int, + project_id: int, + auth_token: str, +) -> str: headers = {"Content-Type": "application/json"} if auth_token: headers["Authorization"] = f"Bearer {auth_token}" payload = { "request_id": str(uuid4()), - "organization_id": 1, - "project_id": 1, + "organization_id": organization_id, + "project_id": project_id, "input": text, "validators": validators_payload, } @@ -68,9 +61,9 @@ def call_guardrails(text: str, validators_payload: list[dict], auth_token: str) def main(): parser = argparse.ArgumentParser() parser.add_argument( - "--validators_payload", - required=True, - help="Comma-separated validators, e.g. uli_slur_match or uli_slur_match,pii_remover", + "--config", + default=str(Path(__file__).resolve().parent / "config.json"), + help="Path to the JSON config file (default: config.json next to this script).", ) parser.add_argument( "--auth_token", @@ -79,27 +72,29 @@ def main(): ) args = parser.parse_args() - selected_validators = [ - value.strip() for value in args.validators_payload.split(",") if value.strip() - ] - unknown = [name for name in selected_validators if name not in VALIDATOR_TEMPLATES] - if not selected_validators or unknown: - raise ValueError( - "Invalid validators_payload. Supported values: " - f"{', '.join(VALIDATOR_TEMPLATES.keys())}" - ) + config = load_config(Path(args.config)) + + dataset_path = BASE_DIR / config["dataset_path"] + out_path = BASE_DIR / config["out_path"] + organization_id = config["organization_id"] + project_id = config["project_id"] + validators_payload = config["validators"] - validators_payload = [ - dict(VALIDATOR_TEMPLATES[name]) for name in selected_validators - ] + if not validators_payload: + raise ValueError("No validators defined in config.") - df = pd.read_csv(DATASET_PATH) + df = pd.read_csv(dataset_path) - # Keep output names exactly as requested. 
rows = [] for _, row in df.iterrows(): source_text = str(row.get("Text", "")) - safe_text = call_guardrails(source_text, validators_payload, args.auth_token) + safe_text = call_guardrails( + source_text, + validators_payload, + organization_id, + project_id, + args.auth_token, + ) rows.append( { @@ -113,7 +108,7 @@ def main(): out_df = pd.DataFrame( rows, columns=["ID", "text", "validators_present", "response"] ) - write_csv(out_df, OUT_PATH) + write_csv(out_df, out_path) if __name__ == "__main__": From 366df52eff7b248b2486e57e9c0b391db4339b45 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 20 Mar 2026 17:03:28 +0530 Subject: [PATCH 3/4] resolved comments --- backend/README.md | 20 ++++++++++++++++++- .../app/evaluation/multiple_validators/run.py | 7 +------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/backend/README.md b/backend/README.md index 37c01c5..5be53d2 100644 --- a/backend/README.md +++ b/backend/README.md @@ -143,7 +143,25 @@ This script runs the evaluators in sequence: - `app/evaluation/gender_assumption_bias/run.py` - `app/evaluation/ban_list/run.py` -To evaluate any specific evaluator, run the offline evaluation script: `python ` +To evaluate any specific evaluator, run the offline evaluation script: `python ` + +## Multiple validators evaluation + +To run an end-to-end evaluation combining multiple validators against a dataset via the live API: + +1. Download the multi-validator dataset from [Google Drive](https://drive.google.com/drive/u/0/folders/1Rd1LH-oEwCkU0pBDRrYYedExorwmXA89) and place it in `backend/app/evaluation/datasets/` as `multi_validator_whatsapp_dataset.csv`. + +2. Edit `backend/app/evaluation/multiple_validators/config.json` to configure which validators to run, their parameters, and the dataset/output paths. + + For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to: + `backend/app/core/validators/README.md` + +3. 
Run the script from the `backend` directory: +```bash +python -m app.evaluation.multiple_validators.run --auth_token <token> +``` + +Output is written to `backend/app/evaluation/outputs/multi_validator_whatsapp/predictions.csv` (as set by `out_path` in `config.json`). ## Validator configuration guide diff --git a/backend/app/evaluation/multiple_validators/run.py b/backend/app/evaluation/multiple_validators/run.py index b9952a9..903b14e 100644 --- a/backend/app/evaluation/multiple_validators/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -60,11 +60,6 @@ def call_guardrails( def main(): parser = argparse.ArgumentParser() - parser.add_argument( - "--config", - default=str(Path(__file__).resolve().parent / "config.json"), - help="Path to the JSON config file (default: config.json next to this script).", - ) parser.add_argument( "--auth_token", required=True, @@ -72,7 +67,7 @@ ) args = parser.parse_args() - config = load_config(Path(args.config)) + config = load_config(Path(__file__).resolve().parent / "config.json") dataset_path = BASE_DIR / config["dataset_path"] out_path = BASE_DIR / config["out_path"] From 63045013d4e7df9ee6592a4a491d4a2967be1931 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 20 Mar 2026 17:20:07 +0530 Subject: [PATCH 4/4] resolved comment --- .env.example | 3 +++ backend/README.md | 4 +++- backend/app/evaluation/multiple_validators/run.py | 7 ++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 9156e8f..e8baa84 100644 --- a/.env.example +++ b/.env.example @@ -27,3 +27,6 @@ GUARDRAILS_HUB_API_KEY="" AUTH_TOKEN="" KAAPI_AUTH_URL="" KAAPI_AUTH_TIMEOUT=5 + +# URL for the guardrails API — required for the multiple_validators evaluation script +GUARDRAILS_API_URL="http://localhost:8001/api/v1/guardrails/" diff --git a/backend/README.md b/backend/README.md index 5be53d2..77aa89d 100644 --- a/backend/README.md +++ b/backend/README.md @@ -156,7 +156,9 @@ To run an end-to-end evaluation combining multiple validators 
against a dataset For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to: `backend/app/core/validators/README.md` -3. Run the script from the `backend` directory: +3. Ensure `GUARDRAILS_API_URL` is set in your `.env` file (see `.env.example`). Optionally set `GUARDRAILS_TIMEOUT_SECONDS` to override the default request timeout of 60s. + +4. Run the script from the `backend` directory: ```bash python -m app.evaluation.multiple_validators.run --auth_token ``` diff --git a/backend/app/evaluation/multiple_validators/run.py b/backend/app/evaluation/multiple_validators/run.py index 903b14e..99661c9 100644 --- a/backend/app/evaluation/multiple_validators/run.py +++ b/backend/app/evaluation/multiple_validators/run.py @@ -8,10 +8,15 @@ import pandas as pd from app.evaluation.common.helper import write_csv +from app.load_env import load_environment + +load_environment() BASE_DIR = Path(__file__).resolve().parent.parent -API_URL = os.getenv("GUARDRAILS_API_URL", "http://localhost:8001/api/v1/guardrails/") +API_URL = os.getenv("GUARDRAILS_API_URL") +if not API_URL: + raise ValueError("GUARDRAILS_API_URL environment variable must be set.") TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60"))