diff --git a/.env.example b/.env.example index 9156e8f..e8baa84 100644 --- a/.env.example +++ b/.env.example @@ -27,3 +27,6 @@ GUARDRAILS_HUB_API_KEY="" AUTH_TOKEN="" KAAPI_AUTH_URL="" KAAPI_AUTH_TIMEOUT=5 + +# URL for the guardrails API — required for the multiple_validators evaluation script +GUARDRAILS_API_URL="http://localhost:8001/api/v1/guardrails/" diff --git a/backend/README.md b/backend/README.md index 37c01c5..77aa89d 100644 --- a/backend/README.md +++ b/backend/README.md @@ -143,7 +143,27 @@ This script runs the evaluators in sequence: - `app/evaluation/gender_assumption_bias/run.py` - `app/evaluation/ban_list/run.py` -To evaluate any specific evaluator, run the offline evaluation script: `python ` +To evaluate any specific evaluator, run its offline evaluation script: `python app/evaluation/<evaluator_name>/run.py` + +## Multiple validators evaluation + +To run an end-to-end evaluation combining multiple validators against a dataset via the live API: + +1. Download the multi-validator dataset from [Google Drive](https://drive.google.com/drive/u/0/folders/1Rd1LH-oEwCkU0pBDRrYYedExorwmXA89) and place it in `backend/app/evaluation/datasets/` as `multi_validator_whatsapp_dataset.csv`. + +2. Edit `backend/app/evaluation/multiple_validators/config.json` to configure which validators to run, their parameters, and the dataset/output paths. + +   For the full list of supported validators and their config parameters (e.g. `severity`, `entity_types`, `banned_words`, `on_fail`), refer to: +   `backend/app/core/validators/README.md` + +3. Ensure `GUARDRAILS_API_URL` is set in your `.env` file (see `.env.example`). Optionally set `GUARDRAILS_TIMEOUT_SECONDS` to override the default request timeout of 60s. + +4. Run the script from the `backend` directory: +```bash +python -m app.evaluation.multiple_validators.run --auth_token <your-bearer-token> +``` + +Output is written to the `out_path` configured in `config.json` (by default `backend/app/evaluation/outputs/multi_validator_whatsapp/predictions.csv`). 
## Validator configuration guide diff --git a/backend/app/evaluation/multiple_validators/config.json b/backend/app/evaluation/multiple_validators/config.json new file mode 100644 index 0000000..8bc1fc8 --- /dev/null +++ b/backend/app/evaluation/multiple_validators/config.json @@ -0,0 +1,23 @@ +{ + "_comment": "Edit this file to configure the evaluation run. All paths are relative to the 'evaluation' directory (i.e. backend/app/evaluation). Add or remove entries in 'validators' to control which validators run and with what settings.", + "dataset_path": "datasets/multi_validator_whatsapp_dataset.csv", + "out_path": "outputs/multi_validator_whatsapp/predictions.csv", + "organization_id": 1, + "project_id": 1, + "validators": [ + { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "fix" + }, + { + "type": "pii_remover", + "on_fail": "fix" + }, + { + "type": "ban_list", + "banned_words": ["sonography"], + "on_fail": "fix" + } + ] +} diff --git a/backend/app/evaluation/multiple_validators/run.py b/backend/app/evaluation/multiple_validators/run.py new file mode 100644 index 0000000..99661c9 --- /dev/null +++ b/backend/app/evaluation/multiple_validators/run.py @@ -0,0 +1,115 @@ +import json +from pathlib import Path +import argparse +import os +from uuid import uuid4 + +import httpx +import pandas as pd + +from app.evaluation.common.helper import write_csv +from app.load_env import load_environment + +load_environment() + +BASE_DIR = Path(__file__).resolve().parent.parent + +API_URL = os.getenv("GUARDRAILS_API_URL") +if not API_URL: + raise ValueError("GUARDRAILS_API_URL environment variable must be set.") +TIMEOUT_SECONDS = float(os.getenv("GUARDRAILS_TIMEOUT_SECONDS", "60")) + + +def load_config(config_path: Path) -> dict: + with open(config_path) as f: + return json.load(f) + + +def call_guardrails( + text: str, + validators_payload: list[dict], + organization_id: int, + project_id: int, + auth_token: str, +) -> str: + headers = {"Content-Type": 
"application/json"} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + + payload = { + "request_id": str(uuid4()), + "organization_id": organization_id, + "project_id": project_id, + "input": text, + "validators": validators_payload, + } + + try: + response = httpx.post( + API_URL, + headers=headers, + json=payload, + timeout=TIMEOUT_SECONDS, + ) + response.raise_for_status() + body = response.json() + safe_text = body.get("data", {}).get("safe_text") + if safe_text is None: + return "" + return str(safe_text) + except httpx.HTTPError as exc: + return f"REQUEST_ERROR: {exc}" + except ValueError as exc: + return f"JSON_ERROR: {exc}" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--auth_token", + required=True, + help="Bearer token value (without the 'Bearer ' prefix).", + ) + args = parser.parse_args() + + config = load_config(Path(__file__).resolve().parent / "config.json") + + dataset_path = BASE_DIR / config["dataset_path"] + out_path = BASE_DIR / config["out_path"] + organization_id = config["organization_id"] + project_id = config["project_id"] + validators_payload = config["validators"] + + if not validators_payload: + raise ValueError("No validators defined in config.") + + df = pd.read_csv(dataset_path) + + rows = [] + for _, row in df.iterrows(): + source_text = str(row.get("Text", "")) + safe_text = call_guardrails( + source_text, + validators_payload, + organization_id, + project_id, + args.auth_token, + ) + + rows.append( + { + "ID": row.get("ID"), + "text": source_text, + "validators_present": row.get("Validators_present", ""), + "response": safe_text, + } + ) + + out_df = pd.DataFrame( + rows, columns=["ID", "text", "validators_present", "response"] + ) + write_csv(out_df, out_path) + + +if __name__ == "__main__": + main()