Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ @article{palaskar2025vlsu
url = {https://arxiv.org/abs/2510.18214},
}

@article{wang2026visualleakbench,
title = {{VisualLeakBench}: Auditing the Fragility of Large Vision-Language Models against {PII} Leakage and Social Engineering},
author = {Youting Wang and Yuan Tang and Yitian Qian and Chen Zhao},
journal = {arXiv preprint arXiv:2603.13385},
year = {2026},
url = {https://arxiv.org/abs/2603.13385},
}

@article{scheuerman2025transphobia,
title = {Transphobia is in the Eye of the Prompter: Trans-Centered Perspectives on Large Language Models},
author = {Morgan Klaus Scheuerman and Katy Weathington and Adrian Petterson and Dylan Thomas Doyle and Dipto Das and Michael Ann DeVito and Jed R. Brubaker},
Expand Down
8 changes: 8 additions & 0 deletions pyrit/datasets/seed_datasets/remote/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@
from pyrit.datasets.seed_datasets.remote.transphobia_awareness_dataset import ( # noqa: F401
_TransphobiaAwarenessDataset,
)
from pyrit.datasets.seed_datasets.remote.visual_leak_bench_dataset import (
VisualLeakBenchCategory,
VisualLeakBenchPIIType,
_VisualLeakBenchDataset,
) # noqa: F401
from pyrit.datasets.seed_datasets.remote.vlsu_multimodal_dataset import (
_VLSUMultimodalDataset,
) # noqa: F401
Expand Down Expand Up @@ -144,5 +149,8 @@
"_ToxicChatDataset",
"_TransphobiaAwarenessDataset",
"_VLSUMultimodalDataset",
"_VisualLeakBenchDataset",
"VisualLeakBenchCategory",
"VisualLeakBenchPIIType",
"_XSTestDataset",
]
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,8 @@ def __init__(
self.source_type: Literal["public_url", "file"] = source_type
self.categories = categories

# Validate categories if provided
if categories is not None:
valid_categories = {category.value for category in SemanticCategory}
invalid_categories = {
cat.value if isinstance(cat, SemanticCategory) else cat for cat in categories
} - valid_categories
if invalid_categories:
raise ValueError(f"Invalid semantic categories: {', '.join(invalid_categories)}")
self._validate_enums(categories, SemanticCategory, "semantic category")

@property
def dataset_name(self) -> str:
Expand Down
17 changes: 2 additions & 15 deletions pyrit/datasets/seed_datasets/remote/promptintel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,10 @@ def __init__(
self._api_key = api_key

if severity is not None:
valid_severities = {s.value for s in PromptIntelSeverity}
sev_value = severity.value if isinstance(severity, PromptIntelSeverity) else severity
if sev_value not in valid_severities:
raise ValueError(
f"Invalid severity: {sev_value}. Valid values: {[s.value for s in PromptIntelSeverity]}"
)
self._validate_enum(severity, PromptIntelSeverity, "severity")

if categories is not None:
valid_categories = {c.value for c in PromptIntelCategory}
invalid_categories = {
cat.value if isinstance(cat, PromptIntelCategory) else cat for cat in categories
} - valid_categories
if invalid_categories:
raise ValueError(
f"Invalid categories: {', '.join(str(c) for c in invalid_categories)}. "
f"Valid values: {[c.value for c in PromptIntelCategory]}"
)
self._validate_enums(categories, PromptIntelCategory, "category")

self._severity = severity
self._categories = categories
Expand Down
48 changes: 47 additions & 1 deletion pyrit/datasets/seed_datasets/remote/remote_dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import logging
import tempfile
from abc import ABC
from collections.abc import Callable
from collections.abc import Callable, Sequence
from dataclasses import fields
from enum import Enum
from pathlib import Path
from typing import Any, Literal, Optional, TextIO, cast

Expand Down Expand Up @@ -51,6 +52,51 @@ class _RemoteDatasetLoader(SeedDatasetProvider, ABC):
- dataset_name property: Human-readable name for the dataset
"""

@staticmethod
def _validate_enums(
    values: Sequence[Enum],
    enum_cls: type[Enum],
    label: str,
) -> None:
    """
    Validate that all values are instances of the expected enum class.

    Values are checked in order and the first offending value raises; an empty
    sequence passes without error.

    Args:
        values: Sequence of values to validate.
        enum_cls: The enum class that all values must be instances of.
        label: Human-readable label for error messages (e.g. "category").

    Raises:
        ValueError: If any value is not an instance of the expected enum class.
    """
    for v in values:
        if not isinstance(v, enum_cls):
            # Only built on the failure path; lists every member so the caller
            # can see what would have been accepted.
            valid = ", ".join(f"{enum_cls.__name__}.{m.name}" for m in enum_cls)
            # Prefix with the label so the message names the offending argument;
            # the remainder of the message is kept as before.
            raise ValueError(
                f"Invalid {label}: Expected {enum_cls.__name__}, got {type(v).__name__}: {v!r}. "
                f"Valid values: {valid}"
            )

@staticmethod
def _validate_enum(
    value: Enum,
    enum_cls: type[Enum],
    label: str,
) -> None:
    """
    Validate that a single value is an instance of the expected enum class.

    Args:
        value: The value to validate.
        enum_cls: The enum class that the value must be an instance of.
        label: Human-readable label for error messages (e.g. "severity").

    Raises:
        ValueError: If the value is not an instance of the expected enum class.
    """
    if isinstance(value, enum_cls):
        return

    # Lists every member so the caller can see what would have been accepted.
    valid = ", ".join(f"{enum_cls.__name__}.{m.name}" for m in enum_cls)
    # Prefix with the label so the message names the offending argument;
    # the remainder of the message is kept as before.
    raise ValueError(
        f"Invalid {label}: Expected {enum_cls.__name__}, got {type(value).__name__}: {value!r}. "
        f"Valid values: {valid}"
    )

def _get_cache_file_name(self, *, source: str, file_type: str) -> str:
"""
Generate a cache file name based on the source URL and file type.
Expand Down
Loading
Loading