# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import List, Optional

import pandas as pd

from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt

# URLs for the datasets. Only RATINGS_URL is fetched by the loader below; the
# other two are kept as references to the companion files of the same record.
RATINGS_URL = "https://zenodo.org/records/15482694/files/Ratings.xlsx?download=1"
QUESTION_RESPONSE_PAIRS_URL = "https://zenodo.org/records/15482694/files/Question_Response_Pairs.xlsx?download=1"
QUORA_QUESTION_LIST_URL = "https://zenodo.org/records/15482694/files/Quora%20Question%20List.xlsx?download=1"

_DATASET_NAME = "Transphobia-Awareness"

# Leading text of the spreadsheet column that holds the question-sentiment rating.
# The full header continues with rating-scale instructions and embedded line
# breaks, so the column is located by prefix instead of by exact string equality.
_QUESTION_SENTIMENT_PREFIX = "Question sentiment: How transphobic is the question?"

# Spelling variants of the keyword column that are folded into one canonical form.
_KEYWORD_ALIASES = {"Trans": "transgender", "Transgender": "transgender", "nonbinary": "non-binary"}

_AUTHORS = [
    "Morgan Klaus Scheuerman",
    "Katy Weathington",
    "Adrian Petterson",
    "Dylan Thomas Doyle",
    "Dipto Das",
    "Michael Ann DeVito",
    "Jed R. Brubaker",
]


def _find_column(columns: pd.Index, prefix: str) -> Optional[str]:
    """Return the first column label whose whitespace-normalised text starts with *prefix*."""
    for column in columns:
        # Collapse newlines / repeated spaces so multi-line headers compare reliably.
        if " ".join(str(column).split()).startswith(prefix):
            return column
    return None


def _none_if_missing(value):
    """Return None for missing cells (NaN/NaT/None); otherwise return *value* unchanged."""
    return None if pd.isna(value) else value


def fetch_transphobia_awareness_dataset() -> SeedPromptDataset:
    """
    Load the Transphobia-Awareness Dataset from its web URL and return it as a SeedPromptDataset.

    The ratings spreadsheet is downloaded from ``RATINGS_URL`` and every row that has a
    question in the "Quora Question" column becomes one text ``SeedPrompt``.

    Transphobia dataset is loaded from the source cited below.
    Scheuerman, M., Weathington, K., Petterson, A., Doyle, D. T., Das, D., DeVito, M. A., & Brubaker, J.
    (2025). Quora Questions and LLM Responses on Transgender and Non-Binary Topics [Data set].
    Zenodo. https://doi.org/10.5281/zenodo.15482694

    This dataset is detailed in the paper cited below.
    Morgan Klaus Scheuerman, Katy Weathington, Adrian Petterson,
    Dylan Doyle, Dipto Das, Michael Ann DeVito, Jed Brubaker.
    "Transphobia is in the Eye of the Prompter:
    Trans-Centered Perspectives on Large Language Models."
    ACM Transactions on Computer-Human Interaction (2018).
    NOTE(review): the 2018 publication year looks inconsistent with the 2025 dataset
    release cited above -- confirm against the publisher's record.

    Returns:
        SeedPromptDataset: A dataset of prompts with transphobia awareness metadata.

    Raises:
        KeyError: If the downloaded sheet has no "keyword" or "Quora Question" column.

    Notes:
        - Only Ratings.xlsx is fetched; ``QUESTION_RESPONSE_PAIRS_URL`` and
          ``QUORA_QUESTION_LIST_URL`` are not read by this function.
        - Each prompt carries the single harm category "transphobia"; the dataset-level
          harm categories additionally list the unique (normalised) keyword values.
        - Each prompt's metadata holds the normalised ``keyword`` and the
          ``question_sentiment`` cell (``None`` when the cell or column is missing).
    """
    # header=1: column names are read from the second spreadsheet row.
    ratings_df = pd.read_excel(RATINGS_URL, header=1)

    # Map keywords for consistency
    ratings_df["keyword"] = ratings_df["keyword"].replace(_KEYWORD_ALIASES)

    # dropna() keeps NaN (blank keyword cells) out of the harm-category list.
    unique_keywords = ratings_df["keyword"].dropna().unique().tolist()
    harm_categories = ["transphobia"] + unique_keywords

    # Resolve the sentiment column once, outside the row loop. An exact-match lookup
    # with a literal broken across source lines does not equal the sheet header and
    # silently yields None for every row.
    sentiment_column = _find_column(ratings_df.columns, _QUESTION_SENTIMENT_PREFIX)

    seed_prompts: List[SeedPrompt] = []
    for _, row in ratings_df.iterrows():
        question = row["Quora Question"]
        if pd.isna(question):
            # A blank question cell cannot be used as a prompt value; skip the row.
            continue

        sentiment = row[sentiment_column] if sentiment_column is not None else None
        metadata = {
            "keyword": _none_if_missing(row["keyword"]),
            "question_sentiment": _none_if_missing(sentiment),
        }

        seed_prompts.append(
            SeedPrompt(
                value=question,
                data_type="text",
                dataset_name=_DATASET_NAME,
                harm_categories=["transphobia"],
                description="Quora-style question for transphobia awareness and inclusivity evaluation.",
                metadata=metadata,
                source=RATINGS_URL,
                authors=list(_AUTHORS),  # fresh list per prompt so prompts never share one mutable object
            )
        )

    return SeedPromptDataset(
        prompts=seed_prompts,
        name=_DATASET_NAME,
        dataset_name=_DATASET_NAME,
        harm_categories=harm_categories,
        description="Dataset for evaluating LLM responses for transphobia and inclusivity.",
        source=RATINGS_URL,
    )