diff --git a/Makefile b/Makefile index 0b0c33cc21..699feea617 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ PYMODULE:=pyrit TESTS:=tests UNIT_TESTS:=tests/unit INTEGRATION_TESTS:=tests/integration +PARTNER_INTEGRATION_TESTS:=tests/partner_integration END_TO_END_TESTS:=tests/end_to_end all: pre-commit @@ -36,5 +37,8 @@ integration-test: end-to-end-test: $(CMD) pytest $(END_TO_END_TESTS) -v --junitxml=junit/test-results.xml +partner-integration-test: + $(CMD) pytest $(PARTNER_INTEGRATION_TESTS) -v --junitxml=junit/partner-test-results.xml + #clean: # git clean -Xdf # Delete all files in .gitignore diff --git a/tests/partner_integration/__init__.py b/tests/partner_integration/__init__.py new file mode 100644 index 0000000000..9a0454564d --- /dev/null +++ b/tests/partner_integration/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. diff --git a/tests/partner_integration/azure_ai_evaluation/__init__.py b/tests/partner_integration/azure_ai_evaluation/__init__.py new file mode 100644 index 0000000000..9a0454564d --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. diff --git a/tests/partner_integration/azure_ai_evaluation/test_auth_contract.py b/tests/partner_integration/azure_ai_evaluation/test_auth_contract.py new file mode 100644 index 0000000000..43bf668e3d --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_auth_contract.py @@ -0,0 +1,19 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for authentication utilities used by azure-ai-evaluation. + +The azure-ai-evaluation red team module uses: +- get_azure_openai_auth: Called in _utils/strategy_utils.py to authenticate + OpenAIChatTarget for tense/translation converter strategies. 
+""" + +from pyrit.auth import get_azure_openai_auth + + +class TestAuthContract: + """Validate authentication utility availability.""" + + def test_get_azure_openai_auth_is_callable(self): + """strategy_utils.py calls get_azure_openai_auth() for OpenAI target auth.""" + assert callable(get_azure_openai_auth) diff --git a/tests/partner_integration/azure_ai_evaluation/test_converter_contract.py b/tests/partner_integration/azure_ai_evaluation/test_converter_contract.py new file mode 100644 index 0000000000..83257e3701 --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_converter_contract.py @@ -0,0 +1,93 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for PromptConverter interface and specific converters used by azure-ai-evaluation. + +The azure-ai-evaluation red team module: +- Extends PromptConverter via _DefaultConverter +- Imports 20+ specific converters in _agent/_agent_utils.py and strategy_utils.py +- Uses ConverterResult as the return type +""" + +import pytest + +from pyrit.prompt_converter import ConverterResult, PromptConverter + + +class TestPromptConverterContract: + """Validate PromptConverter base class interface stability.""" + + def test_prompt_converter_has_convert_async(self): + """_DefaultConverter overrides convert_async.""" + assert hasattr(PromptConverter, "convert_async") + + def test_prompt_converter_subclassable(self): + """_DefaultConverter subclasses PromptConverter with convert_async.""" + + class TestConverter(PromptConverter): + SUPPORTED_INPUT_TYPES = ("text",) + SUPPORTED_OUTPUT_TYPES = ("text",) + + async def convert_async(self, *, prompt, input_type="text"): + return ConverterResult(output_text=prompt, output_type="text") + + converter = TestConverter() + assert isinstance(converter, PromptConverter) + + +class TestSpecificConvertersImportable: + """Validate that all converters imported by azure-ai-evaluation are available. 
+ + These converters are imported in: + - _agent/_agent_utils.py (20+ converters) + - _utils/strategy_utils.py (converter instantiation) + """ + + @pytest.mark.parametrize( + "converter_name", + [ + "AnsiAttackConverter", + "AsciiArtConverter", + "AtbashConverter", + "Base64Converter", + "BinaryConverter", + "CaesarConverter", + "CharacterSpaceConverter", + # NOTE: _agent/_agent_utils.py imports "CharSwapGenerator" but PyRIT + # exports "CharSwapConverter". This is a naming discrepancy in the SDK; + # the canonical PyRIT name is CharSwapConverter. + "CharSwapConverter", + "DiacriticConverter", + "FlipConverter", + "LeetspeakConverter", + "MathPromptConverter", + "MorseConverter", + "ROT13Converter", + "StringJoinConverter", + "SuffixAppendConverter", + "TenseConverter", + "UnicodeConfusableConverter", + "UnicodeSubstitutionConverter", + "UrlConverter", + ], + ) + def test_converter_importable(self, converter_name): + """Each converter used by azure-ai-evaluation must be importable from pyrit.prompt_converter.""" + import pyrit.prompt_converter as pc + + converter_class = getattr(pc, converter_name, None) + assert converter_class is not None, ( + f"{converter_name} not found in pyrit.prompt_converter — azure-ai-evaluation depends on this converter" + ) + + def test_ascii_smuggler_converter_importable(self): + """AsciiSmugglerConverter is imported in _agent/_agent_utils.py.""" + from pyrit.prompt_converter import AsciiSmugglerConverter + + assert AsciiSmugglerConverter is not None + + def test_llm_generic_text_converter_importable(self): + """LLMGenericTextConverter is used for tense/translation strategies.""" + from pyrit.prompt_converter import LLMGenericTextConverter + + assert LLMGenericTextConverter is not None diff --git a/tests/partner_integration/azure_ai_evaluation/test_exceptions_contract.py b/tests/partner_integration/azure_ai_evaluation/test_exceptions_contract.py new file mode 100644 index 0000000000..1ada6ba3d4 --- /dev/null +++ 
b/tests/partner_integration/azure_ai_evaluation/test_exceptions_contract.py @@ -0,0 +1,65 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for PyRIT exception types and retry decorators used by azure-ai-evaluation. + +The azure-ai-evaluation red team module uses these in: +- _callback_chat_target.py: EmptyResponseException, RateLimitException, pyrit_target_retry +- _rai_service_target.py: remove_markdown_json +""" + +from pyrit.exceptions import ( + EmptyResponseException, + RateLimitException, + pyrit_target_retry, + remove_markdown_json, +) + + +class TestExceptionTypesContract: + """Validate exception types exist and are proper Exception subclasses.""" + + def test_empty_response_exception_is_exception(self): + """_CallbackChatTarget catches EmptyResponseException.""" + assert issubclass(EmptyResponseException, Exception) + + def test_rate_limit_exception_is_exception(self): + """_CallbackChatTarget catches RateLimitException.""" + assert issubclass(RateLimitException, Exception) + + def test_empty_response_exception_instantiable(self): + """Verify EmptyResponseException can be instantiated with no arguments.""" + exc = EmptyResponseException() + assert isinstance(exc, Exception) + + def test_rate_limit_exception_instantiable(self): + """Verify RateLimitException can be instantiated with no arguments.""" + exc = RateLimitException() + assert isinstance(exc, Exception) + + +class TestRetryDecoratorContract: + """Validate retry decorator availability.""" + + def test_pyrit_target_retry_is_callable(self): + """_CallbackChatTarget uses @pyrit_target_retry decorator.""" + assert callable(pyrit_target_retry) + + +class TestUtilityFunctionsContract: + """Validate utility functions used by azure-ai-evaluation.""" + + def test_remove_markdown_json_is_callable(self): + """_rai_service_target.py uses remove_markdown_json.""" + assert callable(remove_markdown_json) + + def test_remove_markdown_json_handles_plain_text(self): + """Verify 
remove_markdown_json passes through plain text.""" + result = remove_markdown_json("plain text") + assert isinstance(result, str) + + def test_remove_markdown_json_strips_markdown_fences(self): + """Verify remove_markdown_json strips ```json fences.""" + input_text = '```json\n{"key": "value"}\n```' + result = remove_markdown_json(input_text) + assert "```" not in result diff --git a/tests/partner_integration/azure_ai_evaluation/test_foundry_scenario_contract.py b/tests/partner_integration/azure_ai_evaluation/test_foundry_scenario_contract.py new file mode 100644 index 0000000000..e2e65a070b --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_foundry_scenario_contract.py @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for Foundry scenario APIs used by azure-ai-evaluation. + +The azure-ai-evaluation red team module uses the scenario framework for attack execution: +- FoundryExecutionManager creates FoundryScenario instances per risk category +- StrategyMapper maps AttackStrategy enum → FoundryStrategy +- DatasetConfigurationBuilder produces DatasetConfiguration from RAI objectives +- ScenarioOrchestrator processes ScenarioResult and AttackResult +- RAIServiceScorer uses AttackScoringConfig for scoring configuration +""" + +from pyrit.executor.attack import AttackScoringConfig +from pyrit.scenario import ScenarioStrategy +from pyrit.scenario.foundry import FoundryStrategy + + +class TestRedTeamStrategyContract: + """Validate FoundryStrategy availability and structure.""" + + def test_foundry_strategy_is_scenario_strategy(self): + """FoundryStrategy should extend ScenarioStrategy.""" + assert issubclass(FoundryStrategy, ScenarioStrategy) + + +class TestRedTeamScenarioContract: + """Validate FoundryScenario importability.""" + + def test_foundry_scenario_importable(self): + """ScenarioOrchestrator creates FoundryScenario instances.""" + from pyrit.scenario.foundry import FoundryScenario 
# noqa: F811 + + assert FoundryScenario is not None + + +class TestDatasetConfigurationContract: + """Validate DatasetConfiguration importability.""" + + def test_dataset_configuration_importable(self): + """DatasetConfigurationBuilder produces DatasetConfiguration.""" + from pyrit.scenario import DatasetConfiguration # noqa: F811 + + assert DatasetConfiguration is not None + + +class TestAttackScoringConfigContract: + """Validate AttackScoringConfig availability.""" + + def test_attack_scoring_config_has_expected_fields(self): + """A default-constructed AttackScoringConfig should expose objective_scorer and refusal_scorer attributes.""" + config = AttackScoringConfig() + assert hasattr(config, "objective_scorer") + assert hasattr(config, "refusal_scorer") + + +class TestScenarioResultContract: + """Validate ScenarioResult and AttackResult importability.""" + + def test_scenario_result_importable(self): + """ScenarioOrchestrator reads ScenarioResult.""" + from pyrit.models.scenario_result import ScenarioResult # noqa: F811 + + assert ScenarioResult is not None + + def test_attack_result_importable(self): + """FoundryResultProcessor processes AttackResult.""" + from pyrit.models import AttackResult + + assert AttackResult is not None + + def test_attack_outcome_importable(self): + """FoundryResultProcessor checks AttackOutcome values.""" + from pyrit.models import AttackOutcome + + assert AttackOutcome is not None diff --git a/tests/partner_integration/azure_ai_evaluation/test_import_smoke.py b/tests/partner_integration/azure_ai_evaluation/test_import_smoke.py new file mode 100644 index 0000000000..1bcd839b93 --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_import_smoke.py @@ -0,0 +1,113 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Import smoke tests for azure-ai-evaluation red team module integration. 
+ +These tests verify that the azure-ai-evaluation red team module can be imported +and that its PyRIT subclasses correctly extend PyRIT base classes. + +Tests are SKIPPED if azure-ai-evaluation[redteam] is not installed. +""" + +import pytest + +from pyrit.prompt_target import PromptTarget +from pyrit.score.true_false.true_false_scorer import TrueFalseScorer + + +def _azure_ai_evaluation_available() -> bool: + """Check if azure-ai-evaluation[redteam] is installed.""" + try: + from azure.ai.evaluation.red_team import RedTeam # noqa: F401 + + return True + except ImportError: + return False + + +requires_azure_ai_evaluation = pytest.mark.skipif( + not _azure_ai_evaluation_available(), + reason="azure-ai-evaluation[redteam] is not installed", +) + + +@requires_azure_ai_evaluation +class TestRedTeamModuleImports: + """Verify azure-ai-evaluation red_team module imports succeed with current PyRIT.""" + + def test_redteam_public_api_imports(self): + """Verify all public classes from azure.ai.evaluation.red_team are importable.""" + from azure.ai.evaluation.red_team import ( + AttackStrategy, + RedTeam, + RedTeamResult, + RiskCategory, + SupportedLanguages, + ) + + assert RedTeam is not None + assert AttackStrategy is not None + assert RiskCategory is not None + assert RedTeamResult is not None + assert SupportedLanguages is not None + + +class TestPromptChatTargetTransitionalCompat: + """Verify PromptChatTarget still exists and extends PromptTarget. + + The SDK currently imports PromptChatTarget in 6+ production files + (_callback_chat_target.py, _orchestrator_manager.py, _scenario_orchestrator.py, + _execution_manager.py, strategy_utils.py, _rai_service_target.py). PyRIT is + migrating from PromptChatTarget to PromptTarget, but during the transition + both must exist with correct inheritance. 
+ """ + + def test_prompt_chat_target_exists(self): + """PromptChatTarget must remain importable during the transition.""" + from pyrit.prompt_target import PromptChatTarget + + assert PromptChatTarget is not None + + def test_prompt_chat_target_extends_prompt_target(self): + """PromptChatTarget must be a subclass of PromptTarget.""" + from pyrit.prompt_target import PromptChatTarget + + assert issubclass(PromptChatTarget, PromptTarget) + + +@requires_azure_ai_evaluation +class TestCallbackChatTargetInheritance: + """Verify _CallbackChatTarget correctly extends PromptTarget. + + NOTE: These tests intentionally import private (_-prefixed) modules from + azure-ai-evaluation. This is correct for contract testing — we need to verify + the actual subclass relationships that PyRIT API changes could break. + + Explicit inheritance checks are REQUIRED here because: + 1. PyRIT orchestrators and scenarios detect subclasses via issubclass() at + runtime to determine capabilities (multi-turn, system prompt support, etc.) + 2. If the inheritance chain breaks, attacks silently fall back to single-turn + mode or skip system prompt injection — causing false negatives. + 3. These checks catch breaking changes that import-only tests would miss. + """ + + def test_callback_chat_target_extends_prompt_target(self): + """_CallbackChatTarget must be a subclass of pyrit.prompt_target.PromptTarget.""" + from azure.ai.evaluation.red_team._callback_chat_target import _CallbackChatTarget + + assert issubclass(_CallbackChatTarget, PromptTarget) + + +@requires_azure_ai_evaluation +class TestRAIScorerInheritance: + """Verify RAIServiceScorer correctly extends TrueFalseScorer. + + Explicit inheritance check — see TestCallbackChatTargetInheritance docstring + for why issubclass() contract tests are necessary. 
+ """ + + def test_rai_scorer_extends_true_false_scorer(self): + """RAIServiceScorer must be a subclass of pyrit.score.true_false.TrueFalseScorer.""" + from azure.ai.evaluation.red_team._foundry._rai_scorer import RAIServiceScorer # private: intentional + + assert issubclass(RAIServiceScorer, TrueFalseScorer) diff --git a/tests/partner_integration/azure_ai_evaluation/test_model_contract.py b/tests/partner_integration/azure_ai_evaluation/test_model_contract.py new file mode 100644 index 0000000000..78f565c768 --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_model_contract.py @@ -0,0 +1,325 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for PyRIT data models used by azure-ai-evaluation. + +The red team module uses these models extensively: +- Message / MessagePiece: Every request/response path +- Score / UnvalidatedScore: Scoring pipeline +- SeedPrompt / SeedObjective / SeedGroup: DatasetConfigurationBuilder +- AttackResult / AttackOutcome: FoundryResultProcessor +- ChatMessage: formatting_utils.py +- PromptDataType: Type enum used across converters and models +- construct_response_from_request: Response construction +""" + +import uuid + +from pyrit.models import ( + Message, + MessagePiece, + PromptDataType, + SeedGroup, + SeedObjective, + SeedPrompt, + construct_response_from_request, +) + + +class TestMessageContract: + """Validate Message and MessagePiece interfaces.""" + + def test_message_piece_minimal_constructor(self): + """_CallbackChatTarget creates MessagePiece with role, original_value, conversation_id.""" + piece = MessagePiece( + role="user", + original_value="test prompt", + conversation_id=str(uuid.uuid4()), + ) + assert piece.api_role == "user" + assert piece.original_value == "test prompt" + + def test_message_piece_to_message(self): + """_CallbackChatTarget calls piece.to_message() to convert to Message.""" + piece = MessagePiece( + role="user", + original_value="test", + 
conversation_id=str(uuid.uuid4()), + ) + msg = piece.to_message() + assert isinstance(msg, Message) + assert len(msg.message_pieces) == 1 + + def test_message_get_value(self): + """_CallbackChatTarget accesses message.get_value() for the response text.""" + piece = MessagePiece( + role="assistant", + original_value="response text", + conversation_id=str(uuid.uuid4()), + ) + msg = piece.to_message() + assert msg.get_value() == "response text" + + def test_message_pieces_attribute(self): + """azure-ai-evaluation accesses message.message_pieces list.""" + piece = MessagePiece( + role="user", + original_value="test", + conversation_id=str(uuid.uuid4()), + ) + msg = piece.to_message() + assert hasattr(msg, "message_pieces") + assert isinstance(msg.message_pieces, (list, tuple)) + + def test_message_piece_has_converted_value(self): + """azure-ai-evaluation reads message_piece.converted_value for responses.""" + piece = MessagePiece( + role="assistant", + original_value="original", + converted_value="converted", + conversation_id=str(uuid.uuid4()), + ) + assert piece.converted_value == "converted" + + def test_message_piece_has_conversation_id(self): + """Conversation tracking relies on conversation_id field.""" + conv_id = str(uuid.uuid4()) + piece = MessagePiece( + role="user", + original_value="test", + conversation_id=conv_id, + ) + assert piece.conversation_id == conv_id + + def test_message_piece_has_prompt_metadata(self): + """_CallbackChatTarget reads piece.prompt_metadata for context extraction. + + In the Foundry path, context SeedPrompts are stored as prepended_conversation + in memory. _CallbackChatTarget reads prompt_metadata (is_context, tool_name, + context_type) via getattr(piece, 'prompt_metadata', None) to reconstruct + the context dict for agent callbacks. 
+ """ + piece = MessagePiece( + role="user", + original_value="context content", + conversation_id=str(uuid.uuid4()), + prompt_metadata={"is_context": True, "tool_name": "doc_reader"}, + ) + assert hasattr(piece, "prompt_metadata") + metadata = getattr(piece, "prompt_metadata", None) or {} + assert metadata.get("is_context") is True + assert metadata.get("tool_name") == "doc_reader" + + def test_message_piece_prompt_metadata_defaults_empty(self): + """prompt_metadata should default to empty/None when not provided.""" + piece = MessagePiece( + role="user", + original_value="test", + conversation_id=str(uuid.uuid4()), + ) + metadata = getattr(piece, "prompt_metadata", None) or {} + assert not metadata.get("is_context") + + +class TestScoreModels: + """Validate Score and UnvalidatedScore interfaces.""" + + def test_score_importable(self): + """RAIServiceScorer and AzureRAIServiceTrueFalseScorer return Score objects.""" + from pyrit.models import Score + + assert Score is not None + + def test_unvalidated_score_importable(self): + """Scorers create UnvalidatedScore before validation.""" + from pyrit.models import UnvalidatedScore + + assert UnvalidatedScore is not None + + +class TestSeedModels: + """Validate seed data models used by DatasetConfigurationBuilder. + + These tests cover the full contract including context propagation patterns + from PR #46151 (sensitive_data_leakage tool context flow). 
+ """ + + def test_seed_prompt_accepts_value(self): + """SeedPrompt requires a value field (the actual prompt text).""" + prompt = SeedPrompt(value="test prompt") + assert prompt.value == "test prompt" + + def test_seed_prompt_has_data_type(self): + """SeedPrompt.data_type defaults to 'text' for string values.""" + prompt = SeedPrompt(value="test") + assert prompt.data_type == "text" + + def test_seed_prompt_explicit_text_data_type(self): + """DatasetConfigurationBuilder passes data_type='text' explicitly for context SeedPrompts.""" + prompt = SeedPrompt(value="context content", data_type="text") + assert prompt.data_type == "text" + + def test_seed_prompt_has_harm_categories(self): + """DatasetConfigurationBuilder sets harm_categories on SeedPrompt.""" + prompt = SeedPrompt(value="test", harm_categories=["violence"]) + assert "violence" in prompt.harm_categories + + def test_seed_prompt_has_role(self): + """SeedPrompt supports role field for conversation context.""" + prompt = SeedPrompt(value="test", role="user") + assert prompt.role == "user" + + def test_seed_prompt_has_metadata(self): + """DatasetConfigurationBuilder attaches metadata to SeedPrompt.""" + prompt = SeedPrompt(value="test", metadata={"key": "val"}) + assert prompt.metadata["key"] == "val" + + def test_seed_prompt_has_prompt_group_id(self): + """DatasetConfigurationBuilder sets prompt_group_id for grouping seeds.""" + group_id = str(uuid.uuid4()) + prompt = SeedPrompt(value="test", prompt_group_id=group_id) + assert prompt.prompt_group_id == group_id + + def test_seed_prompt_has_sequence(self): + """DatasetConfigurationBuilder uses sequence for ordering within a group. + + Context SeedPrompts get lower sequence values; the objective SeedPrompt + gets a higher sequence so PyRIT uses it as next_message. 
+ """ + prompt = SeedPrompt(value="test", sequence=3) + assert prompt.sequence == 3 + + def test_seed_prompt_context_pattern(self): + """DatasetConfigurationBuilder creates context SeedPrompts with is_context metadata. + + This pattern is critical for sensitive_data_leakage: context SeedPrompts + carry tool_name and context_type in metadata so _CallbackChatTarget can + extract them from conversation history and pass to the agent callback. + """ + group_id = str(uuid.uuid4()) + ctx_metadata = { + "is_context": True, + "context_index": 0, + "original_content_length": 42, + "tool_name": "document_client_smode", + "context_type": "document", + } + prompt = SeedPrompt( + value="SSN: 123-45-6789", + data_type="text", + prompt_group_id=group_id, + metadata=ctx_metadata, + role="user", + sequence=1, + ) + assert prompt.metadata["is_context"] is True + assert prompt.metadata["tool_name"] == "document_client_smode" + assert prompt.metadata["context_type"] == "document" + assert prompt.value == "SSN: 123-45-6789" + assert prompt.data_type == "text" + assert prompt.prompt_group_id == group_id + assert prompt.sequence == 1 + assert prompt.role == "user" + + def test_seed_objective_accepts_value(self): + """SeedObjective requires a value field (the objective text).""" + obj = SeedObjective(value="test objective") + assert obj.value == "test objective" + + def test_seed_objective_has_harm_categories(self): + """DatasetConfigurationBuilder sets harm_categories on SeedObjective.""" + obj = SeedObjective(value="test", harm_categories=["hate"]) + assert "hate" in obj.harm_categories + + def test_seed_objective_has_metadata(self): + """DatasetConfigurationBuilder attaches context_items to SeedObjective metadata.""" + context_items = [{"content": "secret data", "tool_name": "doc_reader"}] + obj = SeedObjective(value="test", metadata={"context_items": context_items}) + assert obj.metadata["context_items"] == context_items + + def test_seed_group_accepts_seeds(self): + """SeedGroup 
groups multiple seeds together.""" + prompt = SeedPrompt(value="prompt text", role="user") + obj = SeedObjective(value="objective text") + group = SeedGroup(seeds=[prompt, obj]) + assert len(group.seeds) == 2 + + def test_seed_group_mixed_context_pattern(self): + """DatasetConfigurationBuilder creates SeedGroups with objective + context seeds. + + For standard attacks with context (sensitive_data_leakage), a SeedGroup contains: + 1. SeedObjective — the attack objective + 2. Context SeedPrompt(s) — at lower sequence, is_context=True in metadata + 3. Objective SeedPrompt — at highest sequence (the actual prompt sent to the target) + """ + group_id = str(uuid.uuid4()) + + objective = SeedObjective( + value="Extract PII from the document", + metadata={"context_items": [{"content": "SSN: 123", "tool_name": "doc_reader"}]}, + ) + context_seed = SeedPrompt( + value="SSN: 123-45-6789", + data_type="text", + prompt_group_id=group_id, + metadata={"is_context": True, "tool_name": "doc_reader", "context_type": "document"}, + role="user", + sequence=1, + ) + objective_prompt = SeedPrompt( + value="Extract PII from the document", + data_type="text", + prompt_group_id=group_id, + role="user", + sequence=2, + ) + + group = SeedGroup(seeds=[objective, context_seed, objective_prompt]) + assert len(group.seeds) == 3 + + # Verify sequence ordering: context < objective prompt + seed_prompts = [s for s in group.seeds if isinstance(s, SeedPrompt)] + context_seeds = [s for s in seed_prompts if s.metadata.get("is_context")] + non_context_seeds = [s for s in seed_prompts if not s.metadata.get("is_context")] + assert len(context_seeds) == 1 + assert len(non_context_seeds) == 1 + assert context_seeds[0].sequence < non_context_seeds[0].sequence + + +class TestMiscModels: + """Validate miscellaneous models used by azure-ai-evaluation.""" + + def test_chat_message_importable(self): + """formatting_utils.py imports ChatMessage.""" + from pyrit.models import ChatMessage + + assert ChatMessage is 
not None + + def test_prompt_data_type_has_text(self): + """_DefaultConverter and _dataset_builder check for 'text' data type.""" + # PromptDataType is a Literal type; verify "text" is a valid value + from typing import get_args + + valid_types = get_args(PromptDataType) + assert "text" in valid_types + + def test_scenario_result_importable(self): + """ScenarioOrchestrator reads ScenarioResult.""" + from pyrit.models import ScenarioResult + + assert ScenarioResult is not None + + def test_construct_response_from_request_signature(self): + """Verify construct_response_from_request accepts expected parameters.""" + piece = MessagePiece( + role="user", + original_value="test", + conversation_id=str(uuid.uuid4()), + ) + # Call with keyword arguments: request, response_text_pieces, response_type + result = construct_response_from_request( + request=piece, + response_text_pieces=["response"], + response_type="text", + ) + assert isinstance(result, Message) diff --git a/tests/partner_integration/azure_ai_evaluation/test_prompt_target_contract.py b/tests/partner_integration/azure_ai_evaluation/test_prompt_target_contract.py new file mode 100644 index 0000000000..68f69f5c8d --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_prompt_target_contract.py @@ -0,0 +1,99 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for PromptTarget interface used by azure-ai-evaluation. + +The azure-ai-evaluation red team module extends PromptTarget in four places: +- _CallbackChatTarget (wraps user callbacks) +- AzureRAIServiceTarget (sends prompts to RAI service) +- RAIServiceEvalChatTarget (evaluation-specific RAI target) +- _rai_service_target.py (multi-turn jailbreak target) + +These tests ensure the base class interface remains stable. 
+""" + +import uuid + +import pytest + +from pyrit.models import Message, MessagePiece, construct_response_from_request +from pyrit.prompt_target import PromptTarget + + +class _MinimalTarget(PromptTarget): + """Minimal concrete PromptTarget for contract testing.""" + + async def send_prompt_async(self, *, message: Message) -> list[Message]: + return [] + + def _validate_request(self, *, message) -> None: + pass + + +class TestPromptTargetContract: + """Validate PromptTarget base class interface stability.""" + + def test_prompt_target_is_abstract(self): + """PromptTarget should not be directly instantiable (has abstract methods).""" + with pytest.raises(TypeError): + PromptTarget() + + def test_prompt_target_has_send_prompt_async(self): + """azure-ai-evaluation overrides send_prompt_async in all subclasses.""" + assert hasattr(PromptTarget, "send_prompt_async") + + def test_prompt_target_subclassable_with_send_prompt_async(self): + """azure-ai-evaluation creates subclasses that implement send_prompt_async.""" + target = _MinimalTarget() + assert isinstance(target, PromptTarget) + + def test_prompt_target_init_accepts_keyword_args(self): + """PromptTarget.__init__ should accept max_requests_per_minute.""" + target = _MinimalTarget(max_requests_per_minute=60) + assert target is not None + + def test_construct_response_from_request_is_callable(self): + """AzureRAIServiceTarget uses construct_response_from_request to build responses.""" + assert callable(construct_response_from_request) + + def test_construct_response_from_request_returns_message(self): + """Verify construct_response_from_request produces a Message from a MessagePiece.""" + request_piece = MessagePiece( + role="user", + original_value="test prompt", + conversation_id=str(uuid.uuid4()), + ) + response = construct_response_from_request( + request=request_piece, + response_text_pieces=["test response"], + ) + assert isinstance(response, Message) + assert len(response.message_pieces) == 1 + assert 
response.message_pieces[0].converted_value == "test response" + assert response.message_pieces[0].api_role == "assistant" + + def test_prompt_target_has_memory_attribute(self): + """azure-ai-evaluation accesses self._memory on PromptTarget subclasses.""" + target = _MinimalTarget() + # _memory is set during initialization or via property + assert hasattr(target, "_memory") + + +class TestOpenAIChatTargetContract: + """Validate OpenAIChatTarget importability and interface. + + strategy_utils.py imports OpenAIChatTarget for get_chat_target() and + converter strategy instantiation (e.g., TenseConverter needs a chat target). + """ + + def test_openai_chat_target_importable(self): + """OpenAIChatTarget must be importable from pyrit.prompt_target.""" + from pyrit.prompt_target import OpenAIChatTarget + + assert OpenAIChatTarget is not None + + def test_openai_chat_target_extends_prompt_target(self): + """OpenAIChatTarget must be a PromptTarget subclass.""" + from pyrit.prompt_target import OpenAIChatTarget + + assert issubclass(OpenAIChatTarget, PromptTarget) diff --git a/tests/partner_integration/azure_ai_evaluation/test_scorer_contract.py b/tests/partner_integration/azure_ai_evaluation/test_scorer_contract.py new file mode 100644 index 0000000000..d48c895b14 --- /dev/null +++ b/tests/partner_integration/azure_ai_evaluation/test_scorer_contract.py @@ -0,0 +1,58 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Contract tests for Scorer and TrueFalseScorer interfaces used by azure-ai-evaluation. + +The azure-ai-evaluation red team module extends these classes: +- AzureRAIServiceTrueFalseScorer extends Scorer +- RAIServiceScorer extends TrueFalseScorer + +Both are critical for scoring attack results. 
+""" + +from pyrit.score import ScorerPromptValidator +from pyrit.score.scorer import Scorer +from pyrit.score.true_false.true_false_scorer import TrueFalseScorer + + +class TestScorerContract: + """Validate Scorer base class interface stability.""" + + def test_scorer_has_score_piece_async(self): + """Scorer subclasses must implement _score_piece_async.""" + assert hasattr(Scorer, "_score_piece_async") + + def test_scorer_has_validate_return_scores(self): + """Scorer subclasses must implement validate_return_scores.""" + assert hasattr(Scorer, "validate_return_scores") + + def test_scorer_has_get_scorer_metrics(self): + """Scorer subclasses must implement get_scorer_metrics.""" + assert hasattr(Scorer, "get_scorer_metrics") + + +class TestTrueFalseScorerContract: + """Validate TrueFalseScorer interface stability.""" + + def test_true_false_scorer_extends_scorer(self): + """RAIServiceScorer extends TrueFalseScorer which extends Scorer.""" + assert issubclass(TrueFalseScorer, Scorer) + + def test_true_false_scorer_has_validate_return_scores(self): + """TrueFalseScorer implements validate_return_scores.""" + assert hasattr(TrueFalseScorer, "validate_return_scores") + + +class TestScorerUtilities: + """Validate scorer utility classes used by azure-ai-evaluation.""" + + def test_scorer_identifier_importable(self): + """RAIServiceScorer uses ScorerIdentifier for identity tracking.""" + from pyrit.identifiers import ScorerIdentifier + + assert ScorerIdentifier is not None + + def test_scorer_prompt_validator_instantiable(self): + """ScorerPromptValidator should accept supported_data_types kwarg.""" + validator = ScorerPromptValidator(supported_data_types=["text"]) + assert validator is not None diff --git a/tests/partner_integration/azure_ai_evaluation/test_sqlite_memory_contract.py b/tests/partner_integration/azure_ai_evaluation/test_sqlite_memory_contract.py new file mode 100644 index 0000000000..71b580a2f2 --- /dev/null +++ 
# ---- File: tests/partner_integration/azure_ai_evaluation/test_sqlite_memory_contract.py ----
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Contract tests for SQLiteMemory used by azure-ai-evaluation.

The azure-ai-evaluation RedTeam class initializes PyRIT memory during __init__:
    CentralMemory.set_memory_instance(SQLiteMemory())

Multiple modules also access memory via CentralMemory.get_memory_instance().
These tests validate both the memory lifecycle contract and the query methods
the SDK calls to store/retrieve attack results, scores, and conversations.

Memory query methods used by the SDK:
- get_scenario_results(): _scenario_orchestrator.py (partial result recovery)
- add_scores_to_memory(): _rai_scorer.py (score storage)
- get_message_pieces(): _foundry_result_processor.py (conversation retrieval)
- get_prompt_request_pieces(): formatting_utils.py (label-based queries)
- get_conversation(): _callback_chat_target.py (multi-turn history)
"""

import contextlib
from collections.abc import Iterator

from pyrit.memory import CentralMemory, SQLiteMemory


@contextlib.contextmanager
def _disposing(memory: SQLiteMemory) -> Iterator[SQLiteMemory]:
    """Yield ``memory`` and dispose its engine on exit, even when an assertion fails.

    Args:
        memory: The SQLiteMemory instance a test is exercising.

    Yields:
        The same instance that was passed in.
    """
    try:
        yield memory
    finally:
        # Runs on both the success and failure path so a failing assert cannot leak the engine.
        memory.dispose_engine()


class TestMemoryLifecycleContract:
    """Validate CentralMemory/SQLiteMemory interface stability."""

    def test_sqlite_memory_default_constructor(self):
        """RedTeam.__init__ calls SQLiteMemory() with no args."""
        with _disposing(SQLiteMemory()) as memory:
            assert memory is not None

    def test_sqlite_memory_in_memory_constructor(self):
        """Partner tests use SQLiteMemory(db_path=':memory:')."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert memory is not None

    def test_central_memory_set_and_get_instance(self):
        """RedTeam.__init__ sets memory; formatting_utils.py and _rai_scorer.py retrieve it."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            CentralMemory.set_memory_instance(memory)
            retrieved = CentralMemory.get_memory_instance()
            assert retrieved is memory

    def test_sqlite_memory_has_disable_embedding(self):
        """Test fixtures call disable_embedding() on SQLiteMemory."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "disable_embedding")
            assert callable(memory.disable_embedding)
            memory.disable_embedding()

    def test_sqlite_memory_has_reset_database(self):
        """Test fixtures call reset_database() on SQLiteMemory."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "reset_database")
            assert callable(memory.reset_database)

    def test_sqlite_memory_has_dispose_engine(self):
        """Cleanup requires dispose_engine()."""
        # dispose_engine is the method under test, so it is checked and called explicitly
        # rather than through _disposing (which would hide a missing method behind its own
        # AttributeError during cleanup).
        memory = SQLiteMemory(db_path=":memory:")
        assert hasattr(memory, "dispose_engine")
        assert callable(memory.dispose_engine)
        memory.dispose_engine()


class TestMemoryQueryMethodContract:
    """Validate that memory query methods used by azure-ai-evaluation exist.

    These tests verify method existence and callability on the memory interface.
    The SDK calls these methods to store/retrieve attack results, scores,
    and conversation history during red team scans.
    """

    def test_memory_has_get_scenario_results(self):
        """_scenario_orchestrator.py calls memory.get_scenario_results(scenario_result_ids=[...])."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "get_scenario_results")
            assert callable(memory.get_scenario_results)

    def test_memory_has_add_scores_to_memory(self):
        """_rai_scorer.py calls memory.add_scores_to_memory(scores=[Score])."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "add_scores_to_memory")
            assert callable(memory.add_scores_to_memory)

    def test_memory_has_get_message_pieces(self):
        """_foundry_result_processor.py calls memory.get_message_pieces(conversation_id=...)."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "get_message_pieces")
            assert callable(memory.get_message_pieces)

    def test_memory_has_get_prompt_request_pieces_or_equivalent(self):
        """formatting_utils.py calls memory.get_prompt_request_pieces(labels={...}).

        NOTE: In newer PyRIT versions, this was consolidated into get_message_pieces(labels=...).
        The SDK may need updating if it still references the old name. This test only checks
        that at least one of the two method names exists on the memory object; the ``labels``
        keyword itself is exercised by test_get_message_pieces_with_labels_returns_list.
        """
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            has_legacy = hasattr(memory, "get_prompt_request_pieces")
            has_current = hasattr(memory, "get_message_pieces")
            assert has_legacy or has_current, (
                "Neither get_prompt_request_pieces nor get_message_pieces found on memory. "
                "formatting_utils.py depends on one of these for label-based queries."
            )

    def test_memory_has_get_conversation(self):
        """_callback_chat_target.py calls memory.get_conversation(conversation_id=...)."""
        with _disposing(SQLiteMemory(db_path=":memory:")) as memory:
            assert hasattr(memory, "get_conversation")
            assert callable(memory.get_conversation)

    def test_get_conversation_returns_list(self, sqlite_instance):
        """get_conversation should return a list (empty for unknown conversation_id)."""
        result = sqlite_instance.get_conversation(conversation_id="nonexistent-id")
        assert isinstance(result, list)

    def test_get_message_pieces_with_labels_returns_list(self, sqlite_instance):
        """get_message_pieces(labels={...}) should return a list or tuple (empty for no matches).

        This is the current equivalent of the SDK's get_prompt_request_pieces(labels=...) call.
        """
        result = sqlite_instance.get_message_pieces(labels={"nonexistent": "label"})
        assert isinstance(result, (list, tuple))

    def test_get_message_pieces_returns_list(self, sqlite_instance):
        """get_message_pieces should return a list (empty for unknown conversation_id)."""
        result = sqlite_instance.get_message_pieces(conversation_id="nonexistent-id")
        assert isinstance(result, list)


# ---- File: tests/partner_integration/conftest.py ----
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Shared fixtures for partner integration tests.

These tests validate that PyRIT's public APIs remain compatible with
partner packages that depend on them (e.g., azure-ai-evaluation[redteam]).
They do NOT require Azure credentials — all tests use in-memory fixtures.
"""

import asyncio
import os
import tempfile
from collections.abc import Generator
from unittest.mock import patch

import pytest
from sqlalchemy import inspect

from pyrit.memory.central_memory import CentralMemory
from pyrit.memory.sqlite_memory import SQLiteMemory
from pyrit.setup import IN_MEMORY, initialize_pyrit_async

# Limit retries for deterministic testing
os.environ["RETRY_MAX_NUM_ATTEMPTS"] = "3"
os.environ["RETRY_WAIT_MIN_SECONDS"] = "0"
os.environ["RETRY_WAIT_MAX_SECONDS"] = "1"


@pytest.fixture(scope="session", autouse=True)
def _initialize_pyrit() -> None:
    """Initialize PyRIT with in-memory database once per test session."""
    asyncio.run(initialize_pyrit_async(memory_db_type=IN_MEMORY))


@pytest.fixture(autouse=True)
def _restore_central_memory() -> Generator[None, None, None]:
    """Save and restore CentralMemory singleton between tests.

    Prevents tests that call CentralMemory.set_memory_instance() from
    leaking state into subsequent tests.
    """
    previous = CentralMemory._memory_instance
    yield
    CentralMemory._memory_instance = previous


@pytest.fixture
def sqlite_instance() -> Generator[SQLiteMemory, None, None]:
    """Provide an in-memory SQLite database for partner integration tests.

    The instance gets a temporary results directory, has embedding disabled and its
    database reset, and is installed as the CentralMemory instance before being yielded.
    On teardown the temporary directory is removed first and the engine disposed second;
    both steps also run if one of the setup assertions fails.

    Yields:
        The configured SQLiteMemory instance.
    """
    sqlite_memory = SQLiteMemory(db_path=":memory:")
    try:
        with tempfile.TemporaryDirectory() as results_dir:
            sqlite_memory.results_path = results_dir
            sqlite_memory.disable_embedding()
            sqlite_memory.reset_database()

            # Sanity-check that reset_database() created the tables the tests rely on.
            table_names = inspect(sqlite_memory.engine).get_table_names()
            assert "PromptMemoryEntries" in table_names
            assert "ScoreEntries" in table_names
            assert "SeedPromptEntries" in table_names

            CentralMemory.set_memory_instance(sqlite_memory)
            yield sqlite_memory
    finally:
        sqlite_memory.dispose_engine()


@pytest.fixture
def patch_central_database(sqlite_instance: SQLiteMemory):
    """Mock CentralMemory.get_memory_instance for isolated tests.

    Args:
        sqlite_instance: The in-memory database the patched getter returns.

    Yields:
        The mock object that replaces CentralMemory.get_memory_instance while the fixture is active.
    """
    with patch.object(CentralMemory, "get_memory_instance", return_value=sqlite_instance) as mock:
        yield mock