Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 53 additions & 52 deletions doc/code/converters/0_converters.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,58 +84,59 @@
"23 text text AtbashConverter\n",
"24 text text Base2048Converter\n",
"25 text text Base64Converter\n",
"26 text text BinAsciiConverter\n",
"27 text text BinaryConverter\n",
"28 text text BrailleConverter\n",
"29 text text CaesarConverter\n",
"30 text text CharSwapConverter\n",
"31 text text CharacterSpaceConverter\n",
"32 text text CodeChameleonConverter\n",
"33 text text ColloquialWordswapConverter\n",
"34 text text DenylistConverter\n",
"35 text text DiacriticConverter\n",
"36 text text EcojiConverter\n",
"37 text text EmojiConverter\n",
"38 text text FirstLetterConverter\n",
"39 text text FlipConverter\n",
"40 text text ImagePromptStyleConverter\n",
"41 text text InsertPunctuationConverter\n",
"42 text text JsonStringConverter\n",
"43 text text LLMGenericTextConverter\n",
"44 text text LeetspeakConverter\n",
"45 text text MaliciousQuestionGeneratorConverter\n",
"46 text text MathObfuscationConverter\n",
"47 text text MathPromptConverter\n",
"48 text text MorseConverter\n",
"49 text text NatoConverter\n",
"50 text text NegationTrapConverter\n",
"51 text text NoiseConverter\n",
"52 text text PersuasionConverter\n",
"53 text text ROT13Converter\n",
"54 text text RandomCapitalLettersConverter\n",
"55 text text RandomTranslationConverter\n",
"56 text text RepeatTokenConverter\n",
"57 text text ScientificTranslationConverter\n",
"58 text text SearchReplaceConverter\n",
"59 text text SelectiveTextConverter\n",
"60 text text SneakyBitsSmugglerConverter\n",
"61 text text StringJoinConverter\n",
"62 text text SuffixAppendConverter\n",
"63 text text SuperscriptConverter\n",
"64 text text TemplateSegmentConverter\n",
"65 text text TenseConverter\n",
"66 text text TextJailbreakConverter\n",
"67 text text ToneConverter\n",
"68 text text ToxicSentenceGeneratorConverter\n",
"69 text text TranslationConverter\n",
"70 text text UnicodeConfusableConverter\n",
"71 text text UnicodeReplacementConverter\n",
"72 text text UnicodeSubstitutionConverter\n",
"73 text text UrlConverter\n",
"74 text text VariationConverter\n",
"75 text text VariationSelectorSmugglerConverter\n",
"76 text text ZalgoConverter\n",
"77 text text ZeroWidthConverter\n"
"26 text text BidiConverter\n",
"27 text text BinAsciiConverter\n",
"28 text text BinaryConverter\n",
"29 text text BrailleConverter\n",
"30 text text CaesarConverter\n",
"31 text text CharSwapConverter\n",
"32 text text CharacterSpaceConverter\n",
"33 text text CodeChameleonConverter\n",
"34 text text ColloquialWordswapConverter\n",
"35 text text DenylistConverter\n",
"36 text text DiacriticConverter\n",
"37 text text EcojiConverter\n",
"38 text text EmojiConverter\n",
"39 text text FirstLetterConverter\n",
"40 text text FlipConverter\n",
"41 text text ImagePromptStyleConverter\n",
"42 text text InsertPunctuationConverter\n",
"43 text text JsonStringConverter\n",
"44 text text LLMGenericTextConverter\n",
"45 text text LeetspeakConverter\n",
"46 text text MaliciousQuestionGeneratorConverter\n",
"47 text text MathObfuscationConverter\n",
"48 text text MathPromptConverter\n",
"49 text text MorseConverter\n",
"50 text text NatoConverter\n",
"51 text text NegationTrapConverter\n",
"52 text text NoiseConverter\n",
"53 text text PersuasionConverter\n",
"54 text text ROT13Converter\n",
"55 text text RandomCapitalLettersConverter\n",
"56 text text RandomTranslationConverter\n",
"57 text text RepeatTokenConverter\n",
"58 text text ScientificTranslationConverter\n",
"59 text text SearchReplaceConverter\n",
"60 text text SelectiveTextConverter\n",
"61 text text SneakyBitsSmugglerConverter\n",
"62 text text StringJoinConverter\n",
"63 text text SuffixAppendConverter\n",
"64 text text SuperscriptConverter\n",
"65 text text TemplateSegmentConverter\n",
"66 text text TenseConverter\n",
"67 text text TextJailbreakConverter\n",
"68 text text ToneConverter\n",
"69 text text ToxicSentenceGeneratorConverter\n",
"70 text text TranslationConverter\n",
"71 text text UnicodeConfusableConverter\n",
"72 text text UnicodeReplacementConverter\n",
"73 text text UnicodeSubstitutionConverter\n",
"74 text text UrlConverter\n",
"75 text text VariationConverter\n",
"76 text text VariationSelectorSmugglerConverter\n",
"77 text text ZalgoConverter\n",
"78 text text ZeroWidthConverter\n"
]
}
],
Expand Down
4 changes: 4 additions & 0 deletions doc/code/converters/1_text_to_text_converters.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@
"source": [
"from pyrit.prompt_converter import (\n",
" AnsiAttackConverter,\n",
" BidiConverter,\n",
" CharacterSpaceConverter,\n",
" CharSwapConverter,\n",
" CodeChameleonConverter,\n",
Expand Down Expand Up @@ -311,6 +312,9 @@
"# Character Space [@robustintelligence2024bypass] inserts spaces between characters\n",
"print(\"Character Space:\", await CharacterSpaceConverter().convert_async(prompt=prompt)) # type: ignore\n",
"print(\"Diacritic:\", await DiacriticConverter().convert_async(prompt=prompt)) # type: ignore\n",
"\n",
"# Bidi [@boucher2023trojan] wraps text in Unicode bidirectional control characters\n",
"print(\"Bidi:\", await BidiConverter().convert_async(prompt=prompt)) # type: ignore\n",
"print(\"Superscript:\", await SuperscriptConverter().convert_async(prompt=prompt)) # type: ignore\n",
"print(\"Zalgo:\", await ZalgoConverter().convert_async(prompt=prompt)) # type: ignore\n",
"\n",
Expand Down
4 changes: 4 additions & 0 deletions doc/code/converters/1_text_to_text_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
# %%
from pyrit.prompt_converter import (
AnsiAttackConverter,
BidiConverter,
CharacterSpaceConverter,
CharSwapConverter,
CodeChameleonConverter,
Expand Down Expand Up @@ -125,6 +126,9 @@
# Character Space [@robustintelligence2024bypass] inserts spaces between characters
print("Character Space:", await CharacterSpaceConverter().convert_async(prompt=prompt)) # type: ignore
print("Diacritic:", await DiacriticConverter().convert_async(prompt=prompt)) # type: ignore

# Bidi [@boucher2023trojan] wraps text in Unicode bidirectional control characters
print("Bidi:", await BidiConverter().convert_async(prompt=prompt)) # type: ignore
print("Superscript:", await SuperscriptConverter().convert_async(prompt=prompt)) # type: ignore
print("Zalgo:", await ZalgoConverter().convert_async(prompt=prompt)) # type: ignore

Expand Down
8 changes: 8 additions & 0 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,14 @@ @misc{microsoft2024skeletonkey
note = {Microsoft Security Blog},
}

@misc{boucher2023trojan,
title = {Trojan Source: Invisible Vulnerabilities},
author = {Nicholas Boucher and Ross Anderson},
year = {2023},
url = {https://trojansource.codes/},
note = {CVE-2021-42574, USENIX Security Symposium},
}

@misc{embracethered2024unicode,
title = {Hiding and Finding Text with Unicode Tags},
author = {Johann Rehberger},
Expand Down
2 changes: 2 additions & 0 deletions pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pyrit.prompt_converter.azure_speech_text_to_audio_converter import AzureSpeechTextToAudioConverter
from pyrit.prompt_converter.base64_converter import Base64Converter
from pyrit.prompt_converter.base2048_converter import Base2048Converter
from pyrit.prompt_converter.bidi_converter import BidiConverter
from pyrit.prompt_converter.bin_ascii_converter import BinAsciiConverter
from pyrit.prompt_converter.binary_converter import BinaryConverter
from pyrit.prompt_converter.braille_converter import BrailleConverter
Expand Down Expand Up @@ -156,6 +157,7 @@ def __getattr__(name: str) -> object:
"AzureSpeechTextToAudioConverter",
"Base2048Converter",
"Base64Converter",
"BidiConverter",
"BinAsciiConverter",
"BinaryConverter",
"BrailleConverter",
Expand Down
96 changes: 96 additions & 0 deletions pyrit/prompt_converter/bidi_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import ClassVar, Literal

from pyrit.identifiers import ComponentIdentifier
from pyrit.models import PromptDataType
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter

logger = logging.getLogger(__name__)


class BidiConverter(PromptConverter):
    """
    Surrounds a prompt with a matched pair of Unicode bidirectional control characters.

    An opening formatting code point is placed before the prompt and the corresponding closing
    code point after it, so the stored (logical) code point order may differ from the order a
    human reader sees once the text is rendered. This is the family of manipulation described
    by the "Trojan Source" findings (CVE-2021-42574). The conversion is deterministic: it uses
    neither a language model nor randomness, so an identical input and scheme always give an
    identical output.

    Schemes (per the Unicode Bidirectional Algorithm, UAX #9):
        - ``"override"``: RIGHT-TO-LEFT OVERRIDE (U+202E) ... POP DIRECTIONAL FORMATTING (U+202C).
        - ``"embedding"``: RIGHT-TO-LEFT EMBEDDING (U+202B) ... POP DIRECTIONAL FORMATTING (U+202C).
        - ``"isolate"``: RIGHT-TO-LEFT ISOLATE (U+2067) ... POP DIRECTIONAL ISOLATE (U+2069).

    References:
        - Boucher and Anderson, "Trojan Source: Invisible Vulnerabilities" (CVE-2021-42574),
          https://trojansource.codes/
        - Unicode Standard Annex #9, "Unicode Bidirectional Algorithm",
          https://www.unicode.org/reports/tr9/
    """

    SUPPORTED_INPUT_TYPES = ("text",)
    SUPPORTED_OUTPUT_TYPES = ("text",)

    # Each scheme name maps to its (opener, closer) pair. The characters are spelled as \u
    # escapes rather than literal code points so that this source file stays pure ASCII.
    _SCHEMES: ClassVar[dict[str, tuple[str, str]]] = {
        "override": ("\u202e", "\u202c"),  # RLO ... PDF
        "embedding": ("\u202b", "\u202c"),  # RLE ... PDF
        "isolate": ("\u2067", "\u2069"),  # RLI ... PDI
    }

    def __init__(self, *, scheme: Literal["override", "embedding", "isolate"] = "override") -> None:
        """
        Create a converter bound to one bidirectional control scheme.

        Args:
            scheme (Literal["override", "embedding", "isolate"]): Name of the control-character
                pair placed around the prompt. Defaults to ``"override"``.

        Raises:
            ValueError: If ``scheme`` is not one of the supported scheme names.
        """
        super().__init__()

        if scheme not in self._SCHEMES:
            known_schemes = list(self._SCHEMES)
            raise ValueError(f"Scheme '{scheme}' not recognized. Choose from {known_schemes}.")

        self._scheme = scheme

    def _build_identifier(self) -> ComponentIdentifier:
        """
        Build the identifier for this converter, recording the selected scheme as a parameter.

        Returns:
            ComponentIdentifier: The identifier for this converter.
        """
        identifier_params = {"scheme": self._scheme}
        return self._create_identifier(params=identifier_params)

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
        """
        Wrap the prompt in the opening and closing control characters of the configured scheme.

        Args:
            prompt (str): The prompt to be converted.
            input_type (PromptDataType): The type of input data.

        Returns:
            ConverterResult: A text result holding the wrapped prompt; an empty prompt yields
                an empty string with no control characters added.

        Raises:
            ValueError: If the input type is not supported.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        wrapped = ""
        if prompt:
            opener, closer = self._SCHEMES[self._scheme]
            wrapped = f"{opener}{prompt}{closer}"

        return ConverterResult(output_text=wrapped, output_type="text")
59 changes: 59 additions & 0 deletions tests/unit/prompt_converter/test_bidi_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pytest

from pyrit.prompt_converter import BidiConverter, ConverterResult

# Bidirectional control characters, written as \u escapes to keep this file pure ASCII
RLO = "\u202e"  # RIGHT-TO-LEFT OVERRIDE (U+202E)
RLE = "\u202b"  # RIGHT-TO-LEFT EMBEDDING (U+202B)
PDF = "\u202c"  # POP DIRECTIONAL FORMATTING (U+202C)
RLI = "\u2067"  # RIGHT-TO-LEFT ISOLATE (U+2067)
PDI = "\u2069"  # POP DIRECTIONAL ISOLATE (U+2069)


def test_input_supported():
    """The converter reports "text" as a supported input type and "image" as unsupported."""
    bidi = BidiConverter()
    text_ok = bidi.input_supported("text")
    assert text_ok is True
    image_ok = bidi.input_supported("image")
    assert image_ok is False


async def test_default_scheme_wraps_in_rlo_override():
    """With no scheme argument, the prompt comes back as RLO + prompt + PDF in a text result."""
    bidi = BidiConverter()
    converted = await bidi.convert_async(prompt="abc", input_type="text")

    assert isinstance(converted, ConverterResult)
    assert converted.output_type == "text"

    expected_text = RLO + "abc" + PDF
    assert converted.output_text == expected_text


async def test_embedding_scheme():
    """The "embedding" scheme wraps the prompt as RLE + prompt + PDF."""
    bidi = BidiConverter(scheme="embedding")
    converted = await bidi.convert_async(prompt="abc")
    expected_text = RLE + "abc" + PDF
    assert converted.output_text == expected_text


async def test_isolate_scheme():
    """The "isolate" scheme wraps the prompt as RLI + prompt + PDI."""
    bidi = BidiConverter(scheme="isolate")
    converted = await bidi.convert_async(prompt="abc")
    expected_text = RLI + "abc" + PDI
    assert converted.output_text == expected_text


async def test_empty_prompt_returns_empty():
    """An empty prompt is returned as an empty string, without any control characters."""
    bidi = BidiConverter()
    converted = await bidi.convert_async(prompt="")
    assert converted.output_text == ""


@pytest.mark.parametrize("scheme", ["override", "embedding", "isolate"])
async def test_conversion_is_deterministic(scheme):
    """Converting the same prompt twice with one converter yields identical output text."""
    bidi = BidiConverter(scheme=scheme)
    outputs = [(await bidi.convert_async(prompt="some prompt")).output_text for _ in range(2)]
    assert outputs[0] == outputs[1]


def test_invalid_scheme_raises():
    """Constructing the converter with an unknown scheme name raises a descriptive ValueError."""
    # Match on the message so that an unrelated ValueError raised during construction
    # cannot make this test pass by accident.
    with pytest.raises(ValueError, match="not recognized"):
        BidiConverter(scheme="nonsense")


async def test_input_type_not_supported_raises():
    """Converting with a non-text input type raises the "Input type not supported" ValueError."""
    bidi = BidiConverter()
    # Match on the message so that only the input-type check, and no other ValueError,
    # satisfies the expectation.
    with pytest.raises(ValueError, match="Input type not supported"):
        await bidi.convert_async(prompt="abc", input_type="image")
Loading