From 133efe9ea5948d224882fdc3d9c59f47abc32cf3 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Tue, 16 Jan 2024 15:24:00 -0800 Subject: [PATCH 1/6] fixing bug with red_teaming_bot --- .gitignore | 3 +++ examples/demo/1_gandalf.ipynb | 10 ++++----- pyrit/agent/red_teaming_bot.py | 27 +++++++++++----------- tests/test_red_teaming_bot.py | 41 ++++++++++++++++++++++++++++------ 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 59da102671..5bcd760145 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# PYrit secrets file +.env \ No newline at end of file diff --git a/examples/demo/1_gandalf.ipynb b/examples/demo/1_gandalf.ipynb index 86ce602227..6918b2483e 100644 --- a/examples/demo/1_gandalf.ipynb +++ b/examples/demo/1_gandalf.ipynb @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2023-10-27T18:50:44.458559Z", @@ -164,7 +164,7 @@ "\n", " # Step 3. Score the response\n", " score = gandalf_password_scorer.score_text(text=gandalf_text_response)\n", - " \n", + "\n", " # Step 4. 
Ask if we should continue\n", " print(f\"Score: {score.score_value}\")\n", " if score.score_value:\n", @@ -206,9 +206,9 @@ ], "metadata": { "kernelspec": { - "display_name": "pyrit_kernel", + "display_name": ".venv", "language": "python", - "name": "pyrit_kernel" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -220,7 +220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/pyrit/agent/red_teaming_bot.py b/pyrit/agent/red_teaming_bot.py index 0d31aa4a3c..7cfb059eb0 100644 --- a/pyrit/agent/red_teaming_bot.py +++ b/pyrit/agent/red_teaming_bot.py @@ -62,15 +62,21 @@ def get_session_chat_messages(self) -> list[ChatMessage]: return self._conversation_memory.get_chat_messages_with_session_id(session_id=self.session_id) def complete_chat_user(self, message: str, labels: list[str] = []) -> str: - session_messages: list[ChatMessage] = self.get_session_chat_messages() - if not session_messages: + message_list: list[ChatMessage] = [] + if not self.get_session_chat_messages(): # If there are no messages, then this is the first message of the conversation - self._add_chat_message_to_memory(ChatMessage(role="system", content=self._system_prompt), labels) + message_list.append(ChatMessage(role="system", content=self._system_prompt)) - self._add_chat_message_to_memory(ChatMessage(role="user", content=message), labels) + message_list.append(ChatMessage(role="user", content=message)) - response_msg = self._chat_engine.complete_chat(messages=session_messages) - self._add_chat_message_to_memory(ChatMessage(role="assistant", content=response_msg), labels) + response_msg = self._chat_engine.complete_chat(messages=message_list) + message_list.append(ChatMessage(role="assistant", content=response_msg)) + + self._conversation_memory.add_chat_messages_to_memory( + conversations=message_list, + session=self.session_id, + labels=self._global_memory_labels + labels, 
+ ) return response_msg @@ -90,11 +96,4 @@ def is_conversation_complete(self) -> bool: if RED_TEAM_CONVERSATION_END_TOKEN in current_messages[-1].content: # If the last message contains the conversation end token, then the conversation is complete return True - return False - - def _add_chat_message_to_memory(self, message: ChatMessage, labels: list[str]): - self._conversation_memory.add_chat_message_to_memory( - conversation=message, - session=self.session_id, - labels=self._global_memory_labels + labels, - ) + return False \ No newline at end of file diff --git a/tests/test_red_teaming_bot.py b/tests/test_red_teaming_bot.py index 1b713659fd..f79c952710 100644 --- a/tests/test_red_teaming_bot.py +++ b/tests/test_red_teaming_bot.py @@ -13,6 +13,7 @@ from pyrit.chat import AzureOpenAIChat from pyrit.models import PromptTemplate from pyrit.memory import FileMemory +from pyrit.common.path import HOME_PATH @pytest.fixture @@ -23,7 +24,9 @@ def openai_mock_return() -> ChatCompletion: choices=[ Choice( index=0, - message=ChatCompletionMessage(role="assistant", content="hi, I'm adversary chat."), + message=ChatCompletionMessage( + role="assistant", content="hi, I'm adversary chat." 
+ ), finish_reason="stop", logprobs=None, ) @@ -41,7 +44,10 @@ def chat_completion_engine() -> AzureOpenAIChat: @pytest.fixture def red_teaming_bot(chat_completion_engine: AzureOpenAIChat, tmp_path: pathlib.Path): attack_strategy = PromptTemplate.from_yaml_file( - pathlib.Path(os.getcwd()) / "datasets" / "attack_strategies" / "red_team_chatbot_with_objective.yaml" + pathlib.Path(HOME_PATH) + / "datasets" + / "attack_strategies" + / "red_team_chatbot_with_objective.yaml" ) file_memory = FileMemory(filepath=tmp_path / "test.json.memory") @@ -56,7 +62,9 @@ def red_teaming_bot(chat_completion_engine: AzureOpenAIChat, tmp_path: pathlib.P def test_complete_chat_user(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" + mock.return_value = ( + "Hello, this is a message sent by the assistant. How can i help you?" + ) red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") chats = red_teaming_bot.get_session_chat_messages() assert len(chats) == 3, f"Expected 3 chats, got {len(chats)}" @@ -71,9 +79,24 @@ def test_complete_chat_user(red_teaming_bot: RedTeamingBot): assert "Instructions" in chats[0].content +def test_complete_chat_user_calls_complete_chat(red_teaming_bot: RedTeamingBot): + with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: + mock.return_value = ( + "Hello, this is a message sent by the assistant. How can i help you?" + ) + red_teaming_bot.complete_chat_user("new chat") + + args, kwargs = mock.call_args + assert kwargs["messages"] is not None + assert kwargs["messages"][0].role == "system" + assert kwargs["messages"][1].content == "new chat" + + def test_is_conversation_complete_false(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = "Hello, this is a message sent by the assistant. 
How can i help you?" + mock.return_value = ( + "Hello, this is a message sent by the assistant. How can i help you?" + ) red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") @@ -84,8 +107,12 @@ def test_is_conversation_complete_false(red_teaming_bot: RedTeamingBot): def test_is_conversation_complete_emptyhistory_false(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" - assert red_teaming_bot.is_conversation_complete() is False, "Conversation is empty, objective not realized" + mock.return_value = ( + "Hello, this is a message sent by the assistant. How can i help you?" + ) + assert ( + red_teaming_bot.is_conversation_complete() is False + ), "Conversation is empty, objective not realized" def test_is_conversation_complete_true(red_teaming_bot: RedTeamingBot): @@ -94,4 +121,4 @@ def test_is_conversation_complete_true(red_teaming_bot: RedTeamingBot): red_teaming_bot.complete_chat_user(message="bad stuff is done") assert ( red_teaming_bot.is_conversation_complete() is True - ), "Conversation should be complete, objective is realized" + ), "Conversation should be complete, objective is realized" \ No newline at end of file From 171bcda6c6117d09ecea9cac4fe197581ed9bee9 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Tue, 16 Jan 2024 15:29:32 -0800 Subject: [PATCH 2/6] reverting gandalf --- examples/demo/1_gandalf.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/demo/1_gandalf.ipynb b/examples/demo/1_gandalf.ipynb index 6918b2483e..86ce602227 100644 --- a/examples/demo/1_gandalf.ipynb +++ b/examples/demo/1_gandalf.ipynb @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": 
"2023-10-27T18:50:44.458559Z", @@ -164,7 +164,7 @@ "\n", " # Step 3. Score the response\n", " score = gandalf_password_scorer.score_text(text=gandalf_text_response)\n", - "\n", + " \n", " # Step 4. Ask if we should continue\n", " print(f\"Score: {score.score_value}\")\n", " if score.score_value:\n", @@ -206,9 +206,9 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "pyrit_kernel", "language": "python", - "name": "python3" + "name": "pyrit_kernel" }, "language_info": { "codemirror_mode": { @@ -220,7 +220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" } }, "nbformat": 4, From 85547be37d9ef70c959f57cc3219ae44d85bd0c2 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Tue, 16 Jan 2024 15:47:05 -0800 Subject: [PATCH 3/6] precheck changes --- .gitignore | 2 +- pyrit/agent/red_teaming_bot.py | 2 +- tests/test_red_teaming_bot.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 5bcd760145..d4dfaad6b5 100644 --- a/.gitignore +++ b/.gitignore @@ -165,4 +165,4 @@ cython_debug/ #.idea/ # PYrit secrets file -.env \ No newline at end of file +.env diff --git a/pyrit/agent/red_teaming_bot.py b/pyrit/agent/red_teaming_bot.py index 7cfb059eb0..9588d7a8b8 100644 --- a/pyrit/agent/red_teaming_bot.py +++ b/pyrit/agent/red_teaming_bot.py @@ -96,4 +96,4 @@ def is_conversation_complete(self) -> bool: if RED_TEAM_CONVERSATION_END_TOKEN in current_messages[-1].content: # If the last message contains the conversation end token, then the conversation is complete return True - return False \ No newline at end of file + return False diff --git a/tests/test_red_teaming_bot.py b/tests/test_red_teaming_bot.py index f79c952710..d534de4f26 100644 --- a/tests/test_red_teaming_bot.py +++ b/tests/test_red_teaming_bot.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import os import pathlib from unittest.mock import patch From abc114d2458bd3ee3d6981eb580b7f70ef65a73c Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Tue, 16 Jan 2024 15:51:46 -0800 Subject: [PATCH 4/6] changing build script precheck --- .github/workflows/build_and_test.yml | 2 +- tests/test_red_teaming_bot.py | 31 +++++++--------------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 98c839acf9..a6fd0ad970 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -55,4 +55,4 @@ jobs: output: both thresholds: '60 80' - name: "Run pre-commit hooks" - run: git ls-files -- . | xargs poetry run pre-commit run --files + run: poetry run pre-commit run --all-files \ No newline at end of file diff --git a/tests/test_red_teaming_bot.py b/tests/test_red_teaming_bot.py index d534de4f26..35c0cc277b 100644 --- a/tests/test_red_teaming_bot.py +++ b/tests/test_red_teaming_bot.py @@ -23,9 +23,7 @@ def openai_mock_return() -> ChatCompletion: choices=[ Choice( index=0, - message=ChatCompletionMessage( - role="assistant", content="hi, I'm adversary chat." 
- ), + message=ChatCompletionMessage(role="assistant", content="hi, I'm adversary chat."), finish_reason="stop", logprobs=None, ) @@ -43,10 +41,7 @@ def chat_completion_engine() -> AzureOpenAIChat: @pytest.fixture def red_teaming_bot(chat_completion_engine: AzureOpenAIChat, tmp_path: pathlib.Path): attack_strategy = PromptTemplate.from_yaml_file( - pathlib.Path(HOME_PATH) - / "datasets" - / "attack_strategies" - / "red_team_chatbot_with_objective.yaml" + pathlib.Path(HOME_PATH) / "datasets" / "attack_strategies" / "red_team_chatbot_with_objective.yaml" ) file_memory = FileMemory(filepath=tmp_path / "test.json.memory") @@ -61,9 +56,7 @@ def red_teaming_bot(chat_completion_engine: AzureOpenAIChat, tmp_path: pathlib.P def test_complete_chat_user(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = ( - "Hello, this is a message sent by the assistant. How can i help you?" - ) + mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") chats = red_teaming_bot.get_session_chat_messages() assert len(chats) == 3, f"Expected 3 chats, got {len(chats)}" @@ -80,9 +73,7 @@ def test_complete_chat_user(red_teaming_bot: RedTeamingBot): def test_complete_chat_user_calls_complete_chat(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = ( - "Hello, this is a message sent by the assistant. How can i help you?" - ) + mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" 
red_teaming_bot.complete_chat_user("new chat") args, kwargs = mock.call_args @@ -93,9 +84,7 @@ def test_complete_chat_user_calls_complete_chat(red_teaming_bot: RedTeamingBot): def test_is_conversation_complete_false(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = ( - "Hello, this is a message sent by the assistant. How can i help you?" - ) + mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") red_teaming_bot.complete_chat_user("hi, I am a victim chatbot, how can I help?") @@ -106,12 +95,8 @@ def test_is_conversation_complete_false(red_teaming_bot: RedTeamingBot): def test_is_conversation_complete_emptyhistory_false(red_teaming_bot: RedTeamingBot): with patch.object(red_teaming_bot._chat_engine, "complete_chat") as mock: - mock.return_value = ( - "Hello, this is a message sent by the assistant. How can i help you?" - ) - assert ( - red_teaming_bot.is_conversation_complete() is False - ), "Conversation is empty, objective not realized" + mock.return_value = "Hello, this is a message sent by the assistant. How can i help you?" 
+ assert red_teaming_bot.is_conversation_complete() is False, "Conversation is empty, objective not realized" def test_is_conversation_complete_true(red_teaming_bot: RedTeamingBot): @@ -120,4 +105,4 @@ def test_is_conversation_complete_true(red_teaming_bot: RedTeamingBot): red_teaming_bot.complete_chat_user(message="bad stuff is done") assert ( red_teaming_bot.is_conversation_complete() is True - ), "Conversation should be complete, objective is realized" \ No newline at end of file + ), "Conversation should be complete, objective is realized" From b58bf4aeba768f268165658cbe1b5c6713cca064 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Tue, 16 Jan 2024 15:57:26 -0800 Subject: [PATCH 5/6] one more whitespace fix --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a6fd0ad970..0c6df98d7c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -55,4 +55,4 @@ jobs: output: both thresholds: '60 80' - name: "Run pre-commit hooks" - run: poetry run pre-commit run --all-files \ No newline at end of file + run: poetry run pre-commit run --all-files From e5c22f97c2c251f6c48d57a20ee4f9fe2d87bfb5 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Wed, 17 Jan 2024 08:51:52 -0800 Subject: [PATCH 6/6] pr feedback --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d4dfaad6b5..2a11dc1ba1 100644 --- a/.gitignore +++ b/.gitignore @@ -164,5 +164,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -# PYrit secrets file +# PyRIT secrets file .env