From e82365dd0bda1f1ef55317b85457ba8140effdf6 Mon Sep 17 00:00:00 2001 From: biefan <70761325+biefan@users.noreply.github.com> Date: Mon, 16 Mar 2026 20:45:19 +0000 Subject: [PATCH 1/2] support relative blob paths in AzureBlobStorageIO --- pyrit/models/storage_io.py | 25 +++++++++++++++---- tests/unit/models/test_storage_io.py | 36 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/pyrit/models/storage_io.py b/pyrit/models/storage_io.py index 3555a3648b..cf8af6edb3 100644 --- a/pyrit/models/storage_io.py +++ b/pyrit/models/storage_io.py @@ -254,6 +254,23 @@ def parse_blob_url(self, file_path: str) -> tuple[str, str]: return container_name, blob_name raise ValueError("Invalid blob URL") + def _resolve_blob_name(self, path: Union[Path, str]) -> str: + """ + Resolve a blob name from either a full blob URL or a relative blob path. + + Args: + path (Union[Path, str]): Blob URL or relative blob path. + + Returns: + str: The resolved blob name. + """ + path_str = str(path) + parsed_url = urlparse(path_str) + if parsed_url.scheme and parsed_url.netloc: + _, blob_name = self.parse_blob_url(path_str) + return blob_name + return path_str + async def read_file(self, path: Union[Path, str]) -> bytes: """ Asynchronously reads the content of a file (blob) from Azure Blob Storage. @@ -284,7 +301,7 @@ async def read_file(self, path: Union[Path, str]) -> bytes: if not self._client_async: await self._create_container_client_async() - _, blob_name = self.parse_blob_url(str(path)) + blob_name = self._resolve_blob_name(path) try: blob_client = self._client_async.get_blob_client(blob=blob_name) @@ -311,7 +328,7 @@ async def write_file(self, path: Union[Path, str], data: bytes) -> None: """ if not self._client_async: await self._create_container_client_async() - _, blob_name = self.parse_blob_url(str(path)) + blob_name = self._resolve_blob_name(path) try: await self._upload_blob_async(file_name=blob_name, data=data, content_type=self._blob_content_type) except Exception as exc: @@ -335,7 +352,7 @@ async def path_exists(self, path: Union[Path, str]) -> bool: if not self._client_async: await self._create_container_client_async() try: - _, blob_name = self.parse_blob_url(str(path)) + blob_name = self._resolve_blob_name(path) blob_client = self._client_async.get_blob_client(blob=blob_name) await blob_client.get_blob_properties() return True @@ -359,7 +376,7 @@ async def is_file(self, path: Union[Path, str]) -> bool: if not self._client_async: await self._create_container_client_async() try: - _, blob_name = self.parse_blob_url(str(path)) + blob_name = self._resolve_blob_name(path) blob_client = self._client_async.get_blob_client(blob=blob_name) blob_properties = await blob_client.get_blob_properties() return blob_properties.size > 0 diff --git a/tests/unit/models/test_storage_io.py b/tests/unit/models/test_storage_io.py index 67e3ece09a..1aafa3c263 100644 --- a/tests/unit/models/test_storage_io.py +++ b/tests/unit/models/test_storage_io.py @@ -101,6 +101,25 @@ async def test_azure_blob_storage_io_read_file(azure_blob_storage_io): assert result == b"Test file content" +@pytest.mark.asyncio +async def test_azure_blob_storage_io_read_file_with_relative_path(azure_blob_storage_io): + mock_container_client = AsyncMock() + azure_blob_storage_io._client_async = mock_container_client + + mock_blob_client = AsyncMock() + mock_blob_stream = AsyncMock() + + mock_container_client.get_blob_client = Mock(return_value=mock_blob_client) + mock_blob_client.download_blob = AsyncMock(return_value=mock_blob_stream) + mock_blob_stream.readall = AsyncMock(return_value=b"Test file content") + mock_container_client.close = AsyncMock() + + result = await azure_blob_storage_io.read_file("dir1/dir2/sample.png") + + assert result == b"Test file content" + mock_container_client.get_blob_client.assert_called_once_with(blob="dir1/dir2/sample.png") + + @pytest.mark.asyncio async def test_azure_blob_storage_io_write_file(): container_url = "https://youraccount.blob.core.windows.net/yourcontainer" @@ -143,6 +162,23 @@ async def test_azure_storage_io_path_exists(azure_blob_storage_io): assert exists is True +@pytest.mark.asyncio +async def test_azure_storage_io_path_exists_with_relative_path(azure_blob_storage_io): + mock_container_client = AsyncMock() + azure_blob_storage_io._client_async = mock_container_client + + mock_blob_client = AsyncMock() + + mock_container_client.get_blob_client = Mock(return_value=mock_blob_client) + mock_blob_client.get_blob_properties = AsyncMock() + mock_container_client.close = AsyncMock() + + exists = await azure_blob_storage_io.path_exists("dir1/dir2/blob_name.txt") + + assert exists is True + mock_container_client.get_blob_client.assert_called_once_with(blob="dir1/dir2/blob_name.txt") + + @pytest.mark.asyncio async def test_azure_storage_io_is_file(azure_blob_storage_io): azure_blob_storage_io._client_async = AsyncMock() From ed26e171cbf48d1cb9a0d90431434d615b39dac2 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sat, 11 Apr 2026 17:41:06 -0700 Subject: [PATCH 2/2] Improve _resolve_blob_name: eliminate double urlparse, fix Windows paths, expand tests - Replace double urlparse with try/except around parse_blob_url - Normalize backslashes to forward slashes for Windows Path safety - Update write_file() docstring to document relative path support - Add comment explaining intentional container name discard - Add missing tests for write_file and is_file with relative paths - Add edge case tests for _resolve_blob_name (backslashes, Path objects, etc.) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/models/storage_io.py | 22 +++++++--- tests/unit/models/test_storage_io.py | 65 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/pyrit/models/storage_io.py b/pyrit/models/storage_io.py index cf8af6edb3..a1bc5f13af 100644 --- a/pyrit/models/storage_io.py +++ b/pyrit/models/storage_io.py @@ -258,18 +258,27 @@ def _resolve_blob_name(self, path: Union[Path, str]) -> str: """ Resolve a blob name from either a full blob URL or a relative blob path. + When a full URL is provided the blob name is extracted from it. The container + name embedded in the URL is intentionally discarded — operations always run + against the container configured in the constructor. + + Backslashes are normalized to forward slashes so that ``Path`` objects + created on Windows still produce valid blob names. + Args: path (Union[Path, str]): Blob URL or relative blob path. Returns: str: The resolved blob name. + """ - path_str = str(path) - parsed_url = urlparse(path_str) - if parsed_url.scheme and parsed_url.netloc: + path_str = str(path).replace("\\", "/") + try: + # parse_blob_url validates scheme + netloc internally _, blob_name = self.parse_blob_url(path_str) return blob_name - return path_str + except ValueError: + return path_str async def read_file(self, path: Union[Path, str]) -> bytes: """ @@ -321,8 +330,11 @@ async def write_file(self, path: Union[Path, str], data: bytes) -> None: """ Write data to Azure Blob Storage at the specified path. + If the provided ``path`` is a full URL, the blob name is extracted from it. + If a relative path is provided, it is used as the blob name directly. + Args: - path (str): The full Azure Blob Storage URL + path (Union[Path, str]): Full blob URL or relative blob path. data (bytes): The data to write. """ diff --git a/tests/unit/models/test_storage_io.py b/tests/unit/models/test_storage_io.py index 1aafa3c263..6b36f30ab7 100644 --- a/tests/unit/models/test_storage_io.py +++ b/tests/unit/models/test_storage_io.py @@ -148,6 +148,29 @@ async def test_azure_blob_storage_io_write_file(): ) +@pytest.mark.asyncio +async def test_azure_blob_storage_io_write_file_with_relative_path(): + container_url = "https://youraccount.blob.core.windows.net/yourcontainer" + azure_blob_storage_io = AzureBlobStorageIO( + container_url=container_url, blob_content_type=SupportedContentType.PLAIN_TEXT + ) + + mock_container_client = AsyncMock() + + with patch.object(azure_blob_storage_io, "_create_container_client_async", return_value=None): + azure_blob_storage_io._client_async = mock_container_client + azure_blob_storage_io._upload_blob_async = AsyncMock() + + data_to_write = b"Test data" + await azure_blob_storage_io.write_file("dir1/dir2/testfile.txt", data_to_write) + + azure_blob_storage_io._upload_blob_async.assert_awaited_with( + file_name="dir1/dir2/testfile.txt", + data=data_to_write, + content_type=SupportedContentType.PLAIN_TEXT.value, + ) + + @pytest.mark.asyncio async def test_azure_storage_io_path_exists(azure_blob_storage_io): azure_blob_storage_io._client_async = AsyncMock() @@ -194,6 +217,24 @@ async def test_azure_storage_io_is_file(azure_blob_storage_io): assert is_file is True +@pytest.mark.asyncio +async def test_azure_storage_io_is_file_with_relative_path(azure_blob_storage_io): + mock_container_client = AsyncMock() + azure_blob_storage_io._client_async = mock_container_client + + mock_blob_client = AsyncMock() + + mock_container_client.get_blob_client = Mock(return_value=mock_blob_client) + mock_blob_properties = Mock(size=1024) + mock_blob_client.get_blob_properties = AsyncMock(return_value=mock_blob_properties) + mock_container_client.close = AsyncMock() + + is_file = await azure_blob_storage_io.is_file("dir1/dir2/blob_name.txt") + + assert is_file is True + mock_container_client.get_blob_client.assert_called_once_with(blob="dir1/dir2/blob_name.txt") + + def test_azure_storage_io_parse_blob_url_valid(azure_blob_storage_io): file_path = "https://example.blob.core.windows.net/container/dir1/dir2/blob_name.txt" container_name, blob_name = azure_blob_storage_io.parse_blob_url(file_path) @@ -215,3 +256,27 @@ def test_azure_storage_io_parse_blob_url_without_scheme(azure_blob_storage_io): def test_azure_storage_io_parse_blob_url_without_netloc(azure_blob_storage_io): with pytest.raises(ValueError, match="Invalid blob URL"): azure_blob_storage_io.parse_blob_url("https:///container/dir1/blob_name.txt") + + +def test_resolve_blob_name_with_full_url(azure_blob_storage_io): + result = azure_blob_storage_io._resolve_blob_name("https://account.blob.core.windows.net/container/dir1/file.txt") + assert result == "dir1/file.txt" + + +def test_resolve_blob_name_with_relative_path(azure_blob_storage_io): + assert azure_blob_storage_io._resolve_blob_name("dir1/dir2/file.txt") == "dir1/dir2/file.txt" + + +def test_resolve_blob_name_with_simple_filename(azure_blob_storage_io): + assert azure_blob_storage_io._resolve_blob_name("file.txt") == "file.txt" + + +def test_resolve_blob_name_normalizes_backslashes(azure_blob_storage_io): + assert azure_blob_storage_io._resolve_blob_name("dir1\\dir2\\file.txt") == "dir1/dir2/file.txt" + + +def test_resolve_blob_name_with_path_object(azure_blob_storage_io): + from pathlib import PurePosixPath + + result = azure_blob_storage_io._resolve_blob_name(PurePosixPath("dir1/dir2/file.txt")) + assert result == "dir1/dir2/file.txt"