Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/apify/request_loaders/_apify_request_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ async def _process_remote_url(request_input: _RequestsFromUrlInput, http_client:
"""Fetch a remote URL and extract links from the response body."""
http_response = await http_client.send_request(method='GET', url=request_input.requests_from_url)
response_body = await http_response.read()
matches = re.finditer(URL_NO_COMMAS_REGEX, response_body.decode('utf-8'))
matches = re.finditer(URL_NO_COMMAS_REGEX, response_body.decode('utf-8', errors='replace'))

return [
Request.from_url(
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/actor/test_request_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,22 @@ async def test_request_list_open_from_url_additional_inputs(httpserver: HTTPServ
assert request.user_data == expected_user_data


async def test_request_list_open_from_url_non_utf8_body(httpserver: HTTPServer) -> None:
    """Check that ApifyRequestList.open copes with a remote body that is not valid UTF-8."""
    expected_url = 'https://www.someurl.com'

    # Encode the payload as latin-1 so that it carries non-ASCII bytes (e.g. 0xE9 for 'é').
    # Such bytes are invalid under strict utf-8 decoding and would raise UnicodeDecodeError.
    latin1_payload = f'café {expected_url} naïve'.encode('latin-1')
    handler = httpserver.expect_oneshot_request('/file.txt')
    handler.respond_with_data(status=200, response_data=latin1_payload)

    # Point the request list at the served file and load it.
    remote_source = {'requestsFromUrl': httpserver.url_for('/file.txt'), 'method': 'GET'}
    loaded_list = await ApifyRequestList.open(request_list_sources_input=[remote_source])

    # The URL embedded between the non-UTF-8 words must still be extracted.
    first_request = await loaded_list.fetch_next_request()
    assert first_request is not None
    assert first_request.url == expected_url


async def test_request_list_open_name() -> None:
name = 'some_name'
request_list = await ApifyRequestList.open(name=name)
Expand Down
Loading