From 56374c01643f84c419ac178456c015abb29625e9 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 15 Jun 2026 14:15:44 +0200 Subject: [PATCH 1/2] removal of datadog tracer --- docs-website/docs/development/tracing.mdx | 32 +----- haystack/tracing/datadog.py | 97 ---------------- haystack/tracing/tracer.py | 17 +-- pyproject.toml | 1 - ...emove-datadog-tracer-4f1c8a2b9d6e0a73.yaml | 36 ++++++ test/tracing/test_datadog.py | 107 ------------------ test/tracing/test_tracer.py | 19 ---- 7 files changed, 43 insertions(+), 266 deletions(-) delete mode 100644 haystack/tracing/datadog.py create mode 100644 releasenotes/notes/remove-datadog-tracer-4f1c8a2b9d6e0a73.yaml delete mode 100644 test/tracing/test_datadog.py diff --git a/docs-website/docs/development/tracing.mdx b/docs-website/docs/development/tracing.mdx index 589c638766..1233c2bfa5 100644 --- a/docs-website/docs/development/tracing.mdx +++ b/docs-website/docs/development/tracing.mdx @@ -15,7 +15,7 @@ Traces document the flow of requests through your application and are vital for ## Configuring a Tracing Backend -Instrumented applications typically send traces to a trace collector or a tracing backend. Haystack provides out-of-the-box support for [OpenTelemetry](https://opentelemetry.io/) and [Datadog](https://app.datadoghq.eu/dashboard/lists). You can also quickly implement support for additional providers of your choosing. +Instrumented applications typically send traces to a trace collector or a tracing backend. Haystack provides out-of-the-box support for [OpenTelemetry](https://opentelemetry.io/) and, through integrations, for backends such as [Datadog](https://www.datadoghq.com/), [Langfuse](https://langfuse.com/), and [MLflow](https://mlflow.org/). You can also quickly implement support for additional providers of your choosing. 
### OpenTelemetry @@ -88,32 +88,13 @@ To use OpenTelemetry as your tracing backend, follow these steps: ### Datadog -To use Datadog as your tracing backend, follow these steps: +The `DatadogConnector` component lets you trace your Haystack pipelines with [Datadog](https://www.datadoghq.com/). -1. Install [Datadog’s tracing library ddtrace](https://ddtrace.readthedocs.io/en/stable/#). +Simply install the integration with `pip install datadog-haystack`, then add the connector to your pipeline. - ```shell - pip install ddtrace - ``` -2. There are two options for how to hook Haystack to ddtrace. - - - Run your Haystack application using the `ddtrace`: - ```shell - ddtrace -# -# SPDX-License-Identifier: Apache-2.0 - -import contextlib -from collections.abc import Iterator -from typing import Any - -from haystack.lazy_imports import LazyImport -from haystack.tracing import Span, Tracer -from haystack.tracing import utils as tracing_utils - -with LazyImport("Run 'pip install ddtrace'") as ddtrace_import: - import ddtrace - from ddtrace.trace import Span as ddSpan - from ddtrace.trace import Tracer as ddTracer - -_COMPONENT_NAME_KEY = "haystack.component.name" -_COMPONENT_TYPE_KEY = "haystack.component.type" -_COMPONENT_RUN_OPERATION_NAME = "haystack.component.run" - - -class DatadogSpan(Span): - def __init__(self, span: "ddSpan") -> None: - """Creates an instance of DatadogSpan.""" - self._span = span - - def set_tag(self, key: str, value: Any) -> None: - """ - Set a single tag on the span. - - :param key: the name of the tag. - :param value: the value of the tag. - """ - coerced_value = tracing_utils.coerce_tag_value(value) - # Although set_tag declares value: Optional[str], its implementation accepts other types. 
- # https://github.com/DataDog/dd-trace-py/blob/200b33c5221db1af975f6f7017738cd99a2da4a4/ddtrace/_trace/span.py - self._span.set_tag(key, coerced_value) # type: ignore[arg-type] - - def raw_span(self) -> Any: - """ - Provides access to the underlying span object of the tracer. - - :return: The underlying span object. - """ - return self._span - - def get_correlation_data_for_logs(self) -> dict[str, Any]: - """Return a dictionary with correlation data for logs.""" - - # https://docs.datadoghq.com/tracing/other_telemetry/connect_logs_and_traces/python/#no-standard-library-logging - return ddtrace.tracer.get_log_correlation_context() - - -class DatadogTracer(Tracer): - def __init__(self, tracer: "ddTracer") -> None: - """Creates an instance of DatadogTracer.""" - ddtrace_import.check() - self._tracer = tracer - - @staticmethod - def _get_span_resource_name(operation_name: str, tags: dict[str, Any] | None) -> str | None: - """ - Get the resource name for the Datadog span. - """ - if operation_name == _COMPONENT_RUN_OPERATION_NAME and tags: - component_type = tags.get(_COMPONENT_TYPE_KEY, "") - component_name = tags.get(_COMPONENT_NAME_KEY, "") - - return f"{component_type}: {component_name}" - - return None - - @contextlib.contextmanager - def trace( - self, - operation_name: str, - tags: dict[str, Any] | None = None, - parent_span: Span | None = None, # noqa: ARG002 - ) -> Iterator[Span]: - """Activate and return a new span that inherits from the current active span.""" - resource_name = self._get_span_resource_name(operation_name, tags) - - with self._tracer.trace(name=operation_name, resource=resource_name) as span: - custom_span = DatadogSpan(span) - if tags: - custom_span.set_tags(tags) - - yield custom_span - - def current_span(self) -> Span | None: - """Return the current active span""" - current_span = self._tracer.current_span() - if current_span is None: - return None - - return DatadogSpan(current_span) diff --git a/haystack/tracing/tracer.py 
b/haystack/tracing/tracer.py index f93e1c0969..c170e044be 100644 --- a/haystack/tracing/tracer.py +++ b/haystack/tracing/tracer.py @@ -198,7 +198,7 @@ def auto_enable_tracing() -> None: if is_tracing_enabled(): return # tracing already enabled - tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer() + tracer = _auto_configured_opentelemetry_tracer() if tracer: enable_tracing(tracer) logger.info("Auto-enabled tracing for '{tracer}'", tracer=tracer.__class__.__name__) @@ -226,19 +226,4 @@ def _auto_configured_opentelemetry_tracer() -> Tracer | None: return None -def _auto_configured_datadog_tracer() -> Tracer | None: - # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed - try: - from ddtrace.trace import tracer - - from haystack.tracing.datadog import DatadogTracer - - if tracer.enabled: - return DatadogTracer(tracer=tracer) - except ImportError: - pass - - return None - - auto_enable_tracing() diff --git a/pyproject.toml b/pyproject.toml index 6b0eba89f6..eb7fd93419 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,7 +129,6 @@ dependencies = [ # Tracing "opentelemetry-sdk", - "ddtrace", # Structured logging "structlog", diff --git a/releasenotes/notes/remove-datadog-tracer-4f1c8a2b9d6e0a73.yaml b/releasenotes/notes/remove-datadog-tracer-4f1c8a2b9d6e0a73.yaml new file mode 100644 index 0000000000..6256a73ebd --- /dev/null +++ b/releasenotes/notes/remove-datadog-tracer-4f1c8a2b9d6e0a73.yaml @@ -0,0 +1,36 @@ +--- +upgrade: + - | + The ``DatadogTracer`` has been moved out of Haystack into the ``datadog-haystack`` integration package. + It is no longer auto-enabled when ``ddtrace`` is installed; instead, use the new ``DatadogConnector`` component + to enable Datadog tracing in your pipeline. Install the new package with ``pip install datadog-haystack``. + + Before: + + .. 
code:: python + + import ddtrace + from haystack import tracing + from haystack.tracing.datadog import DatadogTracer + + tracing.enable_tracing(DatadogTracer(ddtrace.tracer)) + + After: + + .. code:: python + + from haystack import Pipeline + from haystack_integrations.components.connectors.datadog import DatadogConnector + + pipe = Pipeline() + pipe.add_component("tracer", DatadogConnector()) + + Alternatively, you can still enable the tracer manually: + + .. code:: python + + import ddtrace + from haystack import tracing + from haystack_integrations.tracing.datadog import DatadogTracer + + tracing.enable_tracing(DatadogTracer(ddtrace.tracer)) diff --git a/test/tracing/test_datadog.py b/test/tracing/test_datadog.py deleted file mode 100644 index ec8c65575c..0000000000 --- a/test/tracing/test_datadog.py +++ /dev/null @@ -1,107 +0,0 @@ -# SPDX-FileCopyrightText: 2022-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -import functools -import json - -import pytest -from _pytest.capture import CaptureFixture -from _pytest.monkeypatch import MonkeyPatch -from ddtrace.trace import Span as ddSpan -from ddtrace.trace import Tracer as ddTracer - -from haystack.tracing.datadog import DatadogTracer - - -@pytest.fixture() -def datadog_tracer(monkeypatch: MonkeyPatch) -> ddTracer: - # For the purpose of the tests we want to use the log writer. - # We simulate being in AWS Lambda, where the log writer is active. - # See https://github.com/DataDog/dd-trace-py/blob/ae4c189ebf8e539f39905f21c7918cc19de69d13/ddtrace/internal/writer/writer.py#L680 - # for more details. 
- monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "test-function") - - return ddTracer() - - -def get_traces_from_console(capfd: CaptureFixture) -> list[dict]: - output = capfd.readouterr().out - parsed = json.loads(output) - nested_traces = parsed["traces"] - return list(functools.reduce(lambda x, y: x + y, nested_traces, [])) - - -class TestDatadogTracer: - def test_opentelemetry_tracer(self, datadog_tracer: ddTracer, capfd: CaptureFixture) -> None: - tracer = DatadogTracer(datadog_tracer) - - component_tags = { - "haystack.component.name": "test_component", - "haystack.component.type": "TestType", - "haystack.component.input": {"input_key": "input_value"}, - "haystack.component.output": {"output_key": "output_value"}, - } - - with tracer.trace("haystack.component.run", tags=component_tags) as span: - span.set_tag("key", "value") - - traces = get_traces_from_console(capfd) - assert len(traces) == 1 - - trace = traces[0] - - assert trace["name"] == "haystack.component.run" - assert "test_component" in trace["resource"] - assert "TestType" in trace["resource"] - - def test_tagging(self, datadog_tracer: ddTracer, capfd: CaptureFixture) -> None: - tracer = DatadogTracer(datadog_tracer) - - with tracer.trace("test", tags={"key1": "value1"}) as span: - span.set_tag("key2", "value2") - - spans = get_traces_from_console(capfd) - assert len(spans) == 1 - assert spans[0]["meta"]["key1"] == "value1" - assert spans[0]["meta"]["key2"] == "value2" - - def test_current_span(self, datadog_tracer: ddTracer, capfd: CaptureFixture) -> None: - tracer = DatadogTracer(datadog_tracer) - - with tracer.trace("test"): - current_span = tracer.current_span() - assert current_span is not None - current_span.set_tag("key1", "value1") - - raw_span = current_span.raw_span() - assert raw_span is not None - assert isinstance(raw_span, ddSpan) - - raw_span.set_tag("key2", "value2") - - spans = get_traces_from_console(capfd) - assert len(spans) == 1 - assert spans[0]["meta"]["key1"] == "value1" - 
assert spans[0]["meta"]["key2"] == "value2" - - def test_tracing_complex_values(self, datadog_tracer: ddTracer, capfd: CaptureFixture) -> None: - tracer = DatadogTracer(datadog_tracer) - - with tracer.trace("test") as span: - span.set_tag("key", {"a": 1, "b": [2, 3, 4]}) - - spans = get_traces_from_console(capfd) - assert len(spans) == 1 - assert spans[0]["meta"]["key"] == '{"a": 1, "b": [2, 3, 4]}' - - def test_get_log_correlation_info(self, datadog_tracer: ddTracer) -> None: - tracer = DatadogTracer(datadog_tracer) - with tracer.trace("test") as span: - span.set_tag("key", "value") - - correlation_data = span.get_correlation_data_for_logs() - - for field in ["dd.trace_id", "dd.span_id", "dd.service", "dd.env", "dd.version"]: - assert field in correlation_data - assert isinstance(correlation_data[field], str) diff --git a/test/tracing/test_tracer.py b/test/tracing/test_tracer.py index 94a1060cf6..3199a12dc1 100644 --- a/test/tracing/test_tracer.py +++ b/test/tracing/test_tracer.py @@ -6,7 +6,6 @@ from collections.abc import Generator from unittest.mock import Mock -import ddtrace import opentelemetry.trace import pytest from _pytest.monkeypatch import MonkeyPatch @@ -15,7 +14,6 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from haystack.tracing.datadog import DatadogTracer from haystack.tracing.opentelemetry import OpenTelemetryTracer from haystack.tracing.tracer import ( HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR, @@ -23,7 +21,6 @@ NullTracer, ProxyTracer, Tracer, - _auto_configured_datadog_tracer, _auto_configured_opentelemetry_tracer, auto_enable_tracing, disable_tracing, @@ -130,13 +127,6 @@ def test_enable_opentelemetry_tracer(self, configured_opentelemetry_tracing: Non assert isinstance(activated_tracer, OpenTelemetryTracer) assert is_tracing_enabled() - def test_add_datadog_tracer(self) -> None: - auto_enable_tracing() - - activated_tracer = 
tracer.actual_tracer - assert isinstance(activated_tracer, DatadogTracer) - assert is_tracing_enabled() - def test__auto_configured_opentelemetry_tracer(self, configured_opentelemetry_tracing): tracer = _auto_configured_opentelemetry_tracer() assert isinstance(tracer, OpenTelemetryTracer) @@ -146,15 +136,6 @@ def test__auto_configured_opentelemetry_tracer_with_failing_import(self, monkeyp tracer = _auto_configured_opentelemetry_tracer() assert tracer is None - def test__auto_configured_datadog_tracer(self): - tracer = _auto_configured_datadog_tracer() - assert isinstance(tracer, DatadogTracer) - - def test__auto_configured_datadog_tracer_with_failing_import(self, monkeypatch): - monkeypatch.setattr(ddtrace.tracer, "enabled", False) - tracer = _auto_configured_datadog_tracer() - assert tracer is None - class TestTracingContent: def test_set_content_tag_with_enabled_content_tracing(self, spying_tracer: SpyingTracer) -> None: From 71575410d990bbce601bba470f0682172c4e320c Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 15 Jun 2026 14:24:50 +0200 Subject: [PATCH 2/2] update migration md --- MIGRATION.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/MIGRATION.md b/MIGRATION.md index 689d9fad4c..6069f68304 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -99,6 +99,54 @@ pip install | `from haystack.components.routers import TransformersTextRouter` | `transformers-haystack` | `from haystack_integrations.components.routers.transformers import TransformersTextRouter` | | `from haystack.components.routers import TransformersZeroShotTextRouter` | `transformers-haystack` | `from haystack_integrations.components.routers.transformers import TransformersZeroShotTextRouter` | | `from haystack.components.websearch import SerperDevWebSearch` | `serperdev-haystack` | `from haystack_integrations.components.websearch.serperdev import SerperDevWebSearch` | +| `from haystack.tracing.datadog import DatadogTracer` | 
`datadog-haystack` | `from haystack_integrations.tracing.datadog import DatadogTracer` | + +### `DatadogTracer` moved to the `datadog-haystack` integration + +**What changed:** The `DatadogTracer` has been moved out of Haystack into the `datadog-haystack` integration package. +In addition, Haystack no longer automatically enables Datadog tracing when `ddtrace` is installed. You now enable it +explicitly by adding the new `DatadogConnector` component to your pipeline. + +**Why:** Moving the tracer to a dedicated package keeps Haystack's dependencies leaner and lets the integration be +released independently. Removing the implicit auto-enable makes tracing setup explicit and predictable. + +**How to migrate:** + +Install the integration: + +```bash +pip install datadog-haystack +``` + +Before (v2.x), Datadog tracing was auto-enabled when `ddtrace` was installed and its tracer was enabled, or set up manually: + +```python +import ddtrace +from haystack import tracing +from haystack.tracing.datadog import DatadogTracer + +tracing.enable_tracing(DatadogTracer(ddtrace.tracer)) +``` + +After (v3.0), add the `DatadogConnector` to your pipeline to enable tracing: + +```python +from haystack import Pipeline +from haystack_integrations.components.connectors.datadog import DatadogConnector + +pipe = Pipeline() +pipe.add_component("tracer", DatadogConnector()) +``` + +Alternatively, you can still enable the tracer manually using the new import path: + +```python +import ddtrace +from haystack import tracing +from haystack_integrations.tracing.datadog import DatadogTracer + +tracing.enable_tracing(DatadogTracer(ddtrace.tracer)) +``` ### `TransformersSimilarityRanker` removed