diff --git a/.changelog/5259.added b/.changelog/5259.added new file mode 100644 index 00000000000..463de58085d --- /dev/null +++ b/.changelog/5259.added @@ -0,0 +1 @@ +`opentelemetry-sdk`: add `ServiceInstanceIdResourceDetector` for populating `service.instance.id` diff --git a/opentelemetry-sdk/pyproject.toml b/opentelemetry-sdk/pyproject.toml index e367cf45a66..4970dcfab6c 100644 --- a/opentelemetry-sdk/pyproject.toml +++ b/opentelemetry-sdk/pyproject.toml @@ -74,6 +74,7 @@ otel = "opentelemetry.sdk.resources:OTELResourceDetector" process = "opentelemetry.sdk.resources:ProcessResourceDetector" os = "opentelemetry.sdk.resources:OsResourceDetector" host = "opentelemetry.sdk.resources:_HostResourceDetector" +service_instance = "opentelemetry.sdk.resources:ServiceInstanceIdResourceDetector" [project.urls] Homepage = "https://github.com/open-telemetry/opentelemetry-python/tree/main/opentelemetry-sdk" diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index c5593909033..6585741a58f 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -55,7 +55,9 @@ import platform import socket import sys +import threading import typing +import uuid from collections.abc import Sequence from json import dumps from os import environ @@ -278,6 +280,11 @@ def to_json(self, indent: int | None = 4) -> str: ) +_service_instance_id: str | None = None +_service_instance_id_pid: int | None = None +_service_instance_id_lock = threading.Lock() + + class ResourceDetector(abc.ABC): def __init__(self, raise_on_error: bool = False) -> None: self.raise_on_error = raise_on_error @@ -462,15 +469,41 @@ def detect(self) -> "Resource": ) +class ServiceInstanceIdResourceDetector(ResourceDetector): + """Detects service.instance.id as a random UUID v4. + + Per the OpenTelemetry specification, SDKs SHOULD generate a random v1/v4 + UUID for service.instance.id to uniquely identify each service instance. + The ID is shared across all detector instances within the same process and + regenerated automatically when the process PID changes (e.g. after a fork). + """ + + def detect(self) -> "Resource": + # pylint: disable-next=global-statement + global _service_instance_id, _service_instance_id_pid + with _service_instance_id_lock: + current_pid = os.getpid() + if ( + _service_instance_id is None + or _service_instance_id_pid != current_pid + ): + _service_instance_id = str(uuid.uuid4()) + _service_instance_id_pid = current_pid + instance_id = _service_instance_id + return Resource({SERVICE_INSTANCE_ID: instance_id}) + + def _build_resource_detectors() -> list["ResourceDetector"]: """Returns the ordered list of resource detectors to use for Resource.create. - Fast path: if no extra detectors are configured, returns only - OTELResourceDetector without scanning entry_points. + Fast path: if no extra detectors are configured, returns only the two + built-in detectors without scanning entry_points. - "otel" (OTELResourceDetector) defaults to last position so that - OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME take highest merge priority, - but an explicit position in OTEL_EXPERIMENTAL_RESOURCE_DETECTORS is respected. + "service_instance" (ServiceInstanceIdResourceDetector) and "otel" + (OTELResourceDetector) are always appended as defaults. "otel" is last so + that OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME take highest merge + priority, but an explicit position in OTEL_EXPERIMENTAL_RESOURCE_DETECTORS + is respected for either name. """ detector_names: list[str] = list( dict.fromkeys( @@ -481,13 +514,13 @@ def _build_resource_detectors() -> list["ResourceDetector"]: ).split(",") if name.strip() ] - + ["otel"] + + ["service_instance", "otel"] ) ) - # Fast path: only the built-in "otel" detector — no entry_points scan needed. - if detector_names == ["otel"]: - return [OTELResourceDetector()] + # Fast path: only the two built-in detectors — no entry_points scan needed. + if detector_names == ["service_instance", "otel"]: + return [ServiceInstanceIdResourceDetector(), OTELResourceDetector()] # pylint: disable=import-outside-toplevel from opentelemetry.util._importlib_metadata import ( # noqa: PLC0415 diff --git a/opentelemetry-sdk/tests/resources/test_resources.py b/opentelemetry-sdk/tests/resources/test_resources.py index 8c6ff460d7d..8519abb044e 100644 --- a/opentelemetry-sdk/tests/resources/test_resources.py +++ b/opentelemetry-sdk/tests/resources/test_resources.py @@ -1,6 +1,10 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=too-many-lines + import os +import subprocess import sys import unittest import uuid @@ -10,6 +14,7 @@ from unittest.mock import Mock, patch from urllib import parse +import opentelemetry.sdk.resources as _resources_module from opentelemetry.sdk.environment_variables import ( OTEL_EXPERIMENTAL_RESOURCE_DETECTORS, ) @@ -34,6 +39,7 @@ PROCESS_RUNTIME_DESCRIPTION, PROCESS_RUNTIME_NAME, PROCESS_RUNTIME_VERSION, + SERVICE_INSTANCE_ID, SERVICE_NAME, TELEMETRY_SDK_LANGUAGE, TELEMETRY_SDK_NAME, @@ -43,6 +49,7 @@ ProcessResourceDetector, Resource, ResourceDetector, + ServiceInstanceIdResourceDetector, _HostResourceDetector, get_aggregated_resources, ) @@ -59,6 +66,11 @@ class TestResources(unittest.TestCase): def setUp(self) -> None: environ[OTEL_RESOURCE_ATTRIBUTES] = "" + self._service_instance_id = ( + ServiceInstanceIdResourceDetector() + .detect() + .attributes[SERVICE_INSTANCE_ID] + ) def tearDown(self) -> None: environ.pop(OTEL_RESOURCE_ATTRIBUTES) @@ -79,6 +91,7 @@ def test_create(self): TELEMETRY_SDK_NAME: "opentelemetry", TELEMETRY_SDK_LANGUAGE: "python", TELEMETRY_SDK_VERSION: _OPENTELEMETRY_SDK_VERSION, + SERVICE_INSTANCE_ID: self._service_instance_id, SERVICE_NAME: "unknown_service", } @@ -105,40 +118,30 @@ def test_create(self): resource = Resource.get_empty() self.assertEqual(resource, _EMPTY_RESOURCE) - resource = Resource.create(None) - self.assertEqual( - resource, - _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") - ), + expected_default = _DEFAULT_RESOURCE.merge( + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ) + + resource = Resource.create(None) + self.assertEqual(resource, expected_default) self.assertEqual(resource.schema_url, "") resource = Resource.create(None, None) - self.assertEqual( - resource, - _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") - ), - ) + self.assertEqual(resource, expected_default) self.assertEqual(resource.schema_url, "") resource = Resource.create({}) - self.assertEqual( - resource, - _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") - ), - ) + self.assertEqual(resource, expected_default) self.assertEqual(resource.schema_url, "") resource = Resource.create({}, None) - self.assertEqual( - resource, - _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") - ), - ) + self.assertEqual(resource, expected_default) self.assertEqual(resource.schema_url, "") def test_resource_merge(self): @@ -202,6 +205,7 @@ def test_immutability(self): TELEMETRY_SDK_NAME: "opentelemetry", TELEMETRY_SDK_LANGUAGE: "python", TELEMETRY_SDK_VERSION: _OPENTELEMETRY_SDK_VERSION, + SERVICE_INSTANCE_ID: self._service_instance_id, SERVICE_NAME: "unknown_service", } @@ -257,7 +261,13 @@ def test_aggregated_resources_no_detectors(self): self.assertEqual( aggregated_resources, _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ), ) @@ -308,7 +318,13 @@ def test_aggregated_resources_multiple_detectors(self): [resource_detector1, resource_detector2, resource_detector3] ), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ).merge( Resource( { @@ -351,7 +367,13 @@ def test_aggregated_resources_different_schema_urls(self): self.assertEqual( get_aggregated_resources([resource_detector1, resource_detector2]), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ).merge( Resource( {"key1": "value1", "key2": "value2", "key3": "value3"}, @@ -365,7 +387,13 @@ def test_aggregated_resources_different_schema_urls(self): [resource_detector2, resource_detector3] ), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ).merge( Resource({"key2": "value2", "key3": "value3"}, "url1") ), @@ -383,7 +411,13 @@ def test_aggregated_resources_different_schema_urls(self): ] ), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ).merge( Resource( { @@ -407,7 +441,13 @@ def test_resource_detector_ignore_error(self): self.assertEqual( get_aggregated_resources([resource_detector]), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ), ) @@ -427,7 +467,13 @@ def test_resource_detector_timeout(self, mock_logger): self.assertEqual( get_aggregated_resources([resource_detector]), _DEFAULT_RESOURCE.merge( - Resource({SERVICE_NAME: "unknown_service"}, "") + Resource( + { + SERVICE_INSTANCE_ID: self._service_instance_id, + SERVICE_NAME: "unknown_service", + }, + "", + ) ), ) mock_logger.warning.assert_called_with( @@ -912,3 +958,116 @@ def test_resource_detector_entry_points_tolerate_missing_detector(self): resource.attributes["telemetry.sdk.language"], "python" ) self.assertIn(HOST_NAME, resource.attributes) + + +# pylint: disable=protected-access +class TestServiceInstanceIdResourceDetector(unittest.TestCase): + def setUp(self) -> None: + self._orig_instance_id = _resources_module._service_instance_id + self._orig_instance_pid = _resources_module._service_instance_id_pid + + def tearDown(self) -> None: + _resources_module._service_instance_id = self._orig_instance_id + _resources_module._service_instance_id_pid = self._orig_instance_pid + + def test_detect_value_is_valid_uuid4(self): + _resources_module._service_instance_id = None + _resources_module._service_instance_id_pid = None + detector = ServiceInstanceIdResourceDetector() + value = detector.detect().attributes[SERVICE_INSTANCE_ID] + parsed = uuid.UUID(value) + self.assertEqual(parsed.version, 4) + + def test_detect_stable_within_instance(self): + _resources_module._service_instance_id = None + _resources_module._service_instance_id_pid = None + detector = ServiceInstanceIdResourceDetector() + id1 = detector.detect().attributes[SERVICE_INSTANCE_ID] + id2 = detector.detect().attributes[SERVICE_INSTANCE_ID] + self.assertEqual(id1, id2) + + def test_detect_shared_across_instances(self): + _resources_module._service_instance_id = None + _resources_module._service_instance_id_pid = None + id1 = ( + ServiceInstanceIdResourceDetector() + .detect() + .attributes[SERVICE_INSTANCE_ID] + ) + id2 = ( + ServiceInstanceIdResourceDetector() + .detect() + .attributes[SERVICE_INSTANCE_ID] + ) + self.assertEqual(id1, id2) + + def test_detect_pid_change_generates_new_id(self): + _resources_module._service_instance_id = "old-id" + _resources_module._service_instance_id_pid = os.getpid() - 1 + new_id = ( + ServiceInstanceIdResourceDetector() + .detect() + .attributes[SERVICE_INSTANCE_ID] + ) + self.assertNotEqual(new_id, "old-id") + self.assertEqual( + _resources_module._service_instance_id_pid, os.getpid() + ) + uuid.UUID(new_id) + + def test_detect_pid_unchanged_returns_same_id(self): + known_id = "known-stable-id" + _resources_module._service_instance_id = known_id + _resources_module._service_instance_id_pid = os.getpid() + result = ( + ServiceInstanceIdResourceDetector() + .detect() + .attributes[SERVICE_INSTANCE_ID] + ) + self.assertEqual(result, known_id) + + @unittest.skipUnless(hasattr(os, "fork"), "requires os.fork") + def test_detect_fork_generates_new_id(self): + script = """\ +import os +import sys + +from opentelemetry.sdk.resources import ServiceInstanceIdResourceDetector, SERVICE_INSTANCE_ID + +parent_id = ServiceInstanceIdResourceDetector().detect().attributes[SERVICE_INSTANCE_ID] + +r_fd, w_fd = os.pipe() +pid = os.fork() +if not pid: + os.close(r_fd) + child_id = ServiceInstanceIdResourceDetector().detect().attributes[SERVICE_INSTANCE_ID] + with os.fdopen(w_fd, "w") as w: + w.write(child_id) + os._exit(0) +else: + os.close(w_fd) + with os.fdopen(r_fd, "r") as r: + child_id = r.read() + os.waitpid(pid, 0) + print(f"{parent_id}:{child_id}") +""" + result = subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + text=True, + check=True, + ) + parent_id, child_id = result.stdout.strip().split(":") + self.assertNotEqual(parent_id, child_id) + self.assertEqual(uuid.UUID(parent_id).version, 4) + self.assertEqual(uuid.UUID(child_id).version, 4) + + @patch.dict( + environ, + {OTEL_EXPERIMENTAL_RESOURCE_DETECTORS: "service_instance"}, + clear=True, + ) + def test_resource_detector_entry_points_service_instance(self): + resource = Resource.create() + self.assertIn(SERVICE_INSTANCE_ID, resource.attributes) + uuid.UUID(resource.attributes[SERVICE_INSTANCE_ID]) diff --git a/tests/opentelemetry-test-utils/tests/testdata/registry/attributes.yaml b/tests/opentelemetry-test-utils/tests/testdata/registry/attributes.yaml index d4a5e301470..7e67cffa17d 100644 --- a/tests/opentelemetry-test-utils/tests/testdata/registry/attributes.yaml +++ b/tests/opentelemetry-test-utils/tests/testdata/registry/attributes.yaml @@ -5,6 +5,11 @@ groups: type: attribute_group brief: Minimal registry for WeaverLiveCheck self-tests. attributes: + - id: service.instance.id + type: string + brief: The unique identifier of the service instance. + stability: stable + examples: ["my-k8s-pod-deployment-1"] - id: service.name type: string brief: The name of the service.