Skip to content

Commit 986c868

Browse files
authored
implement trace_continuation_strategy (#1564)
* implement `trace_continuation_strategy` closes #1472 * use typing.Dict instead of dict for backwards compat * update changelog
1 parent 1b884b4 commit 986c868

File tree

7 files changed

+154
-30
lines changed

7 files changed

+154
-30
lines changed

CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ endif::[]
4040
* Add instrumentation for https://kafka-python.readthedocs.io/en/master/[`kafka-python`] {pull}1555[#1555]
4141
* Add API for span links, and implement span link support for OpenTelemetry bridge {pull}1562[#1562]
4242
* Add specific instrumentation for SQS delete/batch-delete {pull}1567[#1567]
43+
* Add `trace_continuation_strategy` setting {pull}1564[#1564]
4344
4445
[float]
4546
===== Bug fixes

docs/configuration.asciidoc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,36 @@ These headers (`traceparent` and `tracestate`) are defined in the https://www.w3
11741174

11751175
Additionally, when this setting is set to `True`, the agent will set `elasticapm-traceparent` for backwards compatibility.
11761176

1177+
[float]
1178+
[[config-trace-continuation-strategy]]
1179+
==== `trace_continuation_strategy`
1180+
1181+
[options="header"]
1182+
|============
1183+
| Environment | Django/Flask | Default
1184+
| `ELASTIC_APM_TRACE_CONTINUATION_STRATEGY` | `TRACE_CONTINUATION_STRATEGY` | `continue`
1185+
|============
1186+
1187+
This option allows some control on how the APM agent handles W3C trace-context headers on incoming requests.
1188+
By default, the `traceparent` and `tracestate` headers are used per W3C spec for distributed tracing.
1189+
However, in certain cases it can be helpful to *not* use the incoming `traceparent` header.
1190+
Some example use cases:
1191+
1192+
- An Elastic-monitored service is receiving requests with `traceparent` headers from *unmonitored* services.
1193+
- An Elastic-monitored service is publicly exposed, and does not want tracing data (trace-ids, sampling decisions) to possibly be spoofed by user requests.
1194+
1195+
Valid values are:
1196+
1197+
- `'continue'`: The default behavior. An incoming `traceparent` value is used to continue the trace and determine the sampling decision.
1198+
- `'restart'`: Always ignores the `traceparent` header of incoming requests.
1199+
A new trace-id will be generated and the sampling decision will be made based on <<config-transaction-sample-rate,`transaction_sample_rate`>>.
1200+
A *span link* will be made to the incoming traceparent.
1201+
- `'restart_external'`: If an incoming request includes the `es` vendor flag in `tracestate`, then any 'traceparent' will be considered internal and will be handled as described for `'continue'` above.
1202+
Otherwise, any `'traceparent'` is considered external and will be handled as described for `'restart'` above.
1203+
1204+
Starting with Elastic Observability 8.2, span links will be visible in trace
1205+
views.
1206+
11771207
[float]
11781208
[[config-use-elastic-excepthook]]
11791209
==== `use_elastic_excepthook`

elasticapm/conf/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
import threading
3939
from datetime import timedelta
4040

41-
from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES
41+
from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, TRACE_CONTINUATION_STRATEGY
4242
from elasticapm.utils import compat, starmatch_to_regex
4343
from elasticapm.utils.logging import get_logger
4444
from elasticapm.utils.threading import IntervalTimer, ThreadManager
@@ -671,6 +671,19 @@ class Config(_ConfigBase):
671671
callbacks=[_log_ecs_reformatting_callback],
672672
default="off",
673673
)
674+
trace_continuation_strategy = _ConfigValue(
675+
"TRACE_CONTINUATION_STRATEGY",
676+
validators=[
677+
EnumerationValidator(
678+
[
679+
TRACE_CONTINUATION_STRATEGY.CONTINUE,
680+
TRACE_CONTINUATION_STRATEGY.RESTART,
681+
TRACE_CONTINUATION_STRATEGY.RESTART_EXTERNAL,
682+
]
683+
)
684+
],
685+
default=TRACE_CONTINUATION_STRATEGY.CONTINUE,
686+
)
674687

675688
@property
676689
def is_recording(self):

elasticapm/conf/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,8 @@ def _starmatch_to_regex(pattern):
114114
LABEL_TYPES = (bool, int, float, decimal.Decimal)
115115

116116
TRACESTATE = namedtuple("TRACESTATE", ["SAMPLE_RATE"])(SAMPLE_RATE="s")
117+
TRACE_CONTINUATION_STRATEGY = namedtuple("TRACE_CONTINUATION_STRATEGY", ["CONTINUE", "RESTART", "RESTART_EXTERNAL"])(
118+
CONTINUE="continue",
119+
RESTART="restart",
120+
RESTART_EXTERNAL="restart_external",
121+
)

elasticapm/traces.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from elasticapm.context import init_execution_context
4747
from elasticapm.metrics.base_metrics import Timer
4848
from elasticapm.utils import encoding, get_name_from_func, nested_key, url_to_destination_resource
49-
from elasticapm.utils.disttracing import TraceParent, TracingOptions
49+
from elasticapm.utils.disttracing import TraceParent
5050
from elasticapm.utils.logging import get_logger
5151
from elasticapm.utils.time import time_to_perf_counter
5252

@@ -204,12 +204,7 @@ def __init__(
204204
"""
205205
self.id = self.get_dist_tracing_id()
206206
if not trace_parent:
207-
trace_parent = TraceParent(
208-
constants.TRACE_CONTEXT_VERSION,
209-
"%032x" % random.getrandbits(128),
210-
self.id,
211-
TracingOptions(recorded=is_sampled),
212-
)
207+
trace_parent = TraceParent.new(self.id, is_sampled)
213208

214209
self.trace_parent: TraceParent = trace_parent
215210
self.timestamp = start if start is not None else time.time()
@@ -894,18 +889,29 @@ def begin_transaction(
894889
start: Optional[float] = None,
895890
auto_activate: bool = True,
896891
links: Optional[Sequence[TraceParent]] = None,
897-
):
892+
) -> Transaction:
898893
"""
899894
Start a new transactions and bind it in a thread-local variable
900895
901896
:param transaction_type: type of the transaction, e.g. "request"
902897
:param trace_parent: an optional TraceParent object
903898
:param start: override the start timestamp, mostly useful for testing
904899
:param auto_activate: whether to set this transaction in execution_context
905-
:param list of traceparents to causally link this transaction to
906-
900+
:param links: list of traceparents to causally link this transaction to
907901
:returns the Transaction object
908902
"""
903+
links = links if links else []
904+
continuation_strategy = self.config.trace_continuation_strategy
905+
906+
# we restart the trace if continuation strategy is "restart", or if it is "restart_external" and our
907+
# "es" key is not in the tracestate header. In both cases, the original TraceParent is added to trace links.
908+
if trace_parent and continuation_strategy != constants.TRACE_CONTINUATION_STRATEGY.CONTINUE:
909+
if continuation_strategy == constants.TRACE_CONTINUATION_STRATEGY.RESTART or (
910+
continuation_strategy == constants.TRACE_CONTINUATION_STRATEGY.RESTART_EXTERNAL
911+
and not trace_parent.tracestate_dict
912+
):
913+
links.append(trace_parent)
914+
trace_parent = None
909915
if trace_parent:
910916
is_sampled = bool(trace_parent.trace_options.recorded)
911917
sample_rate = trace_parent.tracestate_dict.get(constants.TRACESTATE.SAMPLE_RATE)

elasticapm/utils/disttracing.py

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
import binascii
3232
import ctypes
3333
import itertools
34+
import random
3435
import re
36+
from typing import Dict, Optional
3537

3638
from elasticapm.conf import constants
3739
from elasticapm.utils.logging import get_logger
@@ -42,16 +44,31 @@
4244
class TraceParent(object):
4345
__slots__ = ("version", "trace_id", "span_id", "trace_options", "tracestate", "tracestate_dict", "is_legacy")
4446

45-
def __init__(self, version, trace_id, span_id, trace_options, tracestate=None, is_legacy=False):
46-
self.version = version
47-
self.trace_id = trace_id
48-
self.span_id = span_id
49-
self.trace_options = trace_options
50-
self.is_legacy = is_legacy
51-
self.tracestate = tracestate
47+
def __init__(
48+
self,
49+
version: int,
50+
trace_id: str,
51+
span_id: str,
52+
trace_options: "TracingOptions",
53+
tracestate: Optional[str] = None,
54+
is_legacy: bool = False,
55+
):
56+
self.version: int = version
57+
self.trace_id: str = trace_id
58+
self.span_id: str = span_id
59+
self.trace_options: TracingOptions = trace_options
60+
self.is_legacy: bool = is_legacy
61+
self.tracestate: Optional[str] = tracestate
5262
self.tracestate_dict = self._parse_tracestate(tracestate)
5363

54-
def copy_from(self, version=None, trace_id=None, span_id=None, trace_options=None, tracestate=None):
64+
def copy_from(
65+
self,
66+
version: int = None,
67+
trace_id: str = None,
68+
span_id: str = None,
69+
trace_options: "TracingOptions" = None,
70+
tracestate: str = None,
71+
):
5572
return TraceParent(
5673
version or self.version,
5774
trace_id or self.trace_id,
@@ -60,13 +77,13 @@ def copy_from(self, version=None, trace_id=None, span_id=None, trace_options=Non
6077
tracestate or self.tracestate,
6178
)
6279

63-
def to_string(self):
80+
def to_string(self) -> str:
6481
return "{:02x}-{}-{}-{:02x}".format(self.version, self.trace_id, self.span_id, self.trace_options.asByte)
6582

66-
def to_ascii(self):
83+
def to_ascii(self) -> bytes:
6784
return self.to_string().encode("ascii")
6885

69-
def to_binary(self):
86+
def to_binary(self) -> bytes:
7087
return b"".join(
7188
[
7289
(self.version).to_bytes(1, byteorder="big"),
@@ -80,7 +97,18 @@ def to_binary(self):
8097
)
8198

8299
@classmethod
83-
def from_string(cls, traceparent_string, tracestate_string=None, is_legacy=False):
100+
def new(cls, transaction_id: str, is_sampled: bool) -> "TraceParent":
101+
return cls(
102+
version=constants.TRACE_CONTEXT_VERSION,
103+
trace_id="%032x" % random.getrandbits(128),
104+
span_id=transaction_id,
105+
trace_options=TracingOptions(recorded=is_sampled),
106+
)
107+
108+
@classmethod
109+
def from_string(
110+
cls, traceparent_string: str, tracestate_string: Optional[str] = None, is_legacy: bool = False
111+
) -> Optional["TraceParent"]:
84112
try:
85113
parts = traceparent_string.split("-")
86114
version, trace_id, span_id, trace_flags = parts[:4]
@@ -105,11 +133,11 @@ def from_string(cls, traceparent_string, tracestate_string=None, is_legacy=False
105133
@classmethod
106134
def from_headers(
107135
cls,
108-
headers,
109-
header_name=constants.TRACEPARENT_HEADER_NAME,
110-
legacy_header_name=constants.TRACEPARENT_LEGACY_HEADER_NAME,
111-
tracestate_header_name=constants.TRACESTATE_HEADER_NAME,
112-
):
136+
headers: dict,
137+
header_name: str = constants.TRACEPARENT_HEADER_NAME,
138+
legacy_header_name: str = constants.TRACEPARENT_LEGACY_HEADER_NAME,
139+
tracestate_header_name: str = constants.TRACESTATE_HEADER_NAME,
140+
) -> Optional["TraceParent"]:
113141
tracestate = cls.merge_duplicate_headers(headers, tracestate_header_name)
114142
if header_name in headers:
115143
return TraceParent.from_string(headers[header_name], tracestate, is_legacy=False)
@@ -119,7 +147,7 @@ def from_headers(
119147
return None
120148

121149
@classmethod
122-
def from_binary(cls, data):
150+
def from_binary(cls, data: bytes) -> Optional["TraceParent"]:
123151
if len(data) != 29:
124152
logger.debug("Invalid binary traceparent format, length is %d, should be 29, value %r", len(data), data)
125153
return
@@ -162,7 +190,7 @@ def merge_duplicate_headers(cls, headers, key):
162190
return ",".join([item[1] for item in headers if item[0] == key])
163191
return headers.get(key)
164192

165-
def _parse_tracestate(self, tracestate):
193+
def _parse_tracestate(self, tracestate) -> Dict[str, str]:
166194
"""
167195
Tracestate can contain data from any vendor, made distinct by vendor
168196
keys. Vendors are comma-separated. The elastic (es) tracestate data is

tests/client/transaction_tests.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,3 +466,44 @@ def test_transaction_span_links(elasticapm_client):
466466
assert span["links"][0]["span_id"] == "0011223344556677"
467467
assert span["links"][1]["trace_id"] == "00112233445566778899aabbccddeeff"
468468
assert span["links"][1]["span_id"] == "aabbccddeeff0011"
469+
470+
471+
def test_transaction_trace_continuation_continue(elasticapm_client):
472+
elasticapm_client.config.update("1", trace_continuation_strategy=constants.TRACE_CONTINUATION_STRATEGY.CONTINUE)
473+
tp = TraceParent.from_string("00-aabbccddeeff00112233445566778899-0011223344556677-01")
474+
elasticapm_client.begin_transaction("a", trace_parent=tp)
475+
elasticapm_client.end_transaction("foo")
476+
transaction = elasticapm_client.events[constants.TRANSACTION][0]
477+
assert transaction["trace_id"] == tp.trace_id
478+
assert "links" not in transaction
479+
480+
481+
def test_transaction_trace_continuation_restart(elasticapm_client):
482+
elasticapm_client.config.update("1", trace_continuation_strategy=constants.TRACE_CONTINUATION_STRATEGY.RESTART)
483+
tp = TraceParent.from_string("00-aabbccddeeff00112233445566778899-0011223344556677-01")
484+
elasticapm_client.begin_transaction("a", trace_parent=tp)
485+
elasticapm_client.end_transaction("foo")
486+
transaction = elasticapm_client.events[constants.TRANSACTION][0]
487+
assert transaction["trace_id"] != tp.trace_id
488+
assert transaction["links"][0]["trace_id"] == tp.trace_id
489+
assert transaction["links"][0]["span_id"] == tp.span_id
490+
491+
492+
def test_transaction_trace_continuation_restart_external(elasticapm_client):
493+
elasticapm_client.config.update(
494+
"1", trace_continuation_strategy=constants.TRACE_CONTINUATION_STRATEGY.RESTART_EXTERNAL
495+
)
496+
tp = TraceParent.from_string("00-aabbccddeeff00112233445566778899-0011223344556677-01")
497+
elasticapm_client.begin_transaction("a", trace_parent=tp)
498+
elasticapm_client.end_transaction("foo")
499+
transaction = elasticapm_client.events[constants.TRANSACTION][0]
500+
assert transaction["trace_id"] != tp.trace_id
501+
assert transaction["links"][0]["trace_id"] == tp.trace_id
502+
assert transaction["links"][0]["span_id"] == tp.span_id
503+
504+
tp.add_tracestate("foo", "bar")
505+
elasticapm_client.begin_transaction("a", trace_parent=tp)
506+
elasticapm_client.end_transaction("foo")
507+
transaction = elasticapm_client.events[constants.TRANSACTION][1]
508+
assert transaction["trace_id"] == tp.trace_id
509+
assert "links" not in transaction

0 commit comments

Comments
 (0)