Skip to content

Commit 6869ed6

Browse files
authored
feat(spanner): implement custom tracer_provider injection for opentelemetry traces (#1229)
* all: implement custom tracer_provider injection An important feature for observability is to allow the injection of a custom tracer_provider instead of always using the global tracer_provider by sending in observability_options=dict( tracer_provider=tracer_provider, enable_extended_tracing=True, ) * Address review feedback by attaching observability_options to Client only * Attach observability_options directly before trace_call * More reverts for formatting * Plumb observability_options into _restart_on_unavailable * completely decouple observability_options from session * apply SPANNER_ENABLE_EXTENDED_TRACING but in inverse due to compatibility * Document SPANNER_ENABLE_EXTENDED_TRACING in environment * Revert a vestige of mock * tests: add unit test for propagating TracerProvider * Add preliminary end-to-end test to check for injection of observability_options * Document default enable_extended_tracing value * Carve out observability_options test * Ensure that observability_options test sets up and deletes database * Ensure instance.create() is invoked in system tests * Use getattr for mock _Client * Update with code review suggestions * Deal with mock.Mock false positives failing tests * Address review feedback
1 parent 3079bdd commit 6869ed6

File tree

10 files changed

+339
-27
lines changed

10 files changed

+339
-27
lines changed

docs/opentelemetry-tracing.rst

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,21 @@ We also need to tell OpenTelemetry which exporter to use. To export Spanner trac
2525
2626
# Create and export one trace every 1000 requests
2727
sampler = TraceIdRatioBased(1/1000)
28-
# Use the default tracer provider
29-
trace.set_tracer_provider(TracerProvider(sampler=sampler))
30-
trace.get_tracer_provider().add_span_processor(
28+
tracer_provider = TracerProvider(sampler=sampler)
29+
tracer_provider.add_span_processor(
3130
# Initialize the cloud tracing exporter
3231
BatchSpanProcessor(CloudTraceSpanExporter())
3332
)
33+
observability_options = dict(
34+
tracer_provider=tracer_provider,
35+
36+
# By default enable_extended_tracing is set to True due
37+
# to legacy reasons to avoid breaking changes, you
38+
# can modify it though using the environment variable
39+
# SPANNER_ENABLE_EXTENDED_TRACING=false.
40+
enable_extended_tracing=False,
41+
)
42+
spanner_client = spanner.Client(project_id, observability_options=observability_options)
3443
3544
3645
To get more fine-grained traces from gRPC, you can enable the gRPC instrumentation by the following
@@ -52,3 +61,13 @@ Generated spanner traces should now be available on `Cloud Trace <https://consol
5261

5362
Tracing is most effective when many libraries are instrumented to provide insight over the entire lifespan of a request.
5463
For a list of libraries that can be instrumented, see the `OpenTelemetry Integrations` section of the `OpenTelemetry Python docs <https://opentelemetry-python.readthedocs.io/en/stable/>`_
64+
65+
Annotating spans with SQL
66+
~~~~~~~~~~~~~~~~~~~~~~~~~
67+
68+
By default your spans will be annotated with SQL statements where appropriate, but that can be a PII (Personally Identifiable Information)
69+
leak. Unfortunately, to preserve legacy behavior, we cannot turn this annotation off by default. However, you can control it by setting
70+
71+
SPANNER_ENABLE_EXTENDED_TRACING=false
72+
73+
to turn it off globally, or, when creating each ``spanner.Client``, set ``enable_extended_tracing`` to ``False`` in ``observability_options``.

examples/trace.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,18 @@ def main():
3232
tracer_provider = TracerProvider(sampler=ALWAYS_ON)
3333
trace_exporter = CloudTraceSpanExporter(project_id=project_id)
3434
tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
35-
trace.set_tracer_provider(tracer_provider)
36-
# Retrieve a tracer from the global tracer provider.
37-
tracer = tracer_provider.get_tracer('MyApp')
3835

3936
# Setup the Cloud Spanner Client.
40-
spanner_client = spanner.Client(project_id)
37+
spanner_client = spanner.Client(
38+
project_id,
39+
observability_options=dict(tracer_provider=tracer_provider, enable_extended_tracing=True),
40+
)
4141
instance = spanner_client.instance('test-instance')
4242
database = instance.database('test-db')
4343

44+
# Retrieve a tracer from our custom tracer provider.
45+
tracer = tracer_provider.get_tracer('MyApp')
46+
4447
# Now run our queries
4548
with tracer.start_as_current_span('QueryInformationSchema'):
4649
with database.snapshot() as snapshot:

google/cloud/spanner_v1/_opentelemetry_tracing.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""Manages OpenTelemetry trace creation and handling"""
1616

1717
from contextlib import contextmanager
18+
import os
1819

1920
from google.cloud.spanner_v1 import SpannerClient
2021
from google.cloud.spanner_v1 import gapic_version
@@ -33,6 +34,9 @@
3334

3435
TRACER_NAME = "cloud.google.com/python/spanner"
3536
TRACER_VERSION = gapic_version.__version__
37+
extended_tracing_globally_disabled = (
38+
os.getenv("SPANNER_ENABLE_EXTENDED_TRACING", "").lower() == "false"
39+
)
3640

3741

3842
def get_tracer(tracer_provider=None):
@@ -51,13 +55,26 @@ def get_tracer(tracer_provider=None):
5155

5256

5357
@contextmanager
54-
def trace_call(name, session, extra_attributes=None):
58+
def trace_call(name, session, extra_attributes=None, observability_options=None):
5559
if not HAS_OPENTELEMETRY_INSTALLED or not session:
5660
# Empty context manager. Users will have to check if the generated value is None or a span
5761
yield None
5862
return
5963

60-
tracer = get_tracer()
64+
tracer_provider = None
65+
66+
# By default enable_extended_tracing=True because in a bid to minimize
67+
# breaking changes and preserve legacy behavior, we are keeping it turned
68+
# on by default.
69+
enable_extended_tracing = True
70+
71+
if isinstance(observability_options, dict): # Avoid false positives with mock.Mock
72+
tracer_provider = observability_options.get("tracer_provider", None)
73+
enable_extended_tracing = observability_options.get(
74+
"enable_extended_tracing", enable_extended_tracing
75+
)
76+
77+
tracer = get_tracer(tracer_provider)
6178

6279
# Set base attributes that we know for every trace created
6380
attributes = {
@@ -72,6 +89,12 @@ def trace_call(name, session, extra_attributes=None):
7289
if extra_attributes:
7390
attributes.update(extra_attributes)
7491

92+
if extended_tracing_globally_disabled:
93+
enable_extended_tracing = False
94+
95+
if not enable_extended_tracing:
96+
attributes.pop("db.statement", False)
97+
7598
with tracer.start_as_current_span(
7699
name, kind=trace.SpanKind.CLIENT, attributes=attributes
77100
) as span:

google/cloud/spanner_v1/batch.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,13 @@ def commit(
205205
max_commit_delay=max_commit_delay,
206206
request_options=request_options,
207207
)
208-
with trace_call("CloudSpanner.Commit", self._session, trace_attributes):
208+
observability_options = getattr(database, "observability_options", None)
209+
with trace_call(
210+
"CloudSpanner.Commit",
211+
self._session,
212+
trace_attributes,
213+
observability_options=observability_options,
214+
):
209215
method = functools.partial(
210216
api.commit,
211217
request=request,
@@ -318,7 +324,13 @@ def batch_write(self, request_options=None, exclude_txn_from_change_streams=Fals
318324
request_options=request_options,
319325
exclude_txn_from_change_streams=exclude_txn_from_change_streams,
320326
)
321-
with trace_call("CloudSpanner.BatchWrite", self._session, trace_attributes):
327+
observability_options = getattr(database, "observability_options", None)
328+
with trace_call(
329+
"CloudSpanner.BatchWrite",
330+
self._session,
331+
trace_attributes,
332+
observability_options=observability_options,
333+
):
322334
method = functools.partial(
323335
api.batch_write,
324336
request=request,

google/cloud/spanner_v1/client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,16 @@ class Client(ClientWithProject):
126126
for all ReadRequests and ExecuteSqlRequests that indicates which replicas
127127
or regions should be used for non-transactional reads or queries.
128128
129+
:type observability_options: dict (str -> any) or None
130+
:param observability_options: (Optional) the configuration to control
131+
the tracer's behavior.
132+
tracer_provider is the injected tracer provider
133+
enable_extended_tracing: :type:boolean when set to true will allow for
134+
spans that issue SQL statements to be annotated with SQL.
135+
Default `True`, please set it to `False` to turn it off
136+
or you can use the environment variable `SPANNER_ENABLE_EXTENDED_TRACING=<boolean>`
137+
to control it.
138+
129139
:raises: :class:`ValueError <exceptions.ValueError>` if both ``read_only``
130140
and ``admin`` are :data:`True`
131141
"""
@@ -146,6 +156,7 @@ def __init__(
146156
query_options=None,
147157
route_to_leader_enabled=True,
148158
directed_read_options=None,
159+
observability_options=None,
149160
):
150161
self._emulator_host = _get_spanner_emulator_host()
151162

@@ -187,6 +198,7 @@ def __init__(
187198

188199
self._route_to_leader_enabled = route_to_leader_enabled
189200
self._directed_read_options = directed_read_options
201+
self._observability_options = observability_options
190202

191203
@property
192204
def credentials(self):
@@ -268,6 +280,15 @@ def route_to_leader_enabled(self):
268280
"""
269281
return self._route_to_leader_enabled
270282

283+
@property
284+
def observability_options(self):
285+
"""Getter for observability_options.
286+
287+
:rtype: dict
288+
:returns: The configured observability_options if set.
289+
"""
290+
return self._observability_options
291+
271292
@property
272293
def directed_read_options(self):
273294
"""Getter for directed_read_options.

google/cloud/spanner_v1/database.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ def execute_pdml():
718718
method=method,
719719
request=request,
720720
transaction_selector=txn_selector,
721+
observability_options=self.observability_options,
721722
)
722723

723724
result_set = StreamedResultSet(iterator)
@@ -1106,6 +1107,17 @@ def set_iam_policy(self, policy):
11061107
response = api.set_iam_policy(request=request, metadata=metadata)
11071108
return response
11081109

1110+
@property
1111+
def observability_options(self):
1112+
"""
1113+
Returns the observability options that you set when creating
1114+
the SpannerClient.
1115+
"""
1116+
if not (self._instance and self._instance._client):
1117+
return None
1118+
1119+
return getattr(self._instance._client, "observability_options", None)
1120+
11091121

11101122
class BatchCheckout(object):
11111123
"""Context manager for using a batch from a database.

google/cloud/spanner_v1/session.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,13 @@ def create(self):
142142
if self._labels:
143143
request.session.labels = self._labels
144144

145-
with trace_call("CloudSpanner.CreateSession", self, self._labels):
145+
observability_options = getattr(self._database, "observability_options", None)
146+
with trace_call(
147+
"CloudSpanner.CreateSession",
148+
self,
149+
self._labels,
150+
observability_options=observability_options,
151+
):
146152
session_pb = api.create_session(
147153
request=request,
148154
metadata=metadata,
@@ -169,7 +175,10 @@ def exists(self):
169175
)
170176
)
171177

172-
with trace_call("CloudSpanner.GetSession", self) as span:
178+
observability_options = getattr(self._database, "observability_options", None)
179+
with trace_call(
180+
"CloudSpanner.GetSession", self, observability_options=observability_options
181+
) as span:
173182
try:
174183
api.get_session(name=self.name, metadata=metadata)
175184
if span:
@@ -194,7 +203,12 @@ def delete(self):
194203
raise ValueError("Session ID not set by back-end")
195204
api = self._database.spanner_api
196205
metadata = _metadata_with_prefix(self._database.name)
197-
with trace_call("CloudSpanner.DeleteSession", self):
206+
observability_options = getattr(self._database, "observability_options", None)
207+
with trace_call(
208+
"CloudSpanner.DeleteSession",
209+
self,
210+
observability_options=observability_options,
211+
):
198212
api.delete_session(name=self.name, metadata=metadata)
199213

200214
def ping(self):

0 commit comments

Comments
 (0)