_observability.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. # Copyright 2023 The gRPC authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import annotations
  15. import abc
  16. import contextlib
  17. import logging
  18. import threading
  19. from typing import Any, Generator, Generic, List, Optional, TypeVar
  20. from grpc._cython import cygrpc as _cygrpc
  21. from grpc._typing import ChannelArgumentType
# Module-level logger.
_LOGGER = logging.getLogger(__name__)

# Stand-in name so the string annotation "_channel._RPCState" used by
# maybe_record_rpc_latency has something to refer to.
# NOTE(review): presumably this avoids a circular import -- confirm.
_channel = Any  # _channel.py imports this module.

# Type parameters of the generic ObservabilityPlugin base class.
ClientCallTracerCapsule = TypeVar("ClientCallTracerCapsule")
ServerCallTracerFactoryCapsule = TypeVar("ServerCallTracerFactoryCapsule")

# Held by get_plugin() for as long as its caller's `with` body runs, and by
# set_plugin() while it replaces the registered plugin.
_plugin_lock: threading.RLock = threading.RLock()

# The currently registered plugin, or None when no plugin is registered.
_OBSERVABILITY_PLUGIN: Optional["ObservabilityPlugin"] = None

# maybe_record_rpc_latency() skips every RPC whose UTF-8 encoded method name
# contains one of these byte strings.
_SERVICES_TO_EXCLUDE: List[bytes] = [
    b"google.monitoring.v3.MetricService",
    b"google.devtools.cloudtrace.v2.TraceService",
]
  32. class ServerCallTracerFactory:
  33. """An encapsulation of a ServerCallTracerFactory.
  34. Instances of this class can be passed to a Channel as values for the
  35. grpc.experimental.server_call_tracer_factory option
  36. """
  37. def __init__(self, address):
  38. self._address = address
  39. def __int__(self):
  40. return self._address
  41. class ObservabilityPlugin(
  42. Generic[ClientCallTracerCapsule, ServerCallTracerFactoryCapsule],
  43. metaclass=abc.ABCMeta,
  44. ):
  45. """Abstract base class for observability plugin.
  46. *This is a semi-private class that was intended for the exclusive use of
  47. the gRPC team.*
  48. The ClientCallTracerCapsule and ClientCallTracerCapsule created by this
  49. plugin should be injected to gRPC core using observability_init at the
  50. start of a program, before any channels/servers are built.
  51. Any future methods added to this interface cannot have the
  52. @abc.abstractmethod annotation.
  53. Attributes:
  54. _stats_enabled: A bool indicates whether tracing is enabled.
  55. _tracing_enabled: A bool indicates whether stats(metrics) is enabled.
  56. _registered_methods: A set which stores the registered method names in
  57. bytes.
  58. """
  59. _tracing_enabled: bool = False
  60. _stats_enabled: bool = False
  61. @abc.abstractmethod
  62. def create_client_call_tracer(
  63. self, method_name: bytes, target: bytes
  64. ) -> ClientCallTracerCapsule:
  65. """Creates a ClientCallTracerCapsule.
  66. After register the plugin, if tracing or stats is enabled, this method
  67. will be called after a call was created, the ClientCallTracer created
  68. by this method will be saved to call context.
  69. The ClientCallTracer is an object which implements `grpc_core::ClientCallTracer`
  70. interface and wrapped in a PyCapsule using `client_call_tracer` as name.
  71. Args:
  72. method_name: The method name of the call in byte format.
  73. target: The channel target of the call in byte format.
  74. registered_method: Whether this method is pre-registered.
  75. Returns:
  76. A PyCapsule which stores a ClientCallTracer object.
  77. """
  78. raise NotImplementedError()
  79. @abc.abstractmethod
  80. def save_trace_context(
  81. self, trace_id: str, span_id: str, is_sampled: bool
  82. ) -> None:
  83. """Saves the trace_id and span_id related to the current span.
  84. After register the plugin, if tracing is enabled, this method will be
  85. called after the server finished sending response.
  86. This method can be used to propagate census context.
  87. Args:
  88. trace_id: The identifier for the trace associated with the span as a
  89. 32-character hexadecimal encoded string,
  90. e.g. 26ed0036f2eff2b7317bccce3e28d01f
  91. span_id: The identifier for the span as a 16-character hexadecimal encoded
  92. string. e.g. 113ec879e62583bc
  93. is_sampled: A bool indicates whether the span is sampled.
  94. """
  95. raise NotImplementedError()
  96. @abc.abstractmethod
  97. def create_server_call_tracer_factory(
  98. self,
  99. *,
  100. xds: bool = False,
  101. ) -> Optional[ServerCallTracerFactoryCapsule]:
  102. """Creates a ServerCallTracerFactoryCapsule.
  103. This method will be called at server initialization time to create a
  104. ServerCallTracerFactory, which will be registered to gRPC core.
  105. The ServerCallTracerFactory is an object which implements
  106. `grpc_core::ServerCallTracerFactory` interface and wrapped in a PyCapsule
  107. using `server_call_tracer_factory` as name.
  108. Args:
  109. xds: Whether the server is xds server.
  110. Returns:
  111. A PyCapsule which stores a ServerCallTracerFactory object. Or None if
  112. plugin decides not to create ServerCallTracerFactory.
  113. """
  114. raise NotImplementedError()
  115. @abc.abstractmethod
  116. def record_rpc_latency(
  117. self, method: str, target: str, rpc_latency: float, status_code: Any
  118. ) -> None:
  119. """Record the latency of the RPC.
  120. After register the plugin, if stats is enabled, this method will be
  121. called at the end of each RPC.
  122. Args:
  123. method: The fully-qualified name of the RPC method being invoked.
  124. target: The target name of the RPC method being invoked.
  125. rpc_latency: The latency for the RPC in seconds, equals to the time between
  126. when the client invokes the RPC and when the client receives the status.
  127. status_code: An element of grpc.StatusCode in string format representing the
  128. final status for the RPC.
  129. """
  130. raise NotImplementedError()
  131. def set_tracing(self, enable: bool) -> None:
  132. """Enable or disable tracing.
  133. Args:
  134. enable: A bool indicates whether tracing should be enabled.
  135. """
  136. self._tracing_enabled = enable
  137. def set_stats(self, enable: bool) -> None:
  138. """Enable or disable stats(metrics).
  139. Args:
  140. enable: A bool indicates whether stats should be enabled.
  141. """
  142. self._stats_enabled = enable
  143. def save_registered_method(self, method_name: bytes) -> None:
  144. """Saves the method name to registered_method list.
  145. When exporting metrics, method name for unregistered methods will be replaced
  146. with 'other' by default.
  147. Args:
  148. method_name: The method name in bytes.
  149. """
  150. raise NotImplementedError()
  151. @property
  152. def tracing_enabled(self) -> bool:
  153. return self._tracing_enabled
  154. @property
  155. def stats_enabled(self) -> bool:
  156. return self._stats_enabled
  157. @property
  158. def observability_enabled(self) -> bool:
  159. return self.tracing_enabled or self.stats_enabled
  160. @contextlib.contextmanager
  161. def get_plugin() -> Generator[Optional[ObservabilityPlugin], None, None]:
  162. """Get the ObservabilityPlugin in _observability module.
  163. Returns:
  164. The ObservabilityPlugin currently registered with the _observability
  165. module. Or None if no plugin exists at the time of calling this method.
  166. """
  167. with _plugin_lock:
  168. yield _OBSERVABILITY_PLUGIN
  169. def set_plugin(observability_plugin: Optional[ObservabilityPlugin]) -> None:
  170. """Save ObservabilityPlugin to _observability module.
  171. Args:
  172. observability_plugin: The ObservabilityPlugin to save.
  173. Raises:
  174. ValueError: If an ObservabilityPlugin was already registered at the
  175. time of calling this method.
  176. """
  177. global _OBSERVABILITY_PLUGIN # pylint: disable=global-statement
  178. with _plugin_lock:
  179. if observability_plugin and _OBSERVABILITY_PLUGIN:
  180. raise ValueError("observability_plugin was already set!")
  181. _OBSERVABILITY_PLUGIN = observability_plugin
  182. def observability_init(observability_plugin: ObservabilityPlugin) -> None:
  183. """Initialize observability with provided ObservabilityPlugin.
  184. This method have to be called at the start of a program, before any
  185. channels/servers are built.
  186. Args:
  187. observability_plugin: The ObservabilityPlugin to use.
  188. Raises:
  189. ValueError: If an ObservabilityPlugin was already registered at the
  190. time of calling this method.
  191. """
  192. set_plugin(observability_plugin)
  193. def observability_deinit() -> None:
  194. """Clear the observability context, including ObservabilityPlugin and
  195. ServerCallTracerFactory
  196. This method have to be called after exit observability context so that
  197. it's possible to re-initialize again.
  198. """
  199. set_plugin(None)
  200. _cygrpc.clear_server_call_tracer_factory()
  201. def maybe_record_rpc_latency(state: "_channel._RPCState") -> None:
  202. """Record the latency of the RPC, if the plugin is registered and stats is enabled.
  203. This method will be called at the end of each RPC.
  204. Args:
  205. state: a grpc._channel._RPCState object which contains the stats related to the
  206. RPC.
  207. """
  208. # TODO(xuanwn): use channel args to exclude those metrics.
  209. for exclude_prefix in _SERVICES_TO_EXCLUDE:
  210. if exclude_prefix in state.method.encode("utf8"):
  211. return
  212. with get_plugin() as plugin:
  213. if plugin and plugin.stats_enabled:
  214. rpc_latency_s = state.rpc_end_time - state.rpc_start_time
  215. rpc_latency_ms = rpc_latency_s * 1000
  216. plugin.record_rpc_latency(
  217. state.method, state.target, rpc_latency_ms, state.code
  218. )
  219. def create_server_call_tracer_factory_option(xds: bool) -> ChannelArgumentType:
  220. with get_plugin() as plugin:
  221. if plugin and plugin.stats_enabled:
  222. server_call_tracer_factory_address = (
  223. _cygrpc.get_server_call_tracer_factory_address(plugin, xds)
  224. )
  225. if server_call_tracer_factory_address:
  226. return (
  227. (
  228. "grpc.experimental.server_call_tracer_factory",
  229. ServerCallTracerFactory(
  230. server_call_tracer_factory_address
  231. ),
  232. ),
  233. )
  234. return ()