From abcf854a79cafbb36fe3ecab2996ff95971d8977 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 14 Nov 2025 19:41:37 +0100 Subject: [PATCH 1/2] set up opentelemetry metrics --- Cargo.lock | 173 ++++++++++++++++++++++++++ Cargo.toml | 5 + docker-compose.yml | 18 +++ dockerfiles/Dockerfile-opentelemetry | 2 + dockerfiles/collector-config-dev.yaml | 30 +++++ src/bin/cratesfyi.rs | 9 +- src/build_queue.rs | 78 ++++++++++-- src/cdn.rs | 75 +++++++++-- src/config.rs | 4 + src/context.rs | 33 +++-- src/db/pool.rs | 73 ++++++++++- src/docbuilder/mod.rs | 4 +- src/docbuilder/rustwide_builder.rs | 52 ++++++++ src/metrics/mod.rs | 11 +- src/metrics/otel.rs | 99 +++++++++++++++ src/metrics/service.rs | 101 +++++++++++++++ src/storage/database.rs | 16 ++- src/storage/mod.rs | 63 ++++++++-- src/storage/s3.rs | 11 +- src/test/mod.rs | 42 ++++++- src/test/test_metrics.rs | 93 ++++++++++++++ src/utils/daemon.rs | 45 ++++++- src/utils/html.rs | 3 + src/web/metrics.rs | 150 +++++++++++++++++++++- src/web/mod.rs | 8 +- src/web/releases.rs | 12 +- src/web/rustdoc.rs | 14 ++- 27 files changed, 1155 insertions(+), 69 deletions(-) create mode 100644 dockerfiles/Dockerfile-opentelemetry create mode 100644 dockerfiles/collector-config-dev.yaml create mode 100644 src/metrics/otel.rs create mode 100644 src/metrics/service.rs create mode 100644 src/test/test_metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 60b3d2a3d..2a0f0005f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2078,6 +2078,10 @@ dependencies = [ "mockito", "num_cpus", "once_cell", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry-resource-detectors", + "opentelemetry_sdk", "path-slash", "percent-encoding", "pretty_assertions", @@ -4775,6 +4779,19 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 
1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -5920,6 +5937,99 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", + "tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http 1.3.1", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http 1.3.1", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.17", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry-resource-detectors" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82845106cf72d47c141cee7f0d95e0650d8f28c6222a1f1ae727a8883899c19" +dependencies = [ + "opentelemetry", + "opentelemetry-semantic-conventions", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.31.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e62e29dfe041afb8ed2a6c9737ab57db4907285d999ef8ad3a59092a36bdc846" + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "thiserror 2.0.17", + "tokio", + "tokio-stream", +] + [[package]] name = "os_info" version = "3.13.0" @@ -6376,6 +6486,29 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "prost" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.110", +] + [[package]] name = "quote" version = "1.0.42" @@ -8265,6 +8398,43 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2" +[[package]] +name = "tonic" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "sync_wrapper", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-prost" +version = "0.14.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -8273,9 +8443,12 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", + "indexmap 2.12.0", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 25a464d28..d90e5742a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,10 @@ comrak = { version = "0.47.0", default-features = false } syntect = { version = "5.0.0", default-features = false, features = ["parsing", "html", "dump-load", "regex-onig"] } toml = "0.9.2" prometheus = { version = "0.14.0", default-features = false } +opentelemetry = "0.31.0" +opentelemetry-otlp = { version = "0.31.0", features = ["grpc-tonic", "metrics"] } +opentelemetry-resource-detectors = "0.10.0" +opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio"] } rustwide = { version = "0.20.0", features = ["unstable-toolchain-ci", "unstable"] } mime_guess = "2" zstd = "0.13.0" @@ -123,6 +127,7 @@ rand = "0.9" mockito = "1.0.2" test-case = "3.0.0" tower = { version = "0.5.1", features = ["util"] } +opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio", "testing"] } aws-smithy-types = "1.0.1" aws-smithy-runtime = {version = "1.0.1", features = ["client", "test-util"]} aws-smithy-http = "0.62.0" diff --git a/docker-compose.yml b/docker-compose.yml index b9b1c1261..eea43d88f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,6 +22,7 @@ # # optional profile: `metrics`: # * `prometheus` -> configured prometheus instance +# * `opentelemetry` -> a debug opentelemetry receiver # # optional profile: `full`: all of the above. 
# @@ -296,6 +297,23 @@ services: - metrics - full + opentelemetry: + build: + context: ./dockerfiles + dockerfile: ./Dockerfile-opentelemetry + <<: *docker-cache + ports: + - "127.0.0.1:4317:4317" + healthcheck: + <<: *healthcheck-interval + test: curl --silent --fail http://localhost:13133/health + + profiles: + # we rarely need to test with an actual opentelemetry collector, so always running + # it is a waste. + - metrics + - full + gui_tests: platform: "linux/amd64" build: diff --git a/dockerfiles/Dockerfile-opentelemetry b/dockerfiles/Dockerfile-opentelemetry new file mode 100644 index 000000000..1396d4b3a --- /dev/null +++ b/dockerfiles/Dockerfile-opentelemetry @@ -0,0 +1,2 @@ +FROM otel/opentelemetry-collector-contrib:0.139.0 AS base +COPY collector-config-dev.yaml /etc/otelcol-contrib/config.yaml diff --git a/dockerfiles/collector-config-dev.yaml b/dockerfiles/collector-config-dev.yaml new file mode 100644 index 000000000..bbd75b47c --- /dev/null +++ b/dockerfiles/collector-config-dev.yaml @@ -0,0 +1,30 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +exporters: + debug: + # the debug exporter will just print everything to the console + verbosity: detailed + +extensions: + # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/09d7cdeb075f51bf90a604c95b106521bba9962e/extension/healthcheckextension + health_check: + endpoint: "0.0.0.0:13133" + +service: + pipelines: + traces: + receivers: [otlp] + processors: [] + exporters: [debug] + metrics: + receivers: [otlp] + processors: [] + exporters: [debug] + logs: + receivers: [otlp] + processors: [] + exporters: [debug] diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index 0eeefd036..f5bdd5a2e 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -5,8 +5,9 @@ use docs_rs::{ db::{self, CrateId, Overrides, ReleaseId, add_path_into_database, types::version::Version}, start_background_metrics_webserver, start_web_server, utils::{ - ConfigName, get_config, 
get_crate_pattern_and_priority, list_crate_priorities, - queue_builder, remove_crate_priority, set_config, set_crate_priority, + ConfigName, daemon::start_background_service_metric_collector, get_config, + get_crate_pattern_and_priority, list_crate_priorities, queue_builder, + remove_crate_priority, set_config, set_crate_priority, }, }; use futures_util::StreamExt; @@ -199,6 +200,10 @@ impl CommandLine { docs_rs::utils::daemon::start_background_queue_rebuild(&ctx)?; } + // When people run the services separately, we assume that we can collect service + // metrics from the registry watcher, which should only run once, and all the time. + start_background_service_metric_collector(&ctx)?; + start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?; ctx.runtime.block_on(async move { diff --git a/src/build_queue.rs b/src/build_queue.rs index dafc84d1b..4ee79b5a4 100644 --- a/src/build_queue.rs +++ b/src/build_queue.rs @@ -4,19 +4,39 @@ use crate::{ CrateId, Pool, delete_crate, delete_version, types::version::Version, update_latest_version_id, }, - docbuilder::PackageKind, + docbuilder::{BuilderMetrics, PackageKind}, error::Result, + metrics::otel::AnyMeterProvider, storage::AsyncStorage, utils::{ConfigName, get_config, get_crate_priority, report_error, retry, set_config}, }; use anyhow::Context as _; use fn_error_context::context; use futures_util::{StreamExt, stream::TryStreamExt}; +use opentelemetry::metrics::Counter; use sqlx::Connection as _; -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, sync::Arc, time::Instant}; use tokio::runtime; use tracing::{debug, error, info, instrument, warn}; +#[derive(Debug)] +struct BuildQueueMetrics { + queued_builds: Counter, +} + +impl BuildQueueMetrics { + fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("build_queue"); + const PREFIX: &str = "docsrs.build_queue"; + Self { + queued_builds: meter + .u64_counter(format!("{PREFIX}.queued_builds")) + 
.with_unit("1") + .build(), + } + } +} + /// The static priority for background rebuilds. /// Used when queueing rebuilds, and when rendering them /// collapsed in the UI. @@ -40,6 +60,8 @@ pub struct AsyncBuildQueue { storage: Arc, pub(crate) db: Pool, metrics: Arc, + queue_metrics: BuildQueueMetrics, + builder_metrics: Arc, max_attempts: i32, } @@ -49,6 +71,7 @@ impl AsyncBuildQueue { metrics: Arc, config: Arc, storage: Arc, + otel_meter_provider: &AnyMeterProvider, ) -> Self { AsyncBuildQueue { max_attempts: config.build_attempts.into(), @@ -56,9 +79,15 @@ impl AsyncBuildQueue { db, metrics, storage, + queue_metrics: BuildQueueMetrics::new(otel_meter_provider), + builder_metrics: Arc::new(BuilderMetrics::new(otel_meter_provider)), } } + pub fn builder_metrics(&self) -> Arc { + self.builder_metrics.clone() + } + pub async fn last_seen_reference(&self) -> Result> { let mut conn = self.db.get_async().await?; if let Some(value) = @@ -333,6 +362,7 @@ impl AsyncBuildQueue { release.name, release.version ); self.metrics.queued_builds.inc(); + self.queue_metrics.queued_builds.add(1, &[]); crates_added += 1; } Err(err) => report_error(&err), @@ -540,13 +570,17 @@ impl BuildQueue { None => return Ok(()), }; - let res = self - .inner - .metrics - .build_time - .observe_closure_duration(|| f(&to_process)); + let res = { + let instant = Instant::now(); + let res = f(&to_process); + let elapsed = instant.elapsed().as_secs_f64(); + self.inner.metrics.build_time.observe(elapsed); + self.inner.builder_metrics.build_time.record(elapsed, &[]); + res + }; self.inner.metrics.total_builds.inc(); + self.inner.builder_metrics.total_builds.add(1, &[]); if let Err(err) = self.runtime.block_on(cdn::queue_crate_invalidation( &mut transaction, &self.inner.config, @@ -571,6 +605,7 @@ impl BuildQueue { if attempt >= self.inner.max_attempts { self.inner.metrics.failed_builds.inc(); + self.inner.builder_metrics.failed_builds.add(1, &[]); } Ok(()) }; @@ -731,6 +766,7 @@ mod tests { use super::*; 
use crate::test::{FakeBuild, TestEnvironment, V1, V2}; use chrono::Utc; + use std::time::Duration; #[tokio::test(flavor = "multi_thread")] @@ -1051,6 +1087,34 @@ mod tests { assert_eq!(metrics.failed_builds.get(), 1); assert_eq!(metrics.build_time.get_sample_count(), 9); + let collected_metrics = env.collected_metrics(); + + assert_eq!( + collected_metrics + .get_metric("builder", "docsrs.builder.total_builds")? + .get_u64_counter() + .value(), + 9 + ); + + assert_eq!( + collected_metrics + .get_metric("builder", "docsrs.builder.failed_builds")? + .get_u64_counter() + .value(), + 1 + ); + + assert_eq!( + dbg!( + collected_metrics + .get_metric("builder", "docsrs.builder.build_time")? + .get_f64_histogram() + .count() + ), + 9 + ); + // no invalidations were run since we don't have a distribution id configured assert!( env.runtime() diff --git a/src/cdn.rs b/src/cdn.rs index edae997d4..45b2e1db6 100644 --- a/src/cdn.rs +++ b/src/cdn.rs @@ -1,4 +1,4 @@ -use crate::{Config, InstanceMetrics, metrics::duration_to_seconds, utils::report_error}; +use crate::{Config, InstanceMetrics, metrics::otel::AnyMeterProvider, utils::report_error}; use anyhow::{Context, Error, Result, anyhow, bail}; use aws_config::BehaviorVersion; use aws_sdk_cloudfront::{ @@ -9,6 +9,7 @@ use aws_sdk_cloudfront::{ }; use chrono::{DateTime, Utc}; use futures_util::stream::TryStreamExt; +use opentelemetry::{KeyValue, metrics::Histogram}; use serde::Serialize; use sqlx::Connection as _; use std::{ @@ -24,6 +25,29 @@ use uuid::Uuid; /// triggered invalidations const MAX_CLOUDFRONT_WILDCARD_INVALIDATIONS: i32 = 13; +#[derive(Debug)] +pub struct CdnMetrics { + invalidation_time: Histogram, + queue_time: Histogram, +} + +impl CdnMetrics { + pub fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("cdn"); + const PREFIX: &str = "docsrs.cdn"; + Self { + invalidation_time: meter + .f64_histogram(format!("{PREFIX}.invalidation_time")) + .with_unit("s") + .build(), + 
queue_time: meter + .f64_histogram(format!("{PREFIX}.queue_time")) + .with_unit("s") + .build(), + } + } +} + #[derive(Debug, EnumString)] pub enum CdnKind { #[strum(ascii_case_insensitive)] @@ -302,6 +326,7 @@ impl CdnBackend { pub(crate) async fn full_invalidation( cdn: &CdnBackend, metrics: &InstanceMetrics, + otel_metrics: &CdnMetrics, conn: &mut sqlx::PgConnection, distribution_id: &str, ) -> Result<()> { @@ -319,11 +344,15 @@ pub(crate) async fn full_invalidation( .await? { if let Ok(duration) = (now - row.queued).to_std() { - // This can only fail when the duration is negative, which can't happen anyways + let duration = duration.as_secs_f64(); metrics .cdn_queue_time .with_label_values(&[distribution_id]) - .observe(duration_to_seconds(duration)); + .observe(duration); + otel_metrics.queue_time.record( + duration, + &[KeyValue::new("distribution", distribution_id.to_string())], + ); } } @@ -359,6 +388,7 @@ pub(crate) async fn handle_queued_invalidation_requests( config: &Config, cdn: &CdnBackend, metrics: &InstanceMetrics, + otel_metrics: &CdnMetrics, conn: &mut sqlx::PgConnection, distribution_id: &str, ) -> Result<()> { @@ -422,11 +452,15 @@ pub(crate) async fn handle_queued_invalidation_requests( .await? { if let Ok(duration) = (now - row.created_in_cdn.expect("this is always Some")).to_std() { - // This can only fail when the duration is negative, which can't happen anyways + let duration = duration.as_secs_f64(); metrics .cdn_invalidation_time .with_label_values(&[distribution_id]) - .observe(duration_to_seconds(duration)); + .observe(duration); + otel_metrics.invalidation_time.record( + duration, + &[KeyValue::new("distribution", distribution_id.to_string())], + ); } } let possible_path_invalidations: i32 = @@ -454,7 +488,7 @@ pub(crate) async fn handle_queued_invalidation_requests( .await? 
&& (now - min_queued).to_std().unwrap_or_default() >= config.cdn_max_queued_age { - full_invalidation(cdn, metrics, conn, distribution_id).await?; + full_invalidation(cdn, metrics, otel_metrics, conn, distribution_id).await?; return Ok(()); } @@ -480,11 +514,15 @@ pub(crate) async fn handle_queued_invalidation_requests( path_patterns.push(row.path_pattern); if let Ok(duration) = (now - row.queued).to_std() { - // This can only fail when the duration is negative, which can't happen anyways + let duration = duration.as_secs_f64(); metrics .cdn_queue_time .with_label_values(&[distribution_id]) - .observe(duration_to_seconds(duration)); + .observe(duration); + otel_metrics.queue_time.record( + duration, + &[KeyValue::new("distribution", distribution_id.to_string())], + ); } } @@ -701,6 +739,10 @@ mod tests { Ok(()) } + fn otel_metrics(env: &TestEnvironment) -> CdnMetrics { + CdnMetrics::new(env.meter_provider()) + } + #[tokio::test(flavor = "multi_thread")] async fn create_cloudfront() -> Result<()> { let env = TestEnvironment::with_config( @@ -817,12 +859,14 @@ mod tests { let cdn = env.cdn(); let config = env.config(); + let metrics = otel_metrics(&env); // now handle the queued invalidations handle_queued_invalidation_requests( config, cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -831,6 +875,7 @@ mod tests { config, cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_STATIC, ) @@ -919,12 +964,14 @@ mod tests { let cdn = env.cdn(); let config = env.config(); + let metrics = otel_metrics(&env); // now handle the queued invalidations handle_queued_invalidation_requests( config, cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -933,6 +980,7 @@ mod tests { config, cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_STATIC, ) @@ -966,6 +1014,7 @@ mod tests { config, cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -974,6 +1023,7 @@ mod tests { config, 
cdn, env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_STATIC, ) @@ -1034,11 +1084,14 @@ mod tests { // queue an invalidation queue_crate_invalidation(&mut conn, env.config(), "krate").await?; + let metrics = otel_metrics(&env); + // handle the queued invalidations handle_queued_invalidation_requests( env.config(), env.cdn(), env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -1093,11 +1146,14 @@ mod tests { // queue an invalidation queue_crate_invalidation(&mut conn, env.config(), "krate").await?; + let metrics = otel_metrics(&env); + // handle the queued invalidations handle_queued_invalidation_requests( env.config(), env.cdn(), env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -1130,6 +1186,7 @@ mod tests { env.config(), env.cdn(), env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) @@ -1157,6 +1214,7 @@ mod tests { let env = TestEnvironment::with_config(config_with_cdn().build()?).await?; let cdn = env.cdn(); + let metrics = otel_metrics(&env); let mut conn = env.async_db().async_conn().await; // no invalidation is queued @@ -1171,6 +1229,7 @@ mod tests { env.config(), env.cdn(), env.instance_metrics(), + &metrics, &mut conn, DISTRIBUTION_ID_WEB, ) diff --git a/src/config.rs b/src/config.rs index 8a46714ac..246736b94 100644 --- a/src/config.rs +++ b/src/config.rs @@ -130,6 +130,9 @@ pub struct Config { // automatic rebuild configuration pub(crate) max_queued_rebuilds: Option, + + // opentelemetry endpoint to send OTLP to + pub(crate) opentelemetry_endpoint: Option, } impl Config { @@ -173,6 +176,7 @@ impl Config { "DOCSRS_REGISTRY_API_HOST", "https://crates.io".parse().unwrap(), )?) + .opentelemetry_endpoint(maybe_env("OTEL_EXPORTER_OTLP_ENDPOINT")?) .prefix(prefix.clone()) .database_url(require_env("DOCSRS_DATABASE_URL")?) .max_pool_size(env("DOCSRS_MAX_POOL_SIZE", 90u32)?) 
diff --git a/src/context.rs b/src/context.rs index 631d3e583..d8640f268 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,9 +1,10 @@ -use crate::cdn::CdnBackend; -use crate::db::Pool; -use crate::repositories::RepositoryStatsUpdater; use crate::{ AsyncBuildQueue, AsyncStorage, BuildQueue, Config, InstanceMetrics, RegistryApi, ServiceMetrics, Storage, + cdn::CdnBackend, + db::Pool, + metrics::otel::{AnyMeterProvider, get_meter_provider}, + repositories::RepositoryStatsUpdater, }; use anyhow::Result; use std::sync::Arc; @@ -22,25 +23,29 @@ pub struct Context { pub registry_api: Arc, pub repository_stats_updater: Arc, pub runtime: runtime::Handle, + pub meter_provider: AnyMeterProvider, } impl Context { /// Create a new context environment from the given configuration. - #[cfg(not(test))] pub async fn from_config(config: Config) -> Result { let instance_metrics = Arc::new(InstanceMetrics::new()?); - let pool = Pool::new(&config, instance_metrics.clone()).await?; - Self::from_config_with_metrics_and_pool(config, instance_metrics, pool).await + let meter_provider = get_meter_provider(&config)?; + let pool = Pool::new(&config, instance_metrics.clone(), &meter_provider).await?; + Self::from_config_with_metrics_and_pool(config, instance_metrics, meter_provider, pool) + .await } /// Create a new context environment from the given configuration, for running tests. 
#[cfg(test)] - pub async fn from_config( + pub async fn from_test_config( config: Config, instance_metrics: Arc, + meter_provider: AnyMeterProvider, pool: Pool, ) -> Result { - Self::from_config_with_metrics_and_pool(config, instance_metrics, pool).await + Self::from_config_with_metrics_and_pool(config, instance_metrics, meter_provider, pool) + .await } /// private function for context environment generation, allows passing in a @@ -49,12 +54,19 @@ impl Context { async fn from_config_with_metrics_and_pool( config: Config, instance_metrics: Arc, + meter_provider: AnyMeterProvider, pool: Pool, ) -> Result { let config = Arc::new(config); let async_storage = Arc::new( - AsyncStorage::new(pool.clone(), instance_metrics.clone(), config.clone()).await?, + AsyncStorage::new( + pool.clone(), + instance_metrics.clone(), + config.clone(), + &meter_provider, + ) + .await?, ); let async_build_queue = Arc::new(AsyncBuildQueue::new( @@ -62,11 +74,13 @@ impl Context { instance_metrics.clone(), config.clone(), async_storage.clone(), + &meter_provider, )); let cdn = Arc::new(CdnBackend::new(&config).await); let runtime = runtime::Handle::current(); + // sync wrappers around build-queue & storage async resources let build_queue = Arc::new(BuildQueue::new(runtime.clone(), async_build_queue.clone())); let storage = Arc::new(Storage::new(async_storage.clone(), runtime.clone())); @@ -87,6 +101,7 @@ impl Context { repository_stats_updater: Arc::new(RepositoryStatsUpdater::new(&config, pool)), runtime, config, + meter_provider, }) } } diff --git a/src/db/pool.rs b/src/db/pool.rs index d6da89dc9..5e607a6b8 100644 --- a/src/db/pool.rs +++ b/src/db/pool.rs @@ -1,6 +1,6 @@ -use crate::Config; -use crate::metrics::InstanceMetrics; +use crate::{Config, metrics::InstanceMetrics, metrics::otel::AnyMeterProvider}; use futures_util::{future::BoxFuture, stream::BoxStream}; +use opentelemetry::metrics::{Counter, ObservableGauge}; use sqlx::{Executor, postgres::PgPoolOptions}; use std::{ ops::{Deref, 
DerefMut}, @@ -12,20 +12,77 @@ use tracing::debug; const DEFAULT_SCHEMA: &str = "public"; +#[derive(Debug)] +struct PoolMetrics { + failed_connections: Counter, + _idle_connections: ObservableGauge, + _used_connections: ObservableGauge, + _max_connections: ObservableGauge, +} + +impl PoolMetrics { + fn new(pool: sqlx::PgPool, meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("pool"); + const PREFIX: &str = "docsrs.db.pool"; + Self { + failed_connections: meter + .u64_counter(format!("{PREFIX}.failed_connections")) + .with_unit("1") + .build(), + _idle_connections: meter + .u64_observable_gauge(format!("{PREFIX}.idle_connections")) + .with_unit("1") + .with_callback({ + let pool = pool.clone(); + move |observer| { + observer.observe(pool.num_idle() as u64, &[]); + } + }) + .build(), + _used_connections: meter + .u64_observable_gauge(format!("{PREFIX}.used_connections")) + .with_unit("1") + .with_callback({ + let pool = pool.clone(); + move |observer| { + let used = pool.size() as u64 - pool.num_idle() as u64; + observer.observe(used, &[]); + } + }) + .build(), + _max_connections: meter + .u64_observable_gauge(format!("{PREFIX}.max_connections")) + .with_unit("1") + .with_callback({ + let pool = pool.clone(); + move |observer| { + observer.observe(pool.size() as u64, &[]); + } + }) + .build(), + } + } +} + #[derive(Debug, Clone)] pub struct Pool { async_pool: sqlx::PgPool, runtime: runtime::Handle, metrics: Arc, max_size: u32, + otel_metrics: Arc, } impl Pool { - pub async fn new(config: &Config, metrics: Arc) -> Result { + pub async fn new( + config: &Config, + metrics: Arc, + otel_meter_provider: &AnyMeterProvider, + ) -> Result { debug!( "creating database pool (if this hangs, consider running `docker-compose up -d db s3`)" ); - Self::new_inner(config, metrics, DEFAULT_SCHEMA).await + Self::new_inner(config, metrics, DEFAULT_SCHEMA, otel_meter_provider).await } #[cfg(test)] @@ -33,14 +90,16 @@ impl Pool { config: &Config, metrics: 
Arc, schema: &str, + otel_meter_provider: &AnyMeterProvider, ) -> Result { - Self::new_inner(config, metrics, schema).await + Self::new_inner(config, metrics, schema, otel_meter_provider).await } async fn new_inner( config: &Config, metrics: Arc, schema: &str, + otel_meter_provider: &AnyMeterProvider, ) -> Result { let acquire_timeout = Duration::from_secs(30); let max_lifetime = Duration::from_secs(30 * 60); @@ -76,10 +135,11 @@ impl Pool { .map_err(PoolError::AsyncPoolCreationFailed)?; Ok(Pool { - async_pool, + async_pool: async_pool.clone(), metrics, runtime: runtime::Handle::current(), max_size: config.max_pool_size, + otel_metrics: Arc::new(PoolMetrics::new(async_pool, otel_meter_provider)), }) } @@ -91,6 +151,7 @@ impl Pool { }), Err(err) => { self.metrics.failed_db_connections.inc(); + self.otel_metrics.failed_connections.add(1, &[]); Err(PoolError::AsyncClientError(err)) } } diff --git a/src/docbuilder/mod.rs b/src/docbuilder/mod.rs index f09da2d71..430d3cb64 100644 --- a/src/docbuilder/mod.rs +++ b/src/docbuilder/mod.rs @@ -3,7 +3,9 @@ mod rustwide_builder; pub(crate) use self::limits::Limits; pub(crate) use self::rustwide_builder::DocCoverage; -pub use self::rustwide_builder::{BuildPackageSummary, PackageKind, RustwideBuilder}; +pub use self::rustwide_builder::{ + BuildPackageSummary, BuilderMetrics, PackageKind, RustwideBuilder, +}; #[cfg(test)] pub use self::rustwide_builder::RUSTDOC_JSON_COMPRESSION_ALGORITHMS; diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 83e2f2639..fee8ae897 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -11,6 +11,7 @@ use crate::{ }, docbuilder::Limits, error::Result, + metrics::otel::AnyMeterProvider, repositories::RepositoryStatsUpdater, storage::{ CompressionAlgorithm, RustdocJsonFormatVersion, compress, get_file_list, @@ -24,6 +25,7 @@ use crate::{ use anyhow::{Context as _, Error, anyhow, bail}; use docsrs_metadata::{BuildTargets, 
DEFAULT_TARGETS, HOST_TARGET, Metadata}; use itertools::Itertools as _; +use opentelemetry::metrics::{Counter, Histogram}; use regex::Regex; use rustwide::{ AlternativeRegistry, Build, Crate, Toolchain, Workspace, WorkspaceBuilder, @@ -113,6 +115,50 @@ pub enum PackageKind<'a> { Registry(&'a str), } +#[derive(Debug)] +pub struct BuilderMetrics { + pub total_builds: Counter, + pub build_time: Histogram, + pub successful_builds: Counter, + pub failed_builds: Counter, + pub non_library_builds: Counter, + pub documentation_size: Histogram, +} + +impl BuilderMetrics { + pub fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("builder"); + const PREFIX: &str = "docsrs.builder"; + Self { + failed_builds: meter + .u64_counter(format!("{PREFIX}.failed_builds")) + .with_unit("1") + .build(), + build_time: meter + .f64_histogram(format!("{PREFIX}.build_time")) + .with_unit("s") + .build(), + total_builds: meter + .u64_counter(format!("{PREFIX}.total_builds")) + .with_unit("1") + .build(), + successful_builds: meter + .u64_counter(format!("{PREFIX}.successful_builds")) + .with_unit("1") + .build(), + non_library_builds: meter + .u64_counter(format!("{PREFIX}.non_library_builds")) + .with_unit("1") + .build(), + documentation_size: meter + .u64_histogram(format!("{PREFIX}.documentation_size")) + .with_unit("bytes") + .with_description("size of the generated documentation in bytes") + .build(), + } + } +} + pub struct RustwideBuilder { workspace: Workspace, toolchain: Toolchain, @@ -125,6 +171,7 @@ pub struct RustwideBuilder { registry_api: Arc, repository_stats_updater: Arc, workspace_initialize_time: Instant, + builder_metrics: Arc, } impl RustwideBuilder { @@ -146,6 +193,7 @@ impl RustwideBuilder { registry_api: context.registry_api.clone(), repository_stats_updater: context.repository_stats_updater.clone(), workspace_initialize_time: Instant::now(), + builder_metrics: context.async_build_queue.builder_metrics(), }) } @@ -732,6 +780,7 @@ 
impl RustwideBuilder { self.metrics .documentation_size .observe(documentation_size as f64 / 1024.0 / 1024.0); + self.builder_metrics.documentation_size.record(documentation_size, &[]); algs.insert(new_alg); Some(documentation_size) } else { @@ -766,10 +815,13 @@ impl RustwideBuilder { if res.result.successful { self.metrics.successful_builds.inc(); + self.builder_metrics.successful_builds.add(1, &[]); } else if res.cargo_metadata.root().is_library() { self.metrics.failed_builds.inc(); + self.builder_metrics.failed_builds.add(1, &[]); } else { self.metrics.non_library_builds.inc(); + self.builder_metrics.non_library_builds.add(1, &[]); } let release_data = if !is_local { diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 41c7b5d14..a38b68f34 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -1,5 +1,7 @@ #[macro_use] mod macros; +pub(crate) mod otel; +pub(crate) mod service; use self::macros::MetricFromOpts; use crate::{ @@ -7,7 +9,7 @@ use crate::{ db::{CrateId, Pool, ReleaseId}, target::TargetAtom, }; -use anyhow::Error; +use anyhow::{Error, Result}; use dashmap::DashMap; use prometheus::proto::MetricFamily; use std::{ @@ -139,13 +141,6 @@ metrics! 
{ namespace: "docsrs", } -/// Converts a `Duration` to seconds, used by prometheus internally -#[inline] -pub(crate) fn duration_to_seconds(d: Duration) -> f64 { - let nanos = f64::from(d.subsec_nanos()) / 1e9; - d.as_secs() as f64 + nanos -} - #[derive(Debug, Default)] pub(crate) struct RecentlyAccessedReleases { crates: DashMap, diff --git a/src/metrics/otel.rs b/src/metrics/otel.rs new file mode 100644 index 000000000..595f15f79 --- /dev/null +++ b/src/metrics/otel.rs @@ -0,0 +1,99 @@ +use crate::Config; +use anyhow::Result; +use opentelemetry::{ + InstrumentationScope, + metrics::{InstrumentProvider, Meter, MeterProvider}, +}; +use opentelemetry_otlp::{Protocol, WithExportConfig as _}; +use opentelemetry_resource_detectors::{OsResourceDetector, ProcessResourceDetector}; +use opentelemetry_sdk::{Resource, error::OTelSdkResult}; +use std::{sync::Arc, time::Duration}; +use tracing::info; + +/// extend the `MeterProvider` trait so we also expose +/// the `force_flush` method for tests. +pub trait MeterProviderWithExt: MeterProvider { + fn force_flush(&self) -> OTelSdkResult; +} + +pub type AnyMeterProvider = Arc; + +impl MeterProviderWithExt for opentelemetry_sdk::metrics::SdkMeterProvider { + fn force_flush(&self) -> OTelSdkResult { + self.force_flush() + } +} + +/// opentelemetry metric provider setup, +/// if no endpoint is configured, use a no-op provider +pub(crate) fn get_meter_provider(config: &Config) -> Result { + if let Some(ref endpoint) = config.opentelemetry_endpoint { + let endpoint = endpoint.to_string(); + info!(endpoint, "setting up OpenTelemetry metrics exporter"); + + let exporter = opentelemetry_otlp::MetricExporter::builder() + .with_tonic() + .with_endpoint(endpoint.to_string()) + .with_protocol(Protocol::Grpc) + .with_timeout(Duration::from_secs(3)) + .build()?; + + let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder() + .with_periodic_exporter(exporter) + .with_resource( + Resource::builder() + 
.with_detector(Box::new(OsResourceDetector)) + .with_detector(Box::new(ProcessResourceDetector)) + .build(), + ) + .build(); + + Ok(Arc::new(provider)) + } else { + Ok(Arc::new(NoopMeterProvider::new())) + } +} + +/// A no-op instance of a `MeterProvider`, so we can avoid conditional +/// logic across the whole codebase. +/// +/// For now, copy/paste from opentelemetry-sdk, see +/// https://github.com/open-telemetry/opentelemetry-rust/pull/3111 +#[derive(Debug, Default)] +pub(crate) struct NoopMeterProvider { + _private: (), +} + +impl NoopMeterProvider { + /// Create a new no-op meter provider. + pub fn new() -> Self { + NoopMeterProvider { _private: () } + } +} + +impl MeterProvider for NoopMeterProvider { + fn meter_with_scope(&self, _scope: InstrumentationScope) -> Meter { + Meter::new(Arc::new(NoopMeter::new())) + } +} + +impl MeterProviderWithExt for NoopMeterProvider { + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } +} + +/// A no-op instance of a `Meter` +#[derive(Debug, Default)] +pub(crate) struct NoopMeter { + _private: (), +} + +impl NoopMeter { + /// Create a new no-op meter core.
+ pub(crate) fn new() -> Self { + NoopMeter { _private: () } + } +} + +impl InstrumentProvider for NoopMeter {} diff --git a/src/metrics/service.rs b/src/metrics/service.rs new file mode 100644 index 000000000..9483f47b9 --- /dev/null +++ b/src/metrics/service.rs @@ -0,0 +1,101 @@ +use crate::{AsyncBuildQueue, Config, cdn, metrics::otel::AnyMeterProvider}; +use anyhow::{Error, Result}; +use opentelemetry::{KeyValue, metrics::Gauge}; +use std::collections::HashSet; + +#[derive(Debug)] +pub struct OtelServiceMetrics { + pub queued_crates_count: Gauge, + pub prioritized_crates_count: Gauge, + pub failed_crates_count: Gauge, + pub queue_is_locked: Gauge, + pub queued_crates_count_by_priority: Gauge, + pub queued_cdn_invalidations_by_distribution: Gauge, +} + +impl OtelServiceMetrics { + pub fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("service"); + const PREFIX: &str = "docsrs.service"; + Self { + queued_crates_count: meter + .u64_gauge(format!("{PREFIX}.queued_crates_count")) + .with_unit("1") + .build(), + prioritized_crates_count: meter + .u64_gauge(format!("{PREFIX}.prioritized_crates_count")) + .with_unit("1") + .build(), + failed_crates_count: meter + .u64_gauge(format!("{PREFIX}.failed_crates_count")) + .with_unit("1") + .build(), + queue_is_locked: meter + .u64_gauge(format!("{PREFIX}.queue_is_locked")) + .with_unit("1") + .build(), + queued_crates_count_by_priority: meter + .u64_gauge(format!("{PREFIX}.queued_crates_count_by_priority")) + .with_unit("1") + .build(), + queued_cdn_invalidations_by_distribution: meter + .u64_gauge(format!("{PREFIX}.queued_cdn_invalidations_by_distribution")) + .with_unit("1") + .build(), + } + } + + pub(crate) async fn gather( + &self, + conn: &mut sqlx::PgConnection, + queue: &AsyncBuildQueue, + config: &Config, + ) -> Result<(), Error> { + self.queue_is_locked + .record(queue.is_locked().await? as u64, &[]); + self.queued_crates_count + .record(queue.pending_count().await? 
as u64, &[]); + self.prioritized_crates_count + .record(queue.prioritized_count().await? as u64, &[]); + + let queue_pending_count = queue.pending_count_by_priority().await?; + + // gauges keep their old value per label when it's not removed, reset to zero or updated. + // When a priority is used at least once, it would be kept in the metric and the last + // value would be remembered. `pending_count_by_priority` returns only the priorities + // that are currently in the queue, which means when the tasks for a priority are + // finished, we wouldn't update the metric anymore, which means a wrong value is + // in the metric. + // + // the only solution is to explicitly set the value to be zero, for all common priorities, + // when there are no items in the queue with that priority. + // So we create a set of all priorities we want to be explicitly zeroed, combined + // with the actual priorities in the queue. + let all_priorities: HashSet = + queue_pending_count.keys().copied().chain(0..=20).collect(); + + for priority in all_priorities { + let count = queue_pending_count.get(&priority).unwrap_or(&0); + + self.queued_crates_count_by_priority.record( + *count as u64, + &[KeyValue::new("priority", priority.to_string())], + ); + } + + for (distribution_id, count) in + cdn::queued_or_active_crate_invalidation_count_by_distribution(&mut *conn, config) + .await? + { + self.queued_cdn_invalidations_by_distribution.record( + count as u64, + &[KeyValue::new("distribution", distribution_id)], + ); + } + + self.failed_crates_count + .record(queue.failed_count().await? 
as u64, &[]); + + Ok(()) + } +} diff --git a/src/storage/database.rs b/src/storage/database.rs index 275b30409..5c3eb06a7 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -1,4 +1,4 @@ -use super::{Blob, FileRange, StreamingBlob}; +use super::{Blob, FileRange, StorageMetrics, StreamingBlob}; use crate::{InstanceMetrics, db::Pool, error::Result}; use chrono::{DateTime, Utc}; use futures_util::stream::{Stream, TryStreamExt}; @@ -8,11 +8,20 @@ use std::{io, sync::Arc}; pub(crate) struct DatabaseBackend { pool: Pool, metrics: Arc, + otel_metrics: StorageMetrics, } impl DatabaseBackend { - pub(crate) fn new(pool: Pool, metrics: Arc) -> Self { - Self { pool, metrics } + pub(crate) fn new( + pool: Pool, + metrics: Arc, + otel_metrics: StorageMetrics, + ) -> Self { + Self { + pool, + metrics, + otel_metrics, + } } pub(super) async fn exists(&self, path: &str) -> Result { @@ -144,6 +153,7 @@ impl DatabaseBackend { ) .execute(&mut *trans).await?; self.metrics.uploaded_files_total.inc(); + self.otel_metrics.uploaded_files.add(1, &[]); } trans.commit().await?; Ok(()) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8b2400c1d..a047c9f31 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -18,6 +18,7 @@ use crate::{ types::version::Version, }, error::Result, + metrics::otel::AnyMeterProvider, utils::spawn_blocking, }; use anyhow::{anyhow, bail}; @@ -26,6 +27,7 @@ use dashmap::DashMap; use fn_error_context::context; use futures_util::{TryStreamExt as _, stream::BoxStream}; use mime::Mime; +use opentelemetry::metrics::Counter; use path_slash::PathExt; use std::{ fmt, @@ -200,6 +202,24 @@ impl std::str::FromStr for StorageKind { } } +#[derive(Debug)] +struct StorageMetrics { + uploaded_files: Counter, +} + +impl StorageMetrics { + fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("storage"); + const PREFIX: &str = "docsrs.storage"; + Self { + uploaded_files: meter + 
.u64_counter(format!("{PREFIX}.uploaded_files")) + .with_unit("1") + .build(), + } + } +} + enum StorageBackend { Database(DatabaseBackend), S3(Box), @@ -217,15 +237,18 @@ impl AsyncStorage { pool: Pool, metrics: Arc, config: Arc, + otel_meter_provider: &AnyMeterProvider, ) -> Result { + let otel_metrics = StorageMetrics::new(otel_meter_provider); + Ok(Self { backend: match config.storage_backend { StorageKind::Database => { - StorageBackend::Database(DatabaseBackend::new(pool, metrics)) - } - StorageKind::S3 => { - StorageBackend::S3(Box::new(S3Backend::new(metrics, &config).await?)) + StorageBackend::Database(DatabaseBackend::new(pool, metrics, otel_metrics)) } + StorageKind::S3 => StorageBackend::S3(Box::new( + S3Backend::new(metrics, &config, otel_metrics).await?, + )), }, config, locks: DashMap::new(), @@ -1786,6 +1809,8 @@ mod test { /// This is the preferred way to test whether backends work. #[cfg(test)] mod backend_tests { + use crate::test::TestEnvironment; + use super::*; fn get_file_info(files: &[FileEntry], path: impl AsRef) -> Option<&FileEntry> { @@ -1997,7 +2022,11 @@ mod backend_tests { Ok(()) } - fn test_store_blobs(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { + fn test_store_blobs( + env: &TestEnvironment, + storage: &Storage, + metrics: &InstanceMetrics, + ) -> Result<()> { const NAMES: &[&str] = &[ "a", "b", @@ -2027,6 +2056,16 @@ mod backend_tests { assert_eq!(NAMES.len(), metrics.uploaded_files_total.get() as usize); + let collected_metrics = env.collected_metrics(); + + assert_eq!( + collected_metrics + .get_metric("storage", "docsrs.storage.uploaded_files")? 
+ .get_u64_counter() + .value(), + NAMES.len() as u64, + ); + Ok(()) } @@ -2037,7 +2076,11 @@ mod backend_tests { Ok(()) } - fn test_store_all_in_archive(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { + fn test_store_all_in_archive( + _env: &TestEnvironment, + storage: &Storage, + metrics: &InstanceMetrics, + ) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-archive-test") .tempdir()?; @@ -2103,7 +2146,11 @@ mod backend_tests { Ok(()) } - fn test_store_all(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { + fn test_store_all( + _env: &TestEnvironment, + storage: &Storage, + metrics: &InstanceMetrics, + ) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-test") .tempdir()?; @@ -2282,7 +2329,7 @@ mod backend_tests { #[test] fn $test() -> anyhow::Result<()> { let env = get_env()?; - super::$test(&*env.storage(), &*env.instance_metrics()) + super::$test(&env, &*env.storage(), &*env.instance_metrics()) } )* }; diff --git a/src/storage/s3.rs b/src/storage/s3.rs index fcf257d4b..364cd5d6c 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -1,4 +1,4 @@ -use super::{Blob, FileRange, StreamingBlob}; +use super::{Blob, FileRange, StorageMetrics, StreamingBlob}; use crate::{Config, InstanceMetrics}; use anyhow::{Context as _, Error}; use async_stream::try_stream; @@ -73,12 +73,17 @@ pub(super) struct S3Backend { client: Client, bucket: String, metrics: Arc, + otel_metrics: StorageMetrics, #[cfg(test)] temporary: bool, } impl S3Backend { - pub(super) async fn new(metrics: Arc, config: &Config) -> Result { + pub(super) async fn new( + metrics: Arc, + config: &Config, + otel_metrics: StorageMetrics, + ) -> Result { let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; let mut config_builder = aws_sdk_s3::config::Builder::from(&shared_config) .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) @@ -109,6 +114,7 @@ impl S3Backend { Ok(Self 
{ client, metrics, + otel_metrics, bucket: config.s3_bucket.clone(), #[cfg(test)] temporary: config.s3_bucket_is_temporary, @@ -232,6 +238,7 @@ impl S3Backend { .send() .map_ok(|_| { self.metrics.uploaded_files_total.inc(); + self.otel_metrics.uploaded_files.add(1, &[]); }) .map_err(|err| { warn!("Failed to upload blob to S3: {:?}", err); diff --git a/src/test/mod.rs b/src/test/mod.rs index 3cde6e038..42012da77 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -1,4 +1,5 @@ mod fakes; +mod test_metrics; pub(crate) use self::fakes::{FakeBuild, fake_release_that_failed_before_build}; use crate::{ @@ -7,7 +8,9 @@ use crate::{ config::ConfigBuilder, db::{self, AsyncPoolClient, Pool, types::version::Version}, error::Result, + metrics::otel::AnyMeterProvider, storage::{AsyncStorage, Storage, StorageKind}, + test::test_metrics::CollectedMetrics, web::{build_axum_app, cache, page::TemplateData}, }; use anyhow::Context as _; @@ -15,7 +18,8 @@ use axum::body::Bytes; use axum::{Router, body::Body, http::Request, response::Response as AxumResponse}; use fn_error_context::context; use futures_util::stream::TryStreamExt; -use http_body_util::BodyExt; // for `collect` +use http_body_util::BodyExt; +use opentelemetry_sdk::metrics::{InMemoryMetricExporter, PeriodicReader}; use serde::de::DeserializeOwned; use sqlx::Connection as _; use std::{fs, future::Future, panic, rc::Rc, str::FromStr, sync::Arc}; @@ -317,6 +321,7 @@ pub(crate) struct TestEnvironment { db: TestDatabase, pub context: Context, owned_runtime: Option>, + collected_metrics: InMemoryMetricExporter, } pub(crate) fn init_logger() { @@ -362,15 +367,29 @@ impl TestEnvironment { // create index directory fs::create_dir_all(config.registry_index_path.clone())?; + let metric_exporter = InMemoryMetricExporter::default(); + let meter_provider: AnyMeterProvider = Arc::new( + opentelemetry_sdk::metrics::SdkMeterProvider::builder() + .with_reader(PeriodicReader::builder(metric_exporter.clone()).build()) + .build(), + ); + let 
instance_metrics = Arc::new(InstanceMetrics::new()?); - let test_db = TestDatabase::new(&config, instance_metrics.clone()) + let test_db = TestDatabase::new(&config, instance_metrics.clone(), &meter_provider) .await .context("can't initialize test database")?; Ok(Self { - context: Context::from_config(config, instance_metrics, test_db.pool().clone()).await?, + context: Context::from_test_config( + config, + instance_metrics, + meter_provider, + test_db.pool().clone(), + ) + .await?, db: test_db, owned_runtime: None, + collected_metrics: metric_exporter, }) } @@ -422,6 +441,10 @@ impl TestEnvironment { &self.context.instance_metrics } + pub(crate) fn meter_provider(&self) -> &AnyMeterProvider { + &self.context.meter_provider + } + pub(crate) fn runtime(&self) -> &runtime::Handle { &self.context.runtime } @@ -430,6 +453,11 @@ impl TestEnvironment { &self.db } + pub(crate) fn collected_metrics(&self) -> CollectedMetrics { + self.context.meter_provider.force_flush().unwrap(); + CollectedMetrics(self.collected_metrics.get_finished_metrics().unwrap()) + } + pub(crate) async fn web_app(&self) -> Router { let template_data = Arc::new(TemplateData::new(1).unwrap()); build_axum_app(&self.context, template_data) @@ -470,12 +498,16 @@ pub(crate) struct TestDatabase { } impl TestDatabase { - async fn new(config: &Config, metrics: Arc) -> Result { + async fn new( + config: &Config, + metrics: Arc, + otel_meter_provider: &AnyMeterProvider, + ) -> Result { // A random schema name is generated and used for the current connection. This allows each // test to create a fresh instance of the database to run within. 
let schema = format!("docs_rs_test_schema_{}", rand::random::()); - let pool = Pool::new_with_schema(config, metrics, &schema).await?; + let pool = Pool::new_with_schema(config, metrics, &schema, otel_meter_provider).await?; let mut conn = sqlx::PgConnection::connect(&config.database_url).await?; sqlx::query(&format!("CREATE SCHEMA {schema}")) diff --git a/src/test/test_metrics.rs b/src/test/test_metrics.rs new file mode 100644 index 000000000..02f52cee6 --- /dev/null +++ b/src/test/test_metrics.rs @@ -0,0 +1,93 @@ +use anyhow::{Result, anyhow}; +use derive_more::Deref; +use opentelemetry_sdk::metrics::data::{ + AggregatedMetrics, HistogramDataPoint, Metric, MetricData, ResourceMetrics, SumDataPoint, +}; + +/// small wrapper around the collected result of the InMemoryMetricExporter. +/// For convenience in tests. +#[derive(Debug)] +pub(crate) struct CollectedMetrics(pub(crate) Vec); + +impl CollectedMetrics { + pub(crate) fn get_metric<'a>( + &'a self, + scope: impl AsRef, + name: impl AsRef, + ) -> Result> { + let scope = scope.as_ref(); + let name = name.as_ref(); + + let scope_metrics = self + .0 + .iter() + .flat_map(|rm| rm.scope_metrics()) + .find(|sm| sm.scope().name() == scope) + .ok_or_else(|| { + anyhow!( + "Scope '{}' not found in collected metrics: {:?}", + scope, + self.0 + ) + })?; + + Ok(CollectedMetric( + scope_metrics + .metrics() + .find(|m| m.name() == name) + .ok_or_else(|| { + anyhow!( + "Metric '{}' not found in scope '{}': {:?}", + name, + scope, + scope_metrics, + ) + })?, + )) + } +} + +#[derive(Debug, Deref)] +pub(crate) struct CollectedMetric<'a>(&'a Metric); + +impl<'a> CollectedMetric<'a> { + pub(crate) fn get_u64_counter(&'a self) -> &'a SumDataPoint { + let AggregatedMetrics::U64(metric_data) = self.data() else { + panic!("Expected U64 metric data, got: {:?}", self.data()); + }; + + let MetricData::Sum(sum) = metric_data else { + panic!("Expected sum metric data, got: {:?}", metric_data); + }; + + let mut data_points = 
sum.data_points(); + + let result = data_points + .next() + .expect("Expected at least one data point"); + + debug_assert!(data_points.next().is_none(), "Expected only one data point"); + + result + } + + pub(crate) fn get_f64_histogram(&'a self) -> &'a HistogramDataPoint { + let AggregatedMetrics::F64(metric_data) = self.data() else { + panic!("Expected F64 metric data, got: {:?}", self.data()); + }; + + let MetricData::Histogram(histogram) = metric_data else { + panic!("Expected Histogram metric data, got: {:?}", metric_data); + }; + + let mut data_points = histogram.data_points(); + + let result = data_points + .next() + .expect("Expected at least one data point"); + + debug_assert!(data_points.next().is_none(), "Expected only one data point"); + + result + } +} diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index ef4bf73c1..5cac36af1 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -3,7 +3,9 @@ //! This daemon will start web server, track new packages and build them use crate::{ - AsyncBuildQueue, Config, Context, Index, RustwideBuilder, cdn, queue_rebuilds, + AsyncBuildQueue, Config, Context, Index, RustwideBuilder, cdn, + metrics::service::OtelServiceMetrics, + queue_rebuilds, utils::{queue_builder, report_error}, web::start_web_server, }; @@ -13,7 +15,7 @@ use std::sync::Arc; use std::thread; use std::time::Duration; use tokio::{runtime, time::Instant}; -use tracing::{debug, info}; +use tracing::{debug, info, trace}; /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added @@ -114,6 +116,36 @@ pub fn start_background_queue_rebuild(context: &Context) -> Result<(), Error> { Ok(()) } +pub fn start_background_service_metric_collector(context: &Context) -> Result<(), Error> { + let runtime = context.runtime.clone(); + let pool = context.pool.clone(); + let config = context.config.clone(); + let build_queue = context.async_build_queue.clone(); + let service_metrics = 
Arc::new(OtelServiceMetrics::new(&context.meter_provider)); + + async_cron( + &runtime, + "background service metric collector", + // old prometheus scrape interval seems to have been ~5s, but IMO that's far too frequent + // for these service metrics. + Duration::from_secs(30), + move || { + let pool = pool.clone(); + let build_queue = build_queue.clone(); + let config = config.clone(); + let service_metrics = service_metrics.clone(); + async move { + trace!("collecting service metrics"); + let mut conn = pool.get_async().await?; + service_metrics + .gather(&mut conn, &build_queue, &config) + .await + } + }, + ); + Ok(()) +} + pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> { let metrics = context.instance_metrics.clone(); let config = context.config.clone(); @@ -121,6 +153,8 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> let runtime = context.runtime.clone(); let cdn = context.cdn.clone(); + let otel_metrics = Arc::new(cdn::CdnMetrics::new(&context.meter_provider)); + if config.cloudfront_distribution_id_web.is_none() && config.cloudfront_distribution_id_static.is_none() { @@ -142,6 +176,7 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> let config = config.clone(); let cdn = cdn.clone(); let metrics = metrics.clone(); + let otel_metrics = otel_metrics.clone(); async move { let mut conn = pool.get_async().await?; if let Some(distribution_id) = config.cloudfront_distribution_id_web.as_ref() { @@ -149,6 +184,7 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> &config, &cdn, &metrics, + &otel_metrics, &mut conn, distribution_id, ) @@ -160,6 +196,7 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> &config, &cdn, &metrics, + &otel_metrics, &mut conn, distribution_id, ) @@ -203,6 +240,10 @@ pub fn start_daemon(context: Context, enable_registry_watcher: bool) -> Result<( 
start_background_cdn_invalidator(&context)?; start_background_queue_rebuild(&context)?; + // when people run the daemon, we assume the daemon is the one single process where + // we can collect the service metrics. + start_background_service_metric_collector(&context)?; + // NOTE: if a error occurred earlier in `start_daemon`, the server will _not_ be joined - // instead it will get killed when the process exits. webserver_thread diff --git a/src/utils/html.rs b/src/utils/html.rs index 28af67d4f..c327db30a 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -2,6 +2,7 @@ use crate::{ InstanceMetrics, utils::report_error, web::{ + metrics::WebMetrics, page::{ TemplateData, templates::{Body, Head, Vendored}, @@ -44,6 +45,7 @@ pub(crate) fn rewrite_rustdoc_html_stream( max_allowed_memory_usage: usize, data: Arc, metrics: Arc, + otel_metrics: Arc, ) -> impl Stream> + Send + 'static where R: AsyncRead + Unpin + Send + 'static, @@ -225,6 +227,7 @@ where Ok(e) => { if matches!(e, RewritingError::MemoryLimitExceeded(_)) { metrics.html_rewrite_ooms.inc(); + otel_metrics.html_rewrite_ooms.add(1, &[]); } RustdocRewritingError::RewritingError(e) } diff --git a/src/web/metrics.rs b/src/web/metrics.rs index 7b7ec5cd8..0fa0ee686 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -1,6 +1,6 @@ use crate::{ AsyncBuildQueue, Config, InstanceMetrics, ServiceMetrics, db::Pool, - metrics::duration_to_seconds, web::error::AxumResult, + metrics::otel::AnyMeterProvider, web::error::AxumResult, }; use anyhow::{Context as _, Result}; use axum::{ @@ -9,9 +9,47 @@ use axum::{ middleware::Next, response::IntoResponse, }; +use opentelemetry::{ + KeyValue, + metrics::{Counter, Histogram}, +}; use prometheus::{Encoder, TextEncoder, proto::MetricFamily}; use std::{borrow::Cow, future::Future, sync::Arc, time::Instant}; +#[derive(Debug)] +pub(crate) struct WebMetrics { + pub(crate) html_rewrite_ooms: Counter, + pub(crate) im_feeling_lucky_searches: Counter, + + routes_visited: Counter, 
+ response_time: Histogram, +} + +impl WebMetrics { + pub(crate) fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("web"); + const PREFIX: &str = "docsrs.web"; + Self { + html_rewrite_ooms: meter + .u64_counter(format!("{PREFIX}.html_rewrite_ooms")) + .with_unit("1") + .build(), + im_feeling_lucky_searches: meter + .u64_counter(format!("{PREFIX}.im_feeling_lucky_searches")) + .with_unit("1") + .build(), + routes_visited: meter + .u64_counter(format!("{PREFIX}.routes_visited")) + .with_unit("1") + .build(), + response_time: meter + .f64_histogram(format!("{PREFIX}.response_time")) + .with_unit("s") + .build(), + } + } +} + async fn fetch_and_render_metrics(fetch_metrics: Fut) -> AxumResult where Fut: Future>> + Send + 'static, @@ -92,25 +130,40 @@ pub(crate) async fn request_recorder( .expect("metrics missing in request extensions") .clone(); + let otel_metrics = request + .extensions() + .get::>() + .expect("otel metrics missing in request extensions") + .clone(); + let start = Instant::now(); let result = next.run(request).await; - let resp_time = duration_to_seconds(start.elapsed()); + let resp_time = start.elapsed().as_secs_f64(); + + let attrs = [KeyValue::new("route", route_name.to_string())]; metrics .routes_visited .with_label_values(&[&route_name]) .inc(); + + otel_metrics.routes_visited.add(1, &attrs); + metrics .response_time .with_label_values(&[&route_name]) .observe(resp_time); + otel_metrics.response_time.record(resp_time, &attrs); + result } #[cfg(test)] mod tests { use crate::test::{AxumResponseTestExt, AxumRouterTestExt, async_wrapper}; + use opentelemetry_sdk::metrics::data::{AggregatedMetrics, MetricData}; + use pretty_assertions::assert_eq; use std::collections::HashMap; #[test] @@ -185,6 +238,99 @@ mod tests { *entry += 2; } + let collected = dbg!(env.collected_metrics()); + let AggregatedMetrics::U64(MetricData::Sum(routes_visited)) = collected + .get_metric("web", "docsrs.web.routes_visited")? 
+ .data() + else { + panic!("Expected Sum metric data"); + }; + + dbg!(&routes_visited); + + let routes_visited: HashMap = routes_visited + .data_points() + .map(|dp| { + let route = dp + .attributes() + .find(|kv| kv.key.as_str() == "route") + .unwrap() + .clone() + .value; + + (route.to_string(), dp.value()) + }) + .collect(); + + assert_eq!( + routes_visited, + HashMap::from_iter( + vec![ + ("/", 2), + ("/-/sitemap/{letter}/sitemap.xml", 2), + ("/crate/{name}/{version}", 4), + ("/crate/{name}/{version}/status.json", 2), + ("/releases", 2), + ("/releases/feed", 2), + ("/releases/queue", 2), + ("/releases/recent-failures", 2), + ("/releases/recent-failures/{page}", 2), + ("/releases/recent/{page}", 2), + ("/sitemap.xml", 2), + ("rustdoc page", 4), + ("static resource", 16), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), v)) + ) + ); + + let AggregatedMetrics::F64(MetricData::Histogram(response_time)) = collected + .get_metric("web", "docsrs.web.response_time")? + .data() + else { + panic!("Expected Histogram metric data"); + }; + + dbg!(&response_time); + + let response_time_sample_counts: HashMap = response_time + .data_points() + .map(|dp| { + let route = dp + .attributes() + .find(|kv| kv.key.as_str() == "route") + .unwrap() + .clone() + .value; + + (route.to_string(), dp.count()) + }) + .collect(); + + assert_eq!( + response_time_sample_counts, + HashMap::from_iter( + vec![ + ("/", 2), + ("/-/sitemap/{letter}/sitemap.xml", 2), + ("/crate/{name}/{version}", 4), + ("/crate/{name}/{version}/status.json", 2), + ("/releases", 2), + ("/releases/feed", 2), + ("/releases/queue", 2), + ("/releases/recent-failures", 2), + ("/releases/recent-failures/{page}", 2), + ("/releases/recent/{page}", 2), + ("/sitemap.xml", 2), + ("rustdoc page", 4), + ("static resource", 16), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), v)) + ) + ); + // this shows what the routes were *actually* recorded as, making it easier to update ROUTES if the name changes. 
let metrics_serialized = metrics.gather(&env.context.pool)?; let all_routes_visited = metrics_serialized diff --git a/src/web/mod.rs b/src/web/mod.rs index 3d0ef6cfd..cf3400759 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -8,7 +8,10 @@ use crate::{ types::{BuildStatus, version::Version}, }, utils::{get_correct_docsrs_style_file, report_error}, - web::page::templates::{RenderBrands, RenderSolid, filters}, + web::{ + metrics::WebMetrics, + page::templates::{RenderBrands, RenderSolid, filters}, + }, }; use anyhow::{Context as _, Result, anyhow, bail}; use askama::Template; @@ -442,6 +445,8 @@ async fn apply_middleware( ) -> Result { let has_templates = template_data.is_some(); + let web_metrics = Arc::new(WebMetrics::new(&context.meter_provider)); + Ok(router.layer( ServiceBuilder::new() .layer(TraceLayer::new_for_http()) @@ -464,6 +469,7 @@ async fn apply_middleware( .layer(Extension(context.async_build_queue.clone())) .layer(Extension(context.service_metrics.clone())) .layer(Extension(context.instance_metrics.clone())) + .layer(Extension(web_metrics)) .layer(Extension(context.config.clone())) .layer(Extension(context.registry_api.clone())) .layer(Extension(context.async_storage.clone())) diff --git a/src/web/releases.rs b/src/web/releases.rs index cda9ee494..a8b4175ce 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -12,6 +12,7 @@ use crate::{ error::{AxumNope, AxumResult}, extractors::{DbConnection, Path, rustdoc::RustdocParams}, match_version, + metrics::WebMetrics, page::templates::{RenderBrands, RenderRegular, RenderSolid, filters}, rustdoc::OfficialCrateDescription, }, @@ -455,6 +456,7 @@ impl Default for Search { async fn redirect_to_random_crate( config: Arc, metrics: Arc, + otel_metrics: Arc, conn: &mut sqlx::PgConnection, ) -> AxumResult> { // We try to find a random crate and redirect to it. 
@@ -492,6 +494,7 @@ async fn redirect_to_random_crate( if let Some(row) = row { metrics.im_feeling_lucky_searches.inc(); + otel_metrics.im_feeling_lucky_searches.add(1, &[]); let params = RustdocParams::new(&row.name) .with_req_version(ReqVersion::Exact( @@ -520,6 +523,7 @@ pub(crate) async fn search_handler( Extension(config): Extension>, Extension(registry): Extension>, Extension(metrics): Extension>, + Extension(otel_metrics): Extension>, Query(mut query_params): Query>, ) -> AxumResult { let mut query = query_params @@ -535,9 +539,11 @@ pub(crate) async fn search_handler( if query_params.remove("i-am-feeling-lucky").is_some() || query.contains("::") { // redirect to a random crate if query is empty if query.is_empty() { - return Ok(redirect_to_random_crate(config, metrics, &mut conn) - .await? - .into_response()); + return Ok( + redirect_to_random_crate(config, metrics, otel_metrics, &mut conn) + .await? + .into_response(), + ); } let mut queries = BTreeMap::new(); diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 5a8c17599..fe34c60d4 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -20,6 +20,7 @@ use crate::{ }, file::StreamingFile, match_version, + metrics::WebMetrics, page::{ TemplateData, templates::{RenderBrands, RenderRegular, RenderSolid, filters}, @@ -385,6 +386,7 @@ impl RustdocPage { self: &Arc, template_data: Arc, metrics: Arc, + otel_metrics: Arc, rustdoc_html: StreamingBlob, max_parse_memory: usize, ) -> AxumResult { @@ -408,6 +410,7 @@ impl RustdocPage { max_parse_memory, self.clone(), metrics, + otel_metrics, )), ) .into_response()) @@ -427,6 +430,7 @@ impl RustdocPage { pub(crate) async fn rustdoc_html_server_handler( params: RustdocParams, Extension(metrics): Extension>, + Extension(otel_metrics): Extension>, Extension(templates): Extension>, Extension(storage): Extension>, Extension(config): Extension>, @@ -642,8 +646,14 @@ pub(crate) async fn rustdoc_html_server_handler( krate, params, }); - page.into_response(templates, 
metrics, blob, config.max_parse_memory) - .await + page.into_response( + templates, + metrics, + otel_metrics, + blob, + config.max_parse_memory, + ) + .await } #[instrument(skip_all)] From 70eed1b908430977f95c1c97ab46aacf4b5be48e Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sun, 16 Nov 2025 17:48:22 +0100 Subject: [PATCH 2/2] add readme section about locally testing metrics --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index dc5c8f114..6c6d64439 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,22 @@ $ just compose-down $ just compose-down-and-wipe ``` +#### testing opentelemetry metrics + +When you add or update any metrics you might want to test them. While there is +a way to check metrics in unit-tests (see `TestEnvironment::collected_metrics`), +you might also want to test manually. + +We have set up a small docker-compose service (`opentelemetry`) you can start up +via `docker compose up opentelemetry`. This starts up a local instance of +the [opentelemetry collector +contrib](https://hub.docker.com/r/otel/opentelemetry-collector-contrib) image, +configured for debug-logging. + +After configuring your local environment for `OTEL_EXPORTER_OTLP_ENDPOINT` => `http://localhost:4317` +(either in `.env` or `.docker.env`, depending on how you run the webserver), you +can see any metrics you report and how they are exported to your collector. + #### FAQ ##### I see the error `standard_init_linux.go:211: exec user process caused "no such file or directory"` when I use docker-compose.