diff --git a/src/cdn.rs b/src/cdn.rs index 45b2e1db6..13006d50f 100644 --- a/src/cdn.rs +++ b/src/cdn.rs @@ -1,4 +1,8 @@ -use crate::{Config, InstanceMetrics, metrics::otel::AnyMeterProvider, utils::report_error}; +use crate::{ + Config, InstanceMetrics, + metrics::{CDN_INVALIDATION_HISTOGRAM_BUCKETS, otel::AnyMeterProvider}, + utils::report_error, +}; use anyhow::{Context, Error, Result, anyhow, bail}; use aws_config::BehaviorVersion; use aws_sdk_cloudfront::{ @@ -38,10 +42,12 @@ impl CdnMetrics { Self { invalidation_time: meter .f64_histogram(format!("{PREFIX}.invalidation_time")) + .with_boundaries(CDN_INVALIDATION_HISTOGRAM_BUCKETS.to_vec()) .with_unit("s") .build(), queue_time: meter .f64_histogram(format!("{PREFIX}.queue_time")) + .with_boundaries(CDN_INVALIDATION_HISTOGRAM_BUCKETS.to_vec()) .with_unit("s") .build(), } diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index fee8ae897..04683bf9f 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -11,7 +11,7 @@ use crate::{ }, docbuilder::Limits, error::Result, - metrics::otel::AnyMeterProvider, + metrics::{BUILD_TIME_HISTOGRAM_BUCKETS, DOCUMENTATION_SIZE_BUCKETS, otel::AnyMeterProvider}, repositories::RepositoryStatsUpdater, storage::{ CompressionAlgorithm, RustdocJsonFormatVersion, compress, get_file_list, @@ -136,6 +136,7 @@ impl BuilderMetrics { .build(), build_time: meter .f64_histogram(format!("{PREFIX}.build_time")) + .with_boundaries(BUILD_TIME_HISTOGRAM_BUCKETS.to_vec()) .with_unit("s") .build(), total_builds: meter @@ -152,6 +153,7 @@ impl BuilderMetrics { .build(), documentation_size: meter .u64_histogram(format!("{PREFIX}.documentation_size")) + .with_boundaries(DOCUMENTATION_SIZE_BUCKETS.to_vec()) .with_unit("bytes") .with_description("size of the generated documentation in bytes") .build(), diff --git a/src/metrics/macros.rs b/src/metrics/macros.rs index 81c149f8b..8e5515633 100644 --- a/src/metrics/macros.rs +++ 
b/src/metrics/macros.rs @@ -24,6 +24,7 @@ macro_rules! metrics { pub(crate) cdn_queue_time: prometheus::HistogramVec, pub(crate) build_time: prometheus::Histogram, pub(crate) documentation_size: prometheus::Histogram, + pub(crate) response_time: prometheus::HistogramVec, } impl $name { $vis fn new() -> Result { @@ -69,7 +70,7 @@ macro_rules! metrics { "time spent building crates", ) .namespace($namespace) - .buckets($crate::metrics::build_time_histogram_buckets()), + .buckets($crate::metrics::BUILD_TIME_HISTOGRAM_BUCKETS.to_vec()), )?; registry.register(Box::new(build_time.clone()))?; @@ -83,6 +84,18 @@ macro_rules! metrics { )?; registry.register(Box::new(documentation_size.clone()))?; + let response_time = prometheus::HistogramVec::new( + prometheus::HistogramOpts::new( + "response_time", + "The response times of various docs.rs routes", + ) + .namespace($namespace) + .buckets($crate::metrics::RESPONSE_TIME_HISTOGRAM_BUCKETS.to_vec()) + .variable_label("route"), + &["route"], + )?; + registry.register(Box::new(response_time.clone()))?; + Ok(Self { registry, recently_accessed_releases: RecentlyAccessedReleases::new(), @@ -90,6 +103,7 @@ macro_rules! 
metrics { cdn_queue_time, build_time, documentation_size, + response_time, $( $(#[$meta])* $metric, diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index a38b68f34..54003e11b 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -59,31 +59,41 @@ pub const DOCUMENTATION_SIZE_BUCKETS: &[f64; 16] = &[ ]; /// the measured times of building crates will be put into these buckets -pub fn build_time_histogram_buckets() -> Vec { - vec![ - 30.0, // 0.5 - 60.0, // 1 - 120.0, // 2 - 180.0, // 3 - 240.0, // 4 - 300.0, // 5 - 360.0, // 6 - 420.0, // 7 - 480.0, // 8 - 540.0, // 9 - 600.0, // 10 - 660.0, // 11 - 720.0, // 12 - 780.0, // 13 - 840.0, // 14 - 900.0, // 15 - 1200.0, // 20 - 1800.0, // 30 - 2400.0, // 40 - 3000.0, // 50 - 3600.0, // 60 - ] -} +pub const BUILD_TIME_HISTOGRAM_BUCKETS: &[f64] = &[ + 30.0, // 0.5 + 60.0, // 1 + 120.0, // 2 + 180.0, // 3 + 240.0, // 4 + 300.0, // 5 + 360.0, // 6 + 420.0, // 7 + 480.0, // 8 + 540.0, // 9 + 600.0, // 10 + 660.0, // 11 + 720.0, // 12 + 780.0, // 13 + 840.0, // 14 + 900.0, // 15 + 1200.0, // 20 + 1800.0, // 30 + 2400.0, // 40 + 3000.0, // 50 + 3600.0, // 60 +]; + +/// response time histogram buckets from the opentelemetry semantic conventions +/// https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverrequestduration +/// +/// These are the default prometheus bucket sizes, +/// https://docs.rs/prometheus/0.14.0/src/prometheus/histogram.rs.html#25-27 +/// tailored to broadly measure the response time (in seconds) of a network service. +/// +/// Otel default buckets are not suited for that. +pub const RESPONSE_TIME_HISTOGRAM_BUCKETS: &[f64] = &[ + 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, +]; metrics! { pub struct InstanceMetrics { @@ -103,8 +113,6 @@ metrics!
{ /// The traffic of various docs.rs routes pub(crate) routes_visited: IntCounterVec["route"], - /// The response times of various docs.rs routes - pub(crate) response_time: HistogramVec["route"], /// Count of recently accessed crates pub(crate) recent_crates: IntGaugeVec["duration"], diff --git a/src/web/metrics.rs b/src/web/metrics.rs index 0fa0ee686..0bc44576f 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -1,6 +1,8 @@ use crate::{ - AsyncBuildQueue, Config, InstanceMetrics, ServiceMetrics, db::Pool, - metrics::otel::AnyMeterProvider, web::error::AxumResult, + AsyncBuildQueue, Config, InstanceMetrics, ServiceMetrics, + db::Pool, + metrics::{RESPONSE_TIME_HISTOGRAM_BUCKETS, otel::AnyMeterProvider}, + web::error::AxumResult, }; use anyhow::{Context as _, Result}; use axum::{ @@ -44,6 +46,7 @@ impl WebMetrics { .build(), response_time: meter .f64_histogram(format!("{PREFIX}.response_time")) + .with_boundaries(RESPONSE_TIME_HISTOGRAM_BUCKETS.to_vec()) .with_unit("s") .build(), }