From d2980fcc3e5658d29b1dabd2876345e74da599ee Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 21 Nov 2025 18:28:42 +0100 Subject: [PATCH] Stream source files when we return them directly --- src/config.rs | 21 ++++++++++++- src/storage/mod.rs | 41 ++++++++++++------------- src/web/source.rs | 75 ++++++++++++++++++++++++++-------------------- 3 files changed, 82 insertions(+), 55 deletions(-) diff --git a/src/config.rs b/src/config.rs index 51528880d..4cf784f7d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,6 +1,13 @@ use crate::{cdn::cloudfront::CdnKind, storage::StorageKind}; use anyhow::{Context, Result, anyhow, bail}; -use std::{env::VarError, error::Error, io, path, path::PathBuf, str::FromStr, time::Duration}; +use std::{ + env::VarError, + error::Error, + io, + path::{self, Path, PathBuf}, + str::FromStr, + time::Duration, +}; use tracing::trace; use url::Url; @@ -253,6 +260,18 @@ impl Config { )?)) .max_queued_rebuilds(maybe_env("DOCSRS_MAX_QUEUED_REBUILDS")?)) } + + pub fn max_file_size_for(&self, path: impl AsRef) -> usize { + static HTML: &str = "html"; + + if let Some(ext) = path.as_ref().extension() + && ext == HTML + { + self.max_file_size_html + } else { + self.max_file_size + } + } } fn ensure_absolute_path(path: PathBuf) -> io::Result { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a316d63a5..742e4ef87 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -89,12 +89,6 @@ pub(crate) struct Blob { pub(crate) compression: Option, } -impl Blob { - pub(crate) fn is_empty(&self) -> bool { - self.mime == "application/x-empty" - } -} - pub(crate) struct StreamingBlob { pub(crate) path: String, pub(crate) mime: Mime, @@ -299,14 +293,6 @@ impl AsyncStorage { } } - fn max_file_size_for(&self, path: &str) -> usize { - if path.ends_with(".html") { - self.config.max_file_size_html - } else { - self.config.max_file_size - } - } - /// Fetch a rustdoc file from our blob storage. /// * `name` - the crate name /// * `version` - the crate version @@ -345,17 +331,28 @@ impl AsyncStorage { path: &str, archive_storage: bool, ) -> Result { - Ok(if archive_storage { - self.get_from_archive( - &source_archive_path(name, version), - latest_build_id, - path, - self.max_file_size_for(path), - ) + self.stream_source_file(name, version, latest_build_id, path, archive_storage) .await? + .materialize(self.config.max_file_size_for(path)) + .await + } + + #[instrument] + pub(crate) async fn stream_source_file( + &self, + name: &str, + version: &Version, + latest_build_id: Option, + path: &str, + archive_storage: bool, + ) -> Result { + trace!("fetch source file"); + Ok(if archive_storage { + self.stream_from_archive(&source_archive_path(name, version), latest_build_id, path) + .await? } else { let remote_path = format!("sources/{name}/{version}/{path}"); - self.get(&remote_path, self.max_file_size_for(path)).await? + self.get_stream(&remote_path).await? }) } diff --git a/src/web/source.rs b/src/web/source.rs index d1b2bb9fa..abac50349 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -1,5 +1,5 @@ use crate::{ - AsyncStorage, + AsyncStorage, Config, db::{BuildId, types::version::Version}, impl_axum_webpage, storage::PathNotFoundError, @@ -11,7 +11,7 @@ use crate::{ DbConnection, rustdoc::{PageKind, RustdocParams}, }, - file::File as DbFile, + file::StreamingFile, headers::CanonicalUrl, match_version, page::templates::{RenderBrands, RenderRegular, RenderSolid, filters}, @@ -188,6 +188,7 @@ impl SourcePage { pub(crate) async fn source_browser_handler( params: RustdocParams, Extension(storage): Extension>, + Extension(config): Extension>, mut conn: DbConnection, ) -> AxumResult { let params = params.with_page_kind(PageKind::Source); @@ -239,9 +240,9 @@ pub(crate) async fn source_browser_handler( // try to get actual file first // skip if request is a directory - let (blob, is_file_too_large) = if !params.path_is_folder() { + let stream = if !params.path_is_folder() { match storage - .fetch_source_file( + .stream_source_file( params.name(), &version, row.latest_build_id, @@ -251,23 +252,14 @@ pub(crate) async fn source_browser_handler( .await .context("error fetching source file") { - Ok(blob) => (Some(blob), false), + Ok(stream) => Some(stream), Err(err) => match err { - err if err.is::() => (None, false), - // if file is too large, set is_file_too_large to true - err if err.downcast_ref::().is_some_and(|err| { - err.get_ref() - .map(|err| err.is::()) - .unwrap_or(false) - }) => - { - (None, true) - } + err if err.is::() => None, _ => return Err(err.into()), }, } } else { - (None, false) + None }; let canonical_url = CanonicalUrl::from_uri( @@ -277,28 +269,47 @@ pub(crate) async fn source_browser_handler( .source_url(), ); - let (file, file_content) = if let Some(blob) = blob { - let is_text = blob.mime.type_() == mime::TEXT || blob.mime == mime::APPLICATION_JSON; - // serve the file with DatabaseFileHandler if file isn't text and not empty - if !is_text && !blob.is_empty() { - let mut response = DbFile(blob).into_response(); + let mut is_file_too_large = false; + + let (file, file_content) = if let Some(stream) = stream { + let is_text = stream.mime.type_() == mime::TEXT || stream.mime == mime::APPLICATION_JSON; + if !is_text { + // if the file isn't text, serve it directly to the client + let mut response = StreamingFile(stream).into_response(); response.headers_mut().typed_insert(canonical_url); response .extensions_mut() .insert(CachePolicy::ForeverInCdnAndStaleInBrowser); return Ok(response); - } else if is_text && !blob.is_empty() { - let path = blob - .path - .rsplit_once('/') - .map(|(_, path)| path) - .unwrap_or(&blob.path); - ( - Some(File::from_path_and_mime(path, &blob.mime)), - String::from_utf8(blob.content).ok(), - ) } else { - (None, None) + let max_file_size = config.max_file_size_for(&stream.path); + + // otherwise we'll now download the content to render it into our template. + match stream.materialize(max_file_size).await { + Ok(blob) => { + let path = blob + .path + .rsplit_once('/') + .map(|(_, path)| path) + .unwrap_or(&blob.path); + ( + Some(File::from_path_and_mime(path, &blob.mime)), + Some(String::from_utf8_lossy(&blob.content).to_string()), + ) + } + Err(err) + // if file is too large, set is_file_too_large to true + if err.downcast_ref::().is_some_and(|err| { + err.get_ref() + .map(|err| err.is::()) + .unwrap_or(false) + }) => + { + is_file_too_large = true; + (None, None) + } + Err(err) => return Err(err.into()), + } } } else { (None, None)