diff --git a/.sqlx/query-1be3826465e3cd76738b3bfc9d46df14795d1f18830783080bf3a58b909ab16a.json b/.sqlx/query-1be3826465e3cd76738b3bfc9d46df14795d1f18830783080bf3a58b909ab16a.json new file mode 100644 index 000000000..1cac273c1 --- /dev/null +++ b/.sqlx/query-1be3826465e3cd76738b3bfc9d46df14795d1f18830783080bf3a58b909ab16a.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "WITH params AS (\n -- get maximum possible id-value in crates-table\n SELECT last_value AS max_id FROM crates_id_seq\n )\n SELECT\n crates.name as \"name: KrateName\",\n releases.version as \"version: Version\",\n releases.target_name\n FROM (\n -- generate random numbers in the ID-range.\n SELECT DISTINCT 1 + trunc(random() * params.max_id)::INTEGER AS id\n FROM params, generate_series(1, $1)\n ) AS r\n INNER JOIN crates ON r.id = crates.id\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN repositories ON releases.repository_id = repositories.id\n WHERE\n releases.rustdoc_status = TRUE AND\n repositories.stars >= 100\n LIMIT 1", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name: KrateName", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "version: Version", + "type_info": "Text" + }, + { + "ordinal": 2, + "name": "target_name", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [ + false, + false, + true + ] + }, + "hash": "1be3826465e3cd76738b3bfc9d46df14795d1f18830783080bf3a58b909ab16a" +} diff --git a/.sqlx/query-543694448dbb43af41dc18c29017c5b211f45f7722811fdcabea26f796d4514b.json b/.sqlx/query-543694448dbb43af41dc18c29017c5b211f45f7722811fdcabea26f796d4514b.json new file mode 100644 index 000000000..d3de58075 --- /dev/null +++ b/.sqlx/query-543694448dbb43af41dc18c29017c5b211f45f7722811fdcabea26f796d4514b.json @@ -0,0 +1,64 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n crates.name as \"name: KrateName\",\n releases.version as \"version: Version\",\n releases.description,\n release_build_status.last_build_time,\n releases.target_name,\n releases.rustdoc_status,\n repositories.stars as \"stars?\",\n EXISTS (\n SELECT 1\n FROM releases AS all_releases\n WHERE\n all_releases.crate_id = crates.id AND\n all_releases.yanked = false\n ) AS has_unyanked_releases\n\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON releases.id = release_build_status.rid\n LEFT JOIN repositories ON releases.repository_id = repositories.id\n\n WHERE\n crates.name = ANY($1) AND\n release_build_status.build_status <> 'in_progress'", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name: KrateName", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "version: Version", + "type_info": "Text" + }, + { + "ordinal": 2, + "name": "description", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "last_build_time", + "type_info": "Timestamptz" + }, + { + "ordinal": 4, + "name": "target_name", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "rustdoc_status", + "type_info": "Bool" + }, + { + "ordinal": 6, + "name": "stars?", + "type_info": "Int4" + }, + { + "ordinal": 7, + "name": "has_unyanked_releases", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "TextArray" + ] + }, + "nullable": [ + false, + false, + true, + true, + true, + true, + false, + null + ] + }, + "hash": "543694448dbb43af41dc18c29017c5b211f45f7722811fdcabea26f796d4514b" +} diff --git a/.sqlx/query-af663241b6e7d77402847d57ef9a1d69412ea5af6d0b6562644fe913bac2e1df.json b/.sqlx/query-af663241b6e7d77402847d57ef9a1d69412ea5af6d0b6562644fe913bac2e1df.json deleted file mode 100644 index ea4fc500a..000000000 --- a/.sqlx/query-af663241b6e7d77402847d57ef9a1d69412ea5af6d0b6562644fe913bac2e1df.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n crates.name,\n releases.version as \"version: Version\",\n releases.description,\n release_build_status.last_build_time,\n releases.target_name,\n releases.rustdoc_status,\n repositories.stars as \"stars?\",\n EXISTS (\n SELECT 1\n FROM releases AS all_releases\n WHERE\n all_releases.crate_id = crates.id AND\n all_releases.yanked = false\n ) AS has_unyanked_releases\n\n FROM crates\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN release_build_status ON releases.id = release_build_status.rid\n LEFT JOIN repositories ON releases.repository_id = repositories.id\n\n WHERE\n crates.name = ANY($1) AND\n release_build_status.build_status <> 'in_progress'", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "version: Version", - "type_info": "Text" - }, - { - "ordinal": 2, - "name": "description", - "type_info": "Varchar" - }, - { - "ordinal": 3, - "name": "last_build_time", - "type_info": "Timestamptz" - }, - { - "ordinal": 4, - "name": "target_name", - "type_info": "Varchar" - }, - { - "ordinal": 5, - "name": "rustdoc_status", - "type_info": "Bool" - }, - { - "ordinal": 6, - "name": "stars?", - "type_info": "Int4" - }, - { - "ordinal": 7, - "name": "has_unyanked_releases", - "type_info": "Bool" - } - ], - "parameters": { - "Left": [ - "TextArray" - ] - }, - "nullable": [ - false, - false, - true, - true, - true, - true, - false, - null - ] - }, - "hash": "af663241b6e7d77402847d57ef9a1d69412ea5af6d0b6562644fe913bac2e1df" -} diff --git a/.sqlx/query-bdddad099e891bb45ba3703d7144160056d6cb620c55be459ead0f95c3523035.json b/.sqlx/query-bdddad099e891bb45ba3703d7144160056d6cb620c55be459ead0f95c3523035.json deleted file mode 100644 index aea83a04d..000000000 --- a/.sqlx/query-bdddad099e891bb45ba3703d7144160056d6cb620c55be459ead0f95c3523035.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "WITH params AS (\n -- get maximum possible id-value in crates-table\n SELECT last_value AS max_id FROM crates_id_seq\n )\n SELECT\n crates.name,\n releases.version,\n releases.target_name\n FROM (\n -- generate random numbers in the ID-range.\n SELECT DISTINCT 1 + trunc(random() * params.max_id)::INTEGER AS id\n FROM params, generate_series(1, $1)\n ) AS r\n INNER JOIN crates ON r.id = crates.id\n INNER JOIN releases ON crates.latest_version_id = releases.id\n INNER JOIN repositories ON releases.repository_id = repositories.id\n WHERE\n releases.rustdoc_status = TRUE AND\n repositories.stars >= 100\n LIMIT 1", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "version", - "type_info": "Text" - }, - { - "ordinal": 2, - "name": "target_name", - "type_info": "Varchar" - } - ], - "parameters": { - "Left": [ - "Int4" - ] - }, - "nullable": [ - false, - false, - true - ] - }, - "hash": "bdddad099e891bb45ba3703d7144160056d6cb620c55be459ead0f95c3523035" -} diff --git a/.sqlx/query-db8654a53a9914ebfaa15a9bcc0af66fc2f46a8b5caa5473278123403ebbd613.json b/.sqlx/query-db8654a53a9914ebfaa15a9bcc0af66fc2f46a8b5caa5473278123403ebbd613.json deleted file mode 100644 index b35981b73..000000000 --- a/.sqlx/query-db8654a53a9914ebfaa15a9bcc0af66fc2f46a8b5caa5473278123403ebbd613.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n crates.name,\n releases.version as \"version: Version\"\n FROM builds\n INNER JOIN releases ON releases.id = builds.rid\n INNER JOIN crates ON releases.crate_id = crates.id\n WHERE\n builds.build_status = 'in_progress'\n ORDER BY builds.id ASC", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "name", - "type_info": "Text" - }, - { - "ordinal": 1, - "name": "version: Version", - "type_info": "Text" - } - ], - "parameters": { - "Left": [] - }, - "nullable": [ - false, - false - ] - }, - "hash": "db8654a53a9914ebfaa15a9bcc0af66fc2f46a8b5caa5473278123403ebbd613" -} diff --git a/.sqlx/query-e1a95bfd43982d86a56cf542fde99a6e5a42e56aedf068a3acf39f923eb32ade.json b/.sqlx/query-e1a95bfd43982d86a56cf542fde99a6e5a42e56aedf068a3acf39f923eb32ade.json new file mode 100644 index 000000000..c9bf149a6 --- /dev/null +++ b/.sqlx/query-e1a95bfd43982d86a56cf542fde99a6e5a42e56aedf068a3acf39f923eb32ade.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n crates.name as \"name: KrateName\",\n releases.version as \"version: Version\"\n FROM builds\n INNER JOIN releases ON releases.id = builds.rid\n INNER JOIN crates ON releases.crate_id = crates.id\n WHERE\n builds.build_status = 'in_progress'\n ORDER BY builds.id ASC", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name: KrateName", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "version: Version", + "type_info": "Text" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false, + false + ] + }, + "hash": "e1a95bfd43982d86a56cf542fde99a6e5a42e56aedf068a3acf39f923eb32ade" +} diff --git a/Cargo.lock b/Cargo.lock index e64a49db8..fd92215bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1243,6 +1243,12 @@ dependencies = [ "half", ] +[[package]] +name = "claims" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bba18ee93d577a8428902687bcc2b6b45a56b1981a1f6d779731c86cc4c5db18" + [[package]] name = "clap" version = "4.5.53" @@ -1463,6 +1469,15 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "crates_io_validation" +version = "0.0.0" +dependencies = [ + "claims", + "thiserror 2.0.17", + "unicode-xid", +] + [[package]] name = "crc" version = "3.4.0" @@ -1974,6 +1989,7 @@ dependencies = [ "constant_time_eq", "crates-index", "crates-index-diff", + "crates_io_validation", "criterion", "dashmap", "derive_builder", diff --git a/Cargo.toml b/Cargo.toml index 21cda216c..e2e4a6a89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ build = "build.rs" edition = "2024" [workspace] +members = ["crates/*"] exclude = [ "ignored", "tests", @@ -105,6 +106,8 @@ chrono = { version = "0.4.11", default-features = false, features = ["clock", "s constant_time_eq = "0.4.2" md5 = "0.8.0" +crates_io_validation = { path = "crates/crates_io_validation" } + [dev-dependencies] criterion = "0.8.0" kuchikiki = "0.8" diff --git a/crates/crates_io_validation/Cargo.toml b/crates/crates_io_validation/Cargo.toml new file mode 100644 index 000000000..e4dbadf2e --- /dev/null +++ b/crates/crates_io_validation/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "crates_io_validation" +version = "0.0.0" +license = "MIT OR Apache-2.0" +edition = "2024" + +[dependencies] +thiserror = "=2.0.17" +unicode-xid = "=0.2.6" + +[dev-dependencies] +claims = "=0.8.0" diff --git a/crates/crates_io_validation/README.md b/crates/crates_io_validation/README.md new file mode 100644 index 000000000..703955e50 --- /dev/null +++ b/crates/crates_io_validation/README.md @@ -0,0 +1,6 @@ +# crates_io_validation + +This package contains some low-level validation functions that are used across +the code base. + +These are mostly in their own crate to be shared with docs.rs. diff --git a/crates/crates_io_validation/src/lib.rs b/crates/crates_io_validation/src/lib.rs new file mode 100644 index 000000000..f65297fb6 --- /dev/null +++ b/crates/crates_io_validation/src/lib.rs @@ -0,0 +1,360 @@ +#![doc = include_str!("../README.md")] + +pub const MAX_NAME_LENGTH: usize = 64; + +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum InvalidFeature { + #[error("feature cannot be empty")] + Empty, + #[error( + "invalid character `{0}` in feature `{1}`, the first character must be \ + a Unicode XID start character or digit (most letters or `_` or `0` to \ + `9`)" + )] + Start(char, String), + #[error( + "invalid character `{0}` in feature `{1}`, characters must be Unicode \ + XID characters, `+`, `-`, or `.` (numbers, `+`, `-`, `_`, `.`, or most \ + letters)" + )] + Char(char, String), + #[error(transparent)] + DependencyName(#[from] InvalidDependencyName), +} + +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum InvalidCrateName { + #[error("the {what} name `{name}` is too long (max {MAX_NAME_LENGTH} characters)")] + TooLong { what: String, name: String }, + #[error("{what} name cannot be empty")] + Empty { what: String }, + #[error( + "the name `{name}` cannot be used as a {what} name, \ + the name cannot start with a digit" + )] + StartWithDigit { what: String, name: String }, + #[error( + "invalid character `{first_char}` in {what} name: `{name}`, \ + the first character must be an ASCII character" + )] + Start { + first_char: char, + what: String, + name: String, + }, + #[error( + "invalid character `{ch}` in {what} name: `{name}`, \ + characters must be an ASCII alphanumeric characters, `-`, or `_`" + )] + Char { + ch: char, + what: String, + name: String, + }, +} + +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum InvalidDependencyName { + #[error("the dependency name `{0}` is too long (max {MAX_NAME_LENGTH} characters)")] + TooLong(String), + #[error("dependency name cannot be empty")] + Empty, + #[error( + "the name `{0}` cannot be used as a dependency name, \ + the name cannot start with a digit" + )] + StartWithDigit(String), + #[error( + "invalid character `{0}` in dependency name: `{1}`, \ + the first character must be an ASCII character, or `_`" + )] + Start(char, String), + #[error( + "invalid character `{0}` in dependency name: `{1}`, \ + characters must be an ASCII alphanumeric characters, `-`, or `_`" + )] + Char(char, String), +} + +// Validates the name is a valid crate name. +// This is also used for validating the name of dependencies. +// So the `for_what` parameter is used to indicate what the name is used for. +// It can be "crate" or "dependency". +pub fn validate_crate_name(for_what: &str, name: &str) -> Result<(), InvalidCrateName> { + if name.chars().count() > MAX_NAME_LENGTH { + return Err(InvalidCrateName::TooLong { + what: for_what.into(), + name: name.into(), + }); + } + validate_create_ident(for_what, name) +} + +// Checks that the name is a valid crate name. +// 1. The name must be non-empty. +// 2. The first character must be an ASCII character. +// 3. The remaining characters must be ASCII alphanumerics or `-` or `_`. +// Note: This differs from `valid_dependency_name`, which allows `_` as the first character. +fn validate_create_ident(for_what: &str, name: &str) -> Result<(), InvalidCrateName> { + if name.is_empty() { + return Err(InvalidCrateName::Empty { + what: for_what.into(), + }); + } + let mut chars = name.chars(); + if let Some(ch) = chars.next() { + if ch.is_ascii_digit() { + return Err(InvalidCrateName::StartWithDigit { + what: for_what.into(), + name: name.into(), + }); + } + if !ch.is_ascii_alphabetic() { + return Err(InvalidCrateName::Start { + first_char: ch, + what: for_what.into(), + name: name.into(), + }); + } + } + + for ch in chars { + if !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_') { + return Err(InvalidCrateName::Char { + ch, + what: for_what.into(), + name: name.into(), + }); + } + } + + Ok(()) +} + +pub fn validate_dependency_name(name: &str) -> Result<(), InvalidDependencyName> { + if name.chars().count() > MAX_NAME_LENGTH { + return Err(InvalidDependencyName::TooLong(name.into())); + } + validate_dependency_ident(name) +} + +// Checks that the name is a valid dependency name. +// 1. The name must be non-empty. +// 2. The first character must be an ASCII character or `_`. +// 3. The remaining characters must be ASCII alphanumerics or `-` or `_`. +fn validate_dependency_ident(name: &str) -> Result<(), InvalidDependencyName> { + if name.is_empty() { + return Err(InvalidDependencyName::Empty); + } + let mut chars = name.chars(); + if let Some(ch) = chars.next() { + if ch.is_ascii_digit() { + return Err(InvalidDependencyName::StartWithDigit(name.into())); + } + if !(ch.is_ascii_alphabetic() || ch == '_') { + return Err(InvalidDependencyName::Start(ch, name.into())); + } + } + + for ch in chars { + if !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_') { + return Err(InvalidDependencyName::Char(ch, name.into())); + } + } + + Ok(()) +} + +/// Validates the THIS parts of `features = ["THIS", "and/THIS", "dep:THIS", "dep?/THIS"]`. +/// 1. The name must be non-empty. +/// 2. The first character must be a Unicode XID start character, `_`, or a digit. +/// 3. The remaining characters must be Unicode XID characters, `_`, `+`, `-`, or `.`. +pub fn validate_feature_name(name: &str) -> Result<(), InvalidFeature> { + if name.is_empty() { + return Err(InvalidFeature::Empty); + } + let mut chars = name.chars(); + if let Some(ch) = chars.next() + && !(unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' || ch.is_ascii_digit()) + { + return Err(InvalidFeature::Start(ch, name.into())); + } + for ch in chars { + if !(unicode_xid::UnicodeXID::is_xid_continue(ch) || ch == '+' || ch == '-' || ch == '.') { + return Err(InvalidFeature::Char(ch, name.into())); + } + } + + Ok(()) +} + +/// Validates a whole feature string, `features = ["THIS", "and/THIS", "dep:THIS", "dep?/THIS"]`. +pub fn validate_feature(name: &str) -> Result<(), InvalidFeature> { + if let Some((dep, dep_feat)) = name.split_once('/') { + let dep = dep.strip_suffix('?').unwrap_or(dep); + validate_dependency_name(dep)?; + validate_feature_name(dep_feat) + } else if let Some((_, dep)) = name.split_once("dep:") { + validate_dependency_name(dep)?; + Ok(()) + } else { + validate_feature_name(name) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use claims::{assert_err_eq, assert_ok}; + + #[test] + fn test_validate_crate_name() { + use super::{InvalidCrateName, MAX_NAME_LENGTH}; + + assert_ok!(validate_crate_name("crate", "foo")); + assert_err_eq!( + validate_crate_name("crate", "京"), + InvalidCrateName::Start { + first_char: '京', + what: "crate".into(), + name: "京".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", ""), + InvalidCrateName::Empty { + what: "crate".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", "💝"), + InvalidCrateName::Start { + first_char: '💝', + what: "crate".into(), + name: "💝".into() + } + ); + assert_ok!(validate_crate_name("crate", "foo_underscore")); + assert_ok!(validate_crate_name("crate", "foo-dash")); + assert_err_eq!( + validate_crate_name("crate", "foo+plus"), + InvalidCrateName::Char { + ch: '+', + what: "crate".into(), + name: "foo+plus".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", "_foo"), + InvalidCrateName::Start { + first_char: '_', + what: "crate".into(), + name: "_foo".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", "-foo"), + InvalidCrateName::Start { + first_char: '-', + what: "crate".into(), + name: "-foo".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", "123"), + InvalidCrateName::StartWithDigit { + what: "crate".into(), + name: "123".into() + } + ); + assert_err_eq!( + validate_crate_name("crate", "o".repeat(MAX_NAME_LENGTH + 1).as_str()), + InvalidCrateName::TooLong { + what: "crate".into(), + name: "o".repeat(MAX_NAME_LENGTH + 1).as_str().into() + } + ); + } + + #[test] + fn test_validate_dependency_name() { + use super::{InvalidDependencyName, MAX_NAME_LENGTH}; + + assert_ok!(validate_dependency_name("foo")); + assert_err_eq!( + validate_dependency_name("京"), + InvalidDependencyName::Start('京', "京".into()) + ); + assert_err_eq!(validate_dependency_name(""), InvalidDependencyName::Empty); + assert_err_eq!( + validate_dependency_name("💝"), + InvalidDependencyName::Start('💝', "💝".into()) + ); + assert_ok!(validate_dependency_name("foo_underscore")); + assert_ok!(validate_dependency_name("foo-dash")); + assert_err_eq!( + validate_dependency_name("foo+plus"), + InvalidDependencyName::Char('+', "foo+plus".into()) + ); + // Starting with an underscore is a valid dependency name. + assert_ok!(validate_dependency_name("_foo")); + assert_err_eq!( + validate_dependency_name("-foo"), + InvalidDependencyName::Start('-', "-foo".into()) + ); + assert_err_eq!( + validate_dependency_name("o".repeat(MAX_NAME_LENGTH + 1).as_str()), + InvalidDependencyName::TooLong("o".repeat(MAX_NAME_LENGTH + 1).as_str().into()) + ); + } + + #[test] + fn test_validate_feature_names() { + use super::InvalidDependencyName; + use super::InvalidFeature; + + assert_ok!(validate_feature("foo")); + assert_ok!(validate_feature("1foo")); + assert_ok!(validate_feature("_foo")); + assert_ok!(validate_feature("_foo-_+.1")); + assert_ok!(validate_feature("_foo-_+.1")); + assert_err_eq!(validate_feature(""), InvalidFeature::Empty); + assert_err_eq!(validate_feature("/"), InvalidDependencyName::Empty.into()); + assert_err_eq!( + validate_feature("%/%"), + InvalidDependencyName::Start('%', "%".into()).into() + ); + assert_ok!(validate_feature("a/a")); + assert_ok!(validate_feature("32-column-tables")); + assert_ok!(validate_feature("c++20")); + assert_ok!(validate_feature("krate/c++20")); + assert_err_eq!( + validate_feature("c++20/wow"), + InvalidDependencyName::Char('+', "c++20".into()).into() + ); + assert_ok!(validate_feature("foo?/bar")); + assert_ok!(validate_feature("dep:foo")); + assert_err_eq!( + validate_feature("dep:foo?/bar"), + InvalidDependencyName::Char(':', "dep:foo".into()).into() + ); + assert_err_eq!( + validate_feature("foo/?bar"), + InvalidFeature::Start('?', "?bar".into()) + ); + assert_err_eq!( + validate_feature("foo?bar"), + InvalidFeature::Char('?', "foo?bar".into()) + ); + assert_ok!(validate_feature("bar.web")); + assert_ok!(validate_feature("foo/bar.web")); + assert_err_eq!( + validate_feature("dep:0foo"), + InvalidDependencyName::StartWithDigit("0foo".into()).into() + ); + assert_err_eq!( + validate_feature("0foo?/bar.web"), + InvalidDependencyName::StartWithDigit("0foo".into()).into() + ); + } +} diff --git a/src/db/types/krate_name.rs b/src/db/types/krate_name.rs index 041db4add..0d5b00627 100644 --- a/src/db/types/krate_name.rs +++ b/src/db/types/krate_name.rs @@ -1,14 +1,15 @@ -use anyhow::{Result, bail}; +use anyhow::Result; +use crates_io_validation::{InvalidCrateName, validate_crate_name}; use derive_more::{Deref, Display, Into}; use serde_with::{DeserializeFromStr, SerializeDisplay}; use sqlx::{ Decode, Encode, Postgres, encode::IsNull, error::BoxDynError, - postgres::{PgArgumentBuffer, PgTypeInfo, PgValueRef}, + postgres::{PgArgumentBuffer, PgHasArrayType, PgTypeInfo, PgValueRef}, prelude::*, }; -use std::{io::Write, str::FromStr}; +use std::{borrow::Cow, io::Write, str::FromStr}; /// validated crate name /// @@ -30,7 +31,14 @@ use std::{io::Write, str::FromStr}; SerializeDisplay, bincode::Encode, )] -pub struct KrateName(String); +pub struct KrateName(Cow<'static, str>); + +impl KrateName { + #[cfg(test)] + pub(crate) const fn from_static(s: &'static str) -> Self { + KrateName(Cow::Borrowed(s)) + } +} impl From<&KrateName> for KrateName { fn from(krate_name: &KrateName) -> Self { @@ -39,11 +47,11 @@ impl From<&KrateName> for KrateName { } impl FromStr for KrateName { - type Err = anyhow::Error; + type Err = InvalidCrateName; fn from_str(s: &str) -> Result { - validate_crate_name(s)?; - Ok(KrateName(s.to_string())) + validate_crate_name("crate", s)?; + Ok(KrateName(Cow::Owned(s.to_string()))) } } @@ -66,8 +74,21 @@ impl<'q> Encode<'q, Postgres> for KrateName { impl<'r> Decode<'r, Postgres> for KrateName { fn decode(value: PgValueRef<'r>) -> Result { + // future improvement: we could also avoid the allocation here + // and return a Cow::Borrowed while reading from the DB. + // But this would mean the lifetime leaks into the whole codebase. let s: &str = Decode::::decode(value)?; - Ok(Self(s.parse()?)) + Ok(Self(Cow::Owned(s.parse()?))) + } +} + +impl PgHasArrayType for KrateName { + fn array_type_info() -> PgTypeInfo { + <&str as PgHasArrayType>::array_type_info() + } + + fn array_compatible(ty: &PgTypeInfo) -> bool { + <&str as PgHasArrayType>::array_compatible(ty) } } @@ -80,59 +101,10 @@ where } } -/// validate if a string is a valid crate name. -/// Based on the crates.io implementation in their publish-endpoint: -/// https://github.com/rust-lang/crates.io/blob/9651eaab14887e0442849d5e81c1f2bbf10a73a2/crates/crates_io_database/src/models/krate.rs#L218-L252 -fn validate_crate_name(name: &str) -> Result<()> { - if name.is_empty() { - bail!("empty crate name"); - } - if name.len() > 64 { - bail!("crate name too long (maximum is 64 characters)"); - } - - let mut chars = name.chars(); - if let Some(ch) = chars.next() { - if ch.is_ascii_digit() { - bail!("crate name cannot start with a digit"); - } - if !ch.is_ascii_alphabetic() { - bail!("crate name must start with an alphabetic character"); - } - } - - for ch in chars { - if !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_') { - bail!("invalid character '{}' in crate name", ch); - } - } - - Ok(()) -} - #[cfg(test)] mod tests { - use crate::test::TestEnvironment; - use super::*; - use test_case::test_case; - - #[test_case("valid_crate_name")] - #[test_case("with-dash")] - #[test_case("CapitalLetter")] - fn test_valid_crate_name(name: &str) { - assert!(validate_crate_name(name).is_ok()); - assert_eq!(name.parse::().unwrap(), name); - } - - #[test_case("with space")] - #[test_case("line break\n")] - #[test_case("non ascii äöü")] - #[test_case("0123456789101112131415161718192021222324252627282930313233343536373839"; "too long")] - fn test_invalid_crate_name(name: &str) { - assert!(validate_crate_name(name).is_err()); - assert!(name.parse::().is_err()); - } + use crate::test::TestEnvironment; #[tokio::test(flavor = "multi_thread")] async fn test_sqlx_encode_decode() -> Result<()> { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index bc515475a..2cd74b823 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -45,7 +45,7 @@ use tokio::{ runtime, sync::Mutex, }; -use tracing::{debug, error, info_span, instrument, trace, warn}; +use tracing::{debug, info_span, instrument, trace, warn}; use walkdir::WalkDir; const ARCHIVE_INDEX_FILE_EXTENSION: &str = "index"; diff --git a/src/utils/cargo_metadata.rs b/src/utils/cargo_metadata.rs index 370dccd13..28681e18c 100644 --- a/src/utils/cargo_metadata.rs +++ b/src/utils/cargo_metadata.rs @@ -1,4 +1,6 @@ -use crate::{db::types::version::Version, error::Result}; +use crate::db::types::krate_name::KrateName; +use crate::web::ReqVersion; +use crate::{db::types::version::Version, error::Result, web::extractors::rustdoc::RustdocParams}; use anyhow::{Context, bail}; use rustwide::{Toolchain, Workspace, cmd::Command}; use semver::VersionReq; @@ -135,6 +137,19 @@ pub(crate) struct Dependency { pub(crate) optional: bool, } +impl Dependency { + pub(crate) fn rustdoc_params(&self) -> RustdocParams { + RustdocParams::new( + // I validated in the database, which makes me assume that renames are + // handled before storing the deps into the column. + self.name + .parse::() + .expect("we validated that the dep name is always a valid KrateName"), + ) + .with_req_version(ReqVersion::Semver(self.req.clone())) + } +} + impl bincode::Encode for Dependency { fn encode( &self, diff --git a/src/utils/html.rs b/src/utils/html.rs index 891655270..f0da4f655 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -18,7 +18,7 @@ use lol_html::{element, errors::RewritingError}; use std::sync::Arc; use tokio::{io::AsyncRead, task::JoinHandle}; use tokio_util::io::ReaderStream; -use tracing::{Span, error, instrument}; +use tracing::{Span, instrument}; use tracing_futures::Instrument as _; const CHANNEL_SIZE: usize = 64; diff --git a/src/web/build_details.rs b/src/web/build_details.rs index bda5334dd..84db06670 100644 --- a/src/web/build_details.rs +++ b/src/web/build_details.rs @@ -76,7 +76,7 @@ pub(crate) async fn build_details_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.build_details_url(id, build_params.filename.as_deref()), diff --git a/src/web/builds.rs b/src/web/builds.rs index a88913bb4..47e041ae0 100644 --- a/src/web/builds.rs +++ b/src/web/builds.rs @@ -1,4 +1,5 @@ use crate::build_queue::PRIORITY_MANUAL_FROM_CRATES_IO; +use crate::db::types::krate_name::KrateName; use crate::{ AsyncBuildQueue, Config, db::{ @@ -70,7 +71,7 @@ pub(crate) async fn build_list_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.builds_url(), @@ -105,7 +106,7 @@ pub(crate) async fn build_list_handler( async fn crate_version_exists( conn: &mut sqlx::PgConnection, - name: &String, + name: &KrateName, version: &Version, ) -> Result { let row = sqlx::query!( @@ -125,7 +126,7 @@ async fn crate_version_exists( async fn build_trigger_check( conn: &mut sqlx::PgConnection, - name: &String, + name: &KrateName, version: &Version, build_queue: &Arc, ) -> AxumResult { @@ -145,7 +146,7 @@ async fn build_trigger_check( } pub(crate) async fn build_trigger_rebuild_handler( - Path((name, version)): Path<(String, Version)>, + Path((name, version)): Path<(KrateName, Version)>, mut conn: DbConnection, Extension(build_queue): Extension>, Extension(config): Extension>, diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 8e89e4614..331c0c3dc 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -489,7 +489,7 @@ pub(crate) async fn crate_details_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.crate_details_url(), @@ -660,7 +660,7 @@ pub(crate) async fn get_all_platforms_inner( AxumNope::Redirect( params .clone() - .with_confirmed_name(Some(corrected_name)) + .with_name(corrected_name) .with_req_version(req_version) .platforms_partial_url(), CachePolicy::NoCaching, @@ -669,7 +669,7 @@ pub(crate) async fn get_all_platforms_inner( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.platforms_partial_url(), @@ -2254,7 +2254,7 @@ path = "src/lib.rs" .await?; let resp = env.web_app().await.get("/crate/dummy%3E").await?; - assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert_eq!(resp.status(), StatusCode::BAD_REQUEST); Ok(()) }) diff --git a/src/web/error.rs b/src/web/error.rs index 70dec0844..db9ed92a4 100644 --- a/src/web/error.rs +++ b/src/web/error.rs @@ -10,7 +10,6 @@ use axum::{ response::{IntoResponse, Response as AxumResponse}, }; use std::borrow::Cow; -use tracing::error; #[derive(Debug, thiserror::Error)] pub enum AxumNope { diff --git a/src/web/extractors/rustdoc.rs b/src/web/extractors/rustdoc.rs index 347bd49ab..5e26421c2 100644 --- a/src/web/extractors/rustdoc.rs +++ b/src/web/extractors/rustdoc.rs @@ -49,8 +49,7 @@ pub(crate) struct RustdocParams { page_kind: Option, original_uri: Option, - name: String, - confirmed_name: Option, + name: KrateName, req_version: ReqVersion, doc_target: Option, inner_path: Option, @@ -69,7 +68,6 @@ impl std::fmt::Debug for RustdocParams { .field("page_kind", &self.page_kind) .field("original_uri", &self.original_uri) .field("name", &self.name) - .field("confirmed_name", &self.confirmed_name) .field("req_version", &self.req_version) .field("doc_target", &self.doc_target) .field("inner_path", &self.inner_path) @@ -109,8 +107,8 @@ impl std::fmt::Debug for RustdocParams { /// so this extractor can be used in many handlers with a variety of /// specificity of the route. #[derive(Deserialize, Debug)] -struct UrlParams { - pub name: String, +pub(crate) struct UrlParams { + pub name: KrateName, #[serde(default)] pub version: ReqVersion, pub target: Option, @@ -141,34 +139,21 @@ where let original_uri = parts.extract::().await.expect("infallible extractor"); - let static_route_suffix = { - let uri_path = url_decode(original_uri.path()).map_err(AxumNope::BadRequest)?; - - let matched_path = parts - .extract::() - .await - .map_err(|err| AxumNope::BadRequest(err.into()))?; - let matched_route = url_decode(matched_path.as_str()).map_err(AxumNope::BadRequest)?; - - find_static_route_suffix(&matched_route, &uri_path) - }; + let matched_path = parts + .extract::() + .await + .map_err(|err| AxumNope::BadRequest(err.into()))?; - Ok(RustdocParams::new(params.name) - .with_req_version(params.version) - .with_maybe_doc_target(params.target) - .with_maybe_inner_path(params.path) - .with_original_uri(original_uri) - .with_maybe_static_route_suffix(static_route_suffix)) + Ok(Self::from_parts(params, original_uri, matched_path)?) } } /// Builder-style methods to create & update the parameters. #[allow(dead_code)] impl RustdocParams { - pub(crate) fn new(name: impl Into) -> Self { + pub(crate) fn new(name: impl Into) -> Self { Self { - name: name.into().trim().into(), - confirmed_name: None, + name: name.into(), req_version: ReqVersion::default(), original_uri: None, doc_target: None, @@ -182,6 +167,30 @@ impl RustdocParams { } } + /// create RustdocParams with the given parts. + /// + /// Useful when you don't want to use struct as extractor directly, + /// for example when you manually change things. + pub(crate) fn from_parts( + params: UrlParams, + original_uri: Uri, + matched_path: MatchedPath, + ) -> Result { + let static_route_suffix = { + let uri_path = url_decode(original_uri.path())?; + let matched_route = url_decode(matched_path.as_str())?; + + find_static_route_suffix(&matched_route, &uri_path) + }; + + Ok(RustdocParams::new(params.name) + .with_req_version(params.version) + .with_maybe_doc_target(params.target) + .with_maybe_inner_path(params.path) + .with_original_uri(original_uri) + .with_maybe_static_route_suffix(static_route_suffix)) + } + fn try_update(self, f: F) -> Result where F: FnOnce(Self) -> Result, @@ -203,12 +212,11 @@ impl RustdocParams { } pub(crate) fn from_metadata(metadata: &MetaData) -> Self { - RustdocParams::new(metadata.name.to_string()).apply_metadata(metadata) + RustdocParams::new(metadata.name.clone()).apply_metadata(metadata) } pub(crate) fn apply_metadata(self, metadata: &MetaData) -> RustdocParams { - self.with_name(metadata.name.to_string()) - .with_confirmed_name(Some(metadata.name.clone())) + self.with_name(metadata.name.clone()) .with_req_version(&metadata.req_version) // first set the doc-target list .with_maybe_doc_targets(metadata.doc_targets.clone()) @@ -218,41 +226,24 @@ impl RustdocParams { } pub(crate) fn from_matched_release(matched_release: &MatchedRelease) -> Self { - RustdocParams::new(matched_release.name.to_string()).apply_matched_release(matched_release) + RustdocParams::new(matched_release.name.clone()).apply_matched_release(matched_release) } pub(crate) fn apply_matched_release(self, matched_release: &MatchedRelease) -> RustdocParams { let release = &matched_release.release; - self.with_name(matched_release.name.to_string()) - .with_confirmed_name(Some(matched_release.name.clone())) + self.with_name(matched_release.name.clone()) .with_req_version(&matched_release.req_version) .with_maybe_doc_targets(release.doc_targets.as_deref()) .with_maybe_default_target(release.default_target.as_deref()) .with_maybe_target_name(release.target_name.as_deref()) } - pub(crate) fn name(&self) -> &str { + pub(crate) fn name(&self) -> &KrateName { &self.name } - pub(crate) fn with_name(self, name: impl Into) -> Self { + pub(crate) fn with_name(self, name: impl Into) -> Self { self.update(|mut params| { - params.name = name.into().trim().into(); - params - }) - } - - pub(crate) fn confirmed_name(&self) -> Option<&KrateName> { - self.confirmed_name.as_ref() - } - pub(crate) fn with_confirmed_name(self, confirmed_name: Option>) -> Self { - self.update(|mut params| { - let confirmed_name = confirmed_name.map(Into::into); - - if let Some(ref confirmed_name) = confirmed_name { - params.name = confirmed_name.to_string(); - } - - params.confirmed_name = confirmed_name; + params.name = name.into(); params }) } @@ -921,7 +912,9 @@ mod tests { use axum::{Router, routing::get}; use test_case::test_case; - static KRATE: &str = "krate"; + const KRATE: KrateName = KrateName::from_static("krate"); + const DUMMY: KrateName = KrateName::from_static("dummy"); + const CLAP: KrateName = KrateName::from_static("clap"); const VERSION: Version = Version::new(0, 1, 0); static DEFAULT_TARGET: &str = "x86_64-unknown-linux-gnu"; static OTHER_TARGET: &str = "x86_64-pc-windows-msvc"; @@ -987,7 +980,7 @@ mod tests { )] #[test_case( "/{name}/{version}/clapproc%20%60macro.html", - RustdocParams::new("clap") + RustdocParams::new(CLAP) .try_with_original_uri("/clap/latest/clapproc%20%60macro.html").unwrap() .with_static_route_suffix("clapproc `macro.html"); "name, version, static suffix with some urlencoding" @@ -1217,10 +1210,10 @@ mod tests { .try_with_original_uri(&dummy_path[..]) .unwrap() .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); - assert_eq!(parsed.name(), KRATE); + assert_eq!(parsed.name(), &KRATE); assert_eq!(parsed.req_version(), &ReqVersion::Latest); assert_eq!(parsed.doc_target(), expected_target); assert_eq!(parsed.inner_path(), expected_path); @@ -1241,7 +1234,7 @@ mod tests { #[test_case("something.html", Some("html"); "plain file")] #[test_case("", None)] fn test_generate_fallback_search(path: &str, search: Option<&str>) { - let mut params = RustdocParams::new("dummy") + let mut params = RustdocParams::new(DUMMY) .try_with_req_version("0.4.0") .unwrap() // non-default target, target stays in the url @@ -1275,7 +1268,7 @@ mod tests { #[test] fn test_parse_source() { - let params = RustdocParams::new("dummy") + let params = RustdocParams::new(DUMMY) .try_with_req_version("0.4.0") .unwrap() .with_inner_path("README.md") @@ -1366,7 +1359,7 @@ mod tests { #[test] fn test_case_1() { - let params = RustdocParams::new("dummy") + let params = RustdocParams::new(DUMMY) .try_with_req_version("0.2.0") .unwrap() .with_doc_target("dummy") @@ -1485,36 +1478,41 @@ mod tests { "just other target" )] #[test_case( - Some(KRATE), Some(DEFAULT_TARGET), + Some(&KRATE), Some(DEFAULT_TARGET), Some(DEFAULT_TARGET), None => format!("{KRATE}/"); "full with default target, target name is used" )] #[test_case( - Some(KRATE), Some(DEFAULT_TARGET), + Some(&KRATE), Some(DEFAULT_TARGET), Some(OTHER_TARGET), None => format!("{OTHER_TARGET}/{KRATE}/"); "full with other target, target name is used" )] #[test_case( - Some(KRATE), Some(DEFAULT_TARGET), + Some(&KRATE), Some(DEFAULT_TARGET), Some(DEFAULT_TARGET), Some("inner/something.html") => "inner/something.html"; "full with default target, target name is ignored" )] #[test_case( - Some(KRATE), Some(DEFAULT_TARGET), + Some(&KRATE), Some(DEFAULT_TARGET), Some(OTHER_TARGET), Some("inner/something.html") => format!("{OTHER_TARGET}/inner/something.html"); "full with other target, target name is ignored" )] fn test_rustdoc_path_for_url( - target_name: Option<&str>, + target_name: Option<&KrateName>, default_target: Option<&str>, doc_target: Option<&str>, inner_path: Option<&str>, ) -> String { - generate_rustdoc_path_for_url(target_name, default_target, doc_target, inner_path) + generate_rustdoc_path_for_url( + target_name.map(|n| n.to_string()).as_deref(), + default_target, + doc_target, + inner_path, + ) } #[test] @@ -1525,7 +1523,7 @@ mod tests { .with_inner_path("path_add") .with_static_route_suffix("static.html") .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); // without page kind, rustdoc path doesn' thave a path, and static suffix ignored @@ -1563,7 +1561,7 @@ mod tests { .with_static_route_suffix("static.html") .with_doc_target(OTHER_TARGET) .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); // without page kind, rustdoc path doesn' thave a path, and static suffix ignored @@ -1605,7 +1603,7 @@ mod tests { #[test] fn test_debug_output() { - let params = RustdocParams::new("dummy") + let params = RustdocParams::new(&DUMMY) .try_with_req_version("0.2.0") .unwrap() .with_inner_path("struct.Dummy.html") @@ -1646,7 +1644,7 @@ mod tests { // it to the inner_path. let params = params .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); dbg!(¶ms); @@ -1672,7 +1670,7 @@ mod tests { .unwrap() .with_inner_path("dummy/struct.Dummy.html") .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); assert_eq!(params.doc_target(), None); @@ -1693,7 +1691,7 @@ mod tests { .unwrap() .with_inner_path(format!("{OTHER_TARGET}/dummy/struct.Dummy.html")) .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); assert_eq!(params.doc_target(), Some(OTHER_TARGET)); @@ -1713,14 +1711,14 @@ mod tests { .try_with_original_uri(format!("/{KRATE}/latest/{KRATE}")) .unwrap() .with_req_version(ReqVersion::Latest) - .with_doc_target(KRATE) + .with_doc_target(KRATE.to_string()) ); assert_eq!(params.rustdoc_url(), "/krate/latest/krate/"); let params = dbg!( params - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_default_target(DEFAULT_TARGET) .with_doc_targets(TARGETS.iter().cloned()) ); @@ -1732,7 +1730,7 @@ mod tests { #[test_case("other_path", "/krate/latest/krate/other_path"; "without .html")] #[test_case("other_path.html", "/krate/latest/krate/other_path.html"; "with .html")] #[test_case("settings.html", "/krate/latest/settings.html"; "static routes")] - #[test_case(KRATE, "/krate/latest/krate/"; "same as target name, without slash")] + #[test_case("krate", "/krate/latest/krate/"; "same as target name, without slash")] fn test_redirect_some_odd_paths_we_saw(inner_path: &str, expected_url: &str) { // test for https://github.com/rust-lang/docs.rs/issues/2989 let params = RustdocParams::new(KRATE) @@ -1743,7 +1741,7 @@ mod tests { .with_maybe_doc_target(None::) .with_inner_path(inner_path) .with_default_target(DEFAULT_TARGET) - .with_target_name(KRATE) + .with_target_name(KRATE.to_string()) .with_doc_targets(TARGETS.iter().cloned()); dbg!(¶ms); @@ -1762,7 +1760,7 @@ mod tests { let params = RustdocParams::new(KRATE) .with_page_kind(PageKind::Rustdoc) .with_req_version(ReqVersion::Exact(ver)) - .with_doc_target(KRATE) + .with_doc_target(KRATE.to_string()) .with_inner_path("trait.Itertools.html"); dbg!(¶ms); diff --git a/src/web/features.rs b/src/web/features.rs index aee4b310d..bebcd43c0 100644 --- a/src/web/features.rs +++ b/src/web/features.rs @@ -1,5 +1,5 @@ use crate::{ - db::types::Feature as DbFeature, + db::types::{Feature as DbFeature, krate_name::KrateName}, impl_axum_webpage, web::{ MetaData, ReqVersion, @@ -104,6 +104,12 @@ impl FeaturesPage { .get(dependency) .unwrap_or(&ReqVersion::Latest) } + fn dependency_params(&self, dependency: &str) -> Option { + let version = self.dependency_version(dependency); + let dependency: KrateName = dependency.parse().ok()?; + + Some(RustdocParams::new(dependency).with_req_version(version)) + } } impl_axum_webpage! { @@ -152,7 +158,7 @@ pub(crate) async fn build_features_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.features_url(), diff --git a/src/web/mod.rs b/src/web/mod.rs index 19398cae1..91cc741fe 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -27,7 +27,7 @@ pub(crate) mod crate_details; mod csp; pub(crate) mod error; mod escaped_uri; -mod extractors; +pub(crate) mod extractors; mod features; mod file; pub(crate) mod headers; diff --git a/src/web/releases.rs b/src/web/releases.rs index 735bf2492..0b5a2ae77 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -2,6 +2,7 @@ use super::cache::CachePolicy; use crate::build_queue::PRIORITY_CONTINUOUS; +use crate::db::types::krate_name::KrateName; use crate::{ AsyncBuildQueue, Config, RegistryApi, build_queue::QueuedCrate, @@ -47,7 +48,7 @@ const RELEASES_IN_FEED: i64 = 150; #[derive(Debug, Clone, PartialEq, Eq, Serialize)] pub struct Release { - pub(crate) name: String, + pub(crate) name: KrateName, pub(crate) version: Version, pub(crate) description: Option, pub(crate) target_name: Option, @@ -59,7 +60,7 @@ pub struct Release { impl Release { pub fn rustdoc_params(&self) -> RustdocParams { - RustdocParams::new(&self.name) + RustdocParams::new(self.name.clone()) .with_req_version(self.version.clone()) .with_maybe_target_name(self.target_name.clone()) } @@ -142,7 +143,7 @@ pub(crate) enum ReleaseStatus { Available(Release), External(&'static OfficialCrateDescription), /// Only contains the crate name. - NotAvailable(String), + NotAvailable(KrateName), } struct SearchResult { @@ -165,7 +166,12 @@ async fn get_search_results( let names = Arc::new( crates .into_iter() - .map(|krate| krate.name) + .map(|krate| { + krate + .name + .parse::() + .expect("results from crates.io always pass the check") + }) .collect::>(), ); @@ -176,9 +182,9 @@ async fn get_search_results( // So for now we are using the version with the youngest release_time. // This is different from all other release-list views where we show // our latest build. - let mut crates: HashMap = sqlx::query!( + let mut crates: HashMap = sqlx::query!( r#"SELECT - crates.name, + crates.name as "name: KrateName", releases.version as "version: Version", releases.description, release_build_status.last_build_time, @@ -201,7 +207,7 @@ async fn get_search_results( WHERE crates.name = ANY($1) AND release_build_status.build_status <> 'in_progress'"#, - &names[..], + &names[..] as _, ) .fetch(&mut *conn) .map_ok(|row| { @@ -226,11 +232,13 @@ async fn get_search_results( // extend with the release/build information from docs.rs // Crates that are not on docs.rs yet will not be returned. let mut results = Vec::new(); - if let Some(desc) = super::rustdoc::DOC_RUST_LANG_ORG_REDIRECTS.get(query) { + if let Ok(krate) = query.parse::() + && let Some(desc) = super::rustdoc::DOC_RUST_LANG_ORG_REDIRECTS.get(&krate) + { results.push(ReleaseStatus::External(desc)); } - let names: Vec = + let names: Vec = Arc::into_inner(names).expect("Arc still borrowed in `get_search_results`"); results.extend(names.into_iter().map(|name| { if let Some(release) = crates.remove(&name) { @@ -464,13 +472,13 @@ async fn redirect_to_random_crate( // // If random-crate-searches end up being empty, increase that value. let row = sqlx::query!( - "WITH params AS ( + r#"WITH params AS ( -- get maximum possible id-value in crates-table SELECT last_value AS max_id FROM crates_id_seq ) SELECT - crates.name, - releases.version, + crates.name as "name: KrateName", + releases.version as "version: Version", releases.target_name FROM ( -- generate random numbers in the ID-range. @@ -483,7 +491,7 @@ async fn redirect_to_random_crate( WHERE releases.rustdoc_status = TRUE AND repositories.stars >= 100 - LIMIT 1", + LIMIT 1"#, config.random_crate_search_view_size as i32, ) .fetch_optional(&mut *conn) @@ -493,12 +501,8 @@ async fn redirect_to_random_crate( if let Some(row) = row { otel_metrics.im_feeling_lucky_searches.add(1, &[]); - let params = RustdocParams::new(&row.name) - .with_req_version(ReqVersion::Exact( - row.version - .parse() - .context("could not parse version releases table")?, - )) + let params = RustdocParams::new(row.name.clone()) + .with_req_version(ReqVersion::Exact(row.version.clone())) .with_maybe_target_name(row.target_name.as_deref()); trace!(?row, ?params, "redirecting to random crate result"); @@ -712,7 +716,7 @@ struct BuildQueuePage { description: &'static str, queue: Vec, rebuild_queue: Vec, - in_progress_builds: Vec<(String, Version)>, + in_progress_builds: Vec<(KrateName, Version)>, expand_rebuild_queue: bool, } @@ -728,9 +732,9 @@ pub(crate) async fn build_queue_handler( mut conn: DbConnection, Query(params): Query, ) -> AxumResult { - let in_progress_builds: Vec<(String, Version)> = sqlx::query!( + let in_progress_builds: Vec<(KrateName, Version)> = sqlx::query!( r#"SELECT - crates.name, + crates.name as "name: KrateName", releases.version as "version: Version" FROM builds INNER JOIN releases ON releases.id = builds.rid @@ -826,7 +830,7 @@ mod tests { vec!["foo"], releases .iter() - .map(|release| release.name.as_str()) + .map(|release| release.name.to_string()) .collect::>(), ); @@ -893,7 +897,7 @@ mod tests { ], releases .iter() - .map(|release| release.name.as_str()) + .map(|release| release.name.to_string()) .collect::>(), ); diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 9fd2179ab..59cc09107 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -18,7 +18,7 @@ use crate::{ escaped_uri::EscapedURI, extractors::{ DbConnection, Path, WantedCompression, - rustdoc::{PageKind, RustdocParams}, + rustdoc::{PageKind, RustdocParams, UrlParams}, }, file::StreamingFile, headers::{ETagComputer, IfNoneMatch, X_ROBOTS_TAG}, @@ -34,7 +34,7 @@ use anyhow::{Context as _, anyhow}; use askama::Template; use axum::{ body::Body, - extract::{Extension, Query, RawQuery}, + extract::{Extension, MatchedPath, Query, RawQuery}, http::StatusCode, response::{IntoResponse, Response as AxumResponse}, }; @@ -46,6 +46,7 @@ use http::{HeaderMap, HeaderValue, Uri, header::CONTENT_DISPOSITION, uri::Author use serde::Deserialize; use std::{ collections::HashMap, + iter, sync::{Arc, LazyLock}, }; use tracing::{Instrument, error, info_span, instrument, trace}; @@ -67,140 +68,78 @@ fn generate_content_disposition_header(storage_path: &str) -> anyhow::Result, pub(crate) href: Uri, pub(crate) description: &'static str, } -pub(crate) static DOC_RUST_LANG_ORG_REDIRECTS: LazyLock> = - LazyLock::new(|| { - HashMap::from([ - ( - "alloc", - OfficialCrateDescription { - name: "alloc", - href: "https://doc.rust-lang.org/stable/alloc/".parse().unwrap(), - description: "Rust alloc library", - }, - ), - ( - "liballoc", - OfficialCrateDescription { - name: "alloc", - href: "https://doc.rust-lang.org/stable/alloc/".parse().unwrap(), - description: "Rust alloc library", - }, - ), - ( - "core", - OfficialCrateDescription { - name: "core", - href: "https://doc.rust-lang.org/stable/core/".parse().unwrap(), - description: "Rust core library", - }, - ), - ( - "libcore", - OfficialCrateDescription { - name: "core", - href: "https://doc.rust-lang.org/stable/core/".parse().unwrap(), - description: "Rust core library", - }, - ), - ( - "proc_macro", - OfficialCrateDescription { - name: "proc_macro", - href: "https://doc.rust-lang.org/stable/proc_macro/" - .parse() - .unwrap(), - description: "Rust proc_macro library", - }, - ), - ( - "libproc_macro", - OfficialCrateDescription { - name: "proc_macro", - href: "https://doc.rust-lang.org/stable/proc_macro/" - .parse() - .unwrap(), - description: "Rust proc_macro library", - }, - ), - ( - "proc-macro", - OfficialCrateDescription { - name: "proc_macro", - href: "https://doc.rust-lang.org/stable/proc_macro/" - .parse() - .unwrap(), - description: "Rust proc_macro library", - }, - ), - ( - "libproc-macro", - OfficialCrateDescription { - name: "proc_macro", - href: "https://doc.rust-lang.org/stable/proc_macro/" - .parse() - .unwrap(), - description: "Rust proc_macro library", - }, - ), - ( - "std", - OfficialCrateDescription { - name: "std", - href: "https://doc.rust-lang.org/stable/std/".parse().unwrap(), - description: "Rust standard library", - }, - ), - ( - "libstd", - OfficialCrateDescription { - name: "std", - href: "https://doc.rust-lang.org/stable/std/".parse().unwrap(), - description: "Rust standard library", - }, - ), - ( - "test", - OfficialCrateDescription { - name: "test", - href: "https://doc.rust-lang.org/stable/test/".parse().unwrap(), - description: "Rust test library", - }, - ), - ( - "libtest", - OfficialCrateDescription { - name: "test", - href: "https://doc.rust-lang.org/stable/test/".parse().unwrap(), - description: "Rust test library", - }, - ), - ( - "rustc", - OfficialCrateDescription { - name: "rustc", - href: "https://doc.rust-lang.org/nightly/nightly-rustc/" - .parse() - .unwrap(), - description: "rustc API", - }, - ), - ( - "rustdoc", - OfficialCrateDescription { - name: "rustdoc", - href: "https://doc.rust-lang.org/nightly/nightly-rustc/rustdoc/" - .parse() - .unwrap(), - description: "rustdoc API", - }, - ), - ]) - }); +pub(crate) static DOC_RUST_LANG_ORG_REDIRECTS: LazyLock< + HashMap, +> = LazyLock::new(|| { + HashMap::from_iter( + [ + OfficialCrateDescription { + name: "alloc".parse().unwrap(), + aliases: vec!["liballoc".parse().unwrap()], + href: "https://doc.rust-lang.org/stable/alloc/".parse().unwrap(), + description: "Rust alloc library", + }, + OfficialCrateDescription { + name: "core".parse().unwrap(), + aliases: vec!["libcore".parse().unwrap()], + href: "https://doc.rust-lang.org/stable/core/".parse().unwrap(), + description: "Rust core library", + }, + OfficialCrateDescription { + name: "proc_macro".parse().unwrap(), + aliases: vec![ + "libproc_macro".parse().unwrap(), + "proc-macro".parse().unwrap(), + "libproc-macro".parse().unwrap(), + ], + href: "https://doc.rust-lang.org/stable/proc_macro/" + .parse() + .unwrap(), + description: "Rust proc_macro library", + }, + OfficialCrateDescription { + name: "std".parse().unwrap(), + aliases: vec!["libstd".parse().unwrap()], + href: "https://doc.rust-lang.org/stable/std/".parse().unwrap(), + description: "Rust standard library", + }, + OfficialCrateDescription { + name: "test".parse().unwrap(), + aliases: vec!["libtest".parse().unwrap()], + href: "https://doc.rust-lang.org/stable/test/".parse().unwrap(), + description: "Rust test library", + }, + OfficialCrateDescription { + name: "rustc".parse().unwrap(), + aliases: vec![], + href: "https://doc.rust-lang.org/nightly/nightly-rustc/" + .parse() + .unwrap(), + description: "rustc API", + }, + OfficialCrateDescription { + name: "rustdoc".parse().unwrap(), + aliases: vec![], + href: "https://doc.rust-lang.org/nightly/nightly-rustc/rustdoc/" + .parse() + .unwrap(), + description: "rustdoc API", + }, + ] + .into_iter() + .flat_map(|desc| { + iter::once(desc.name.clone()) + .chain(desc.aliases.clone()) + .map(move |name| (name, desc.clone())) + }), + ) +}); /// try to serve a toolchain specific asset from the legacy location. /// @@ -231,18 +170,32 @@ async fn try_serve_legacy_toolchain_asset( .into_response(if_none_match, STATIC_ASSET_CACHE_POLICY)) } +/// Intermediate struct to accept more variants than +/// `RustdocParams` would accept. +/// +/// After we handled the edge cases we convert this struct +/// into `RustdocParams`. +#[derive(Debug, Deserialize)] +pub(crate) struct RustdocRedirectorParams { + name: String, + #[serde(default)] + version: ReqVersion, + target: Option, +} + /// Handler called for `/:crate` and `/:crate/:version` URLs. Automatically redirects to the docs /// or crate details page based on whether the given crate version was successfully built. +#[allow(clippy::too_many_arguments)] #[instrument(skip(storage, conn))] pub(crate) async fn rustdoc_redirector_handler( - params: RustdocParams, + Path(params): Path, + original_uri: Uri, + matched_path: MatchedPath, Extension(storage): Extension>, mut conn: DbConnection, if_none_match: Option>, RawQuery(original_query): RawQuery, ) -> AxumResult { - let params = params.with_page_kind(PageKind::Rustdoc); - fn redirect_to_doc( original_uri: Option<&EscapedURI>, url: EscapedURI, @@ -272,21 +225,22 @@ pub(crate) async fn rustdoc_redirector_handler( Ok(axum_cached_redirect(url, cache_policy)?) } + // edge case 1: // global static assets for older builds are served from the root, which ends up // in this handler as `params.name`. - if let Some((_, extension)) = params.name().rsplit_once('.') + if let Some((_, extension)) = params.name.rsplit_once('.') && ["css", "js", "png", "svg", "woff", "woff2"] .binary_search(&extension) .is_ok() { - return try_serve_legacy_toolchain_asset(storage, params.name(), if_none_match.as_deref()) + return try_serve_legacy_toolchain_asset(storage, ¶ms.name, if_none_match.as_deref()) .instrument(info_span!("serve static asset")) .await; } - if let Some(extension) = params.file_extension() - && extension == "ico" - { + // edge case 2: + // Redirect all `.ico` requests to the global favicon location. + if original_uri.path().to_lowercase().ends_with(".ico") { // redirect all ico requests // originally from: // https://github.com/rust-lang/docs.rs/commit/f3848a34c391841a2516a9e6ad1f80f6f490c6d0 @@ -296,36 +250,64 @@ pub(crate) async fn rustdoc_redirector_handler( )?); } - let (crate_name, path_in_crate) = match params.name().split_once("::") { + // edge case 3: + // we split `{krate}::{what_to_search}` here from the `{name}` param. + let (crate_name, path_in_crate) = match params.name.split_once("::") { Some((krate, path)) => (krate.to_owned(), Some(path.to_owned())), - None => (params.name().to_owned(), None), + None => (params.name.clone(), None), }; - if let Some(description) = DOC_RUST_LANG_ORG_REDIRECTS.get(&*crate_name) { + // If we're here, we only should have valid crate names. + let crate_name: KrateName = crate_name + .parse() + .context("couldn't parse crate name") + .map_err(AxumNope::BadRequest)?; + + // edge case 4: + // official rust crates redirect to doc.rust-lang.org + if let Some(description) = DOC_RUST_LANG_ORG_REDIRECTS.get(&crate_name) { let target_uri = EscapedURI::from_uri(description.href.clone()).append_raw_query(original_query); - let krate_name: KrateName = crate_name.parse().expect("we know these are valid"); return redirect_to_doc( - params.original_uri(), + Some(&original_uri.into()), target_uri, - CachePolicy::ForeverInCdnAndStaleInBrowser(krate_name.into()), + CachePolicy::ForeverInCdnAndStaleInBrowser(crate_name.into()), path_in_crate.as_deref(), ); } + // after we handled the edge cases above we can generate our "normal" + // `RustdocParam`. + let params = RustdocParams::from_parts( + UrlParams { + name: crate_name.clone(), + version: params.version, + target: params.target, + path: None, + }, + original_uri, + matched_path, + ) + .map_err(AxumNope::BadRequest)? + .with_page_kind(PageKind::Rustdoc); + // it doesn't matter if the version that was given was exact or not, since we're redirecting // anyway let matched_release = match_version(&mut conn, &crate_name, ¶ms.req_version().clone()) .await? .into_exactly_named() .into_canonical_req_version(); + + // after `match_version` we should only use `matched_release.name()` and not the initial + // crate_name, because `.into_exactly_named` above might give us a corrected name. + drop(crate_name); + let params = params.apply_matched_release(&matched_release); trace!( ?matched_release, ?params, "parsed params with matched version" ); - let crate_name = matched_release.name.clone(); // we might get requests to crate-specific JS/CSS files here. if params.inner_path().ends_with(".js") || params.inner_path().ends_with(".css") { @@ -336,7 +318,7 @@ pub(crate) async fn rustdoc_redirector_handler( match storage .stream_rustdoc_file( - &crate_name, + params.name(), &krate.version, krate.latest_build_id, inner_path, @@ -379,9 +361,9 @@ pub(crate) async fn rustdoc_redirector_handler( params.original_uri(), params.rustdoc_url().append_raw_query(original_query), if matched_release.is_latest_url() { - CachePolicy::ForeverInCdn(crate_name.into()) + CachePolicy::ForeverInCdn(params.name().into()) } else { - CachePolicy::ForeverInCdnAndStaleInBrowser(crate_name.into()) + CachePolicy::ForeverInCdnAndStaleInBrowser(params.name().into()) }, path_in_crate.as_deref(), )? @@ -389,7 +371,7 @@ pub(crate) async fn rustdoc_redirector_handler( } else { Ok(axum_cached_redirect( params.crate_details_url().append_raw_query(original_query), - CachePolicy::ForeverInCdn(crate_name.into()), + CachePolicy::ForeverInCdn(params.name().into()), )? .into_response()) } @@ -619,7 +601,7 @@ pub(crate) async fn rustdoc_html_server_handler( AxumNope::Redirect( params .clone() - .with_confirmed_name(Some(corrected_name)) + .with_name(corrected_name) .with_req_version(req_version) .rustdoc_url() .append_raw_query(original_query.as_deref()), @@ -627,10 +609,7 @@ pub(crate) async fn rustdoc_html_server_handler( ) })? .into_canonical_req_version_or_else(|confirmed_name, version| { - let params = params - .clone() - .with_confirmed_name(Some(confirmed_name)) - .with_req_version(version); + let params = params.clone().with_req_version(version); AxumNope::Redirect( params.rustdoc_url(), CachePolicy::ForeverInCdn(confirmed_name.into()), @@ -744,7 +723,7 @@ pub(crate) async fn rustdoc_html_server_handler( ) { error!( - krate = params.name(), + krate = %params.name(), version = %krate.version, original_path = params.original_path(), storage_path, @@ -880,7 +859,7 @@ pub(crate) struct BadgeQueryParams { #[instrument(skip_all)] pub(crate) async fn badge_handler( - Path(name): Path, + Path(name): Path, Query(query): Query, ) -> AxumResult { let url = url::Url::parse(&format!( @@ -916,7 +895,7 @@ pub(crate) async fn json_download_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( @@ -1045,7 +1024,7 @@ pub(crate) async fn download_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.zip_download_url(), diff --git a/src/web/source.rs b/src/web/source.rs index 21fb7a959..dc008ead3 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -203,7 +203,7 @@ pub(crate) async fn source_browser_handler( AxumNope::Redirect( params .clone() - .with_confirmed_name(Some(corrected_name)) + .with_name(corrected_name) .with_req_version(req_version) .source_url(), CachePolicy::NoCaching, @@ -212,7 +212,7 @@ pub(crate) async fn source_browser_handler( .into_canonical_req_version_or_else(|confirmed_name, version| { let params = params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version); AxumNope::Redirect( params.source_url(), diff --git a/src/web/status.rs b/src/web/status.rs index 9dbb84ac8..d8bda616d 100644 --- a/src/web/status.rs +++ b/src/web/status.rs @@ -29,7 +29,7 @@ pub(crate) async fn status_handler( AxumNope::Redirect( params .clone() - .with_confirmed_name(Some(confirmed_name)) + .with_name(confirmed_name) .with_req_version(version) .build_status_url(), CachePolicy::NoCaching, diff --git a/templates/crate/features.html b/templates/crate/features.html index 05631048c..8f0c8cc56 100644 --- a/templates/crate/features.html +++ b/templates/crate/features.html @@ -80,19 +80,28 @@

{{ feature.name }}{%- if is_default %} (default){%- {% when SubFeature::Dependency with (dependency) %} {%- let version = dependency_version(dependency) -%} - dep: - {{- dependency -}} - + {%- if let Some(params) = dependency_params(dependency) -%} + dep: + {{- dependency -}} + + {%- else -%} + dep:{{- dependency -}} + {%- endif -%} {% when SubFeature::DependencyFeature with {dependency, feature, optional} %} {%- let version = dependency_version(dependency) -%} - {%- set dependency_params = RustdocParams::new(dependency.to_owned()).with_req_version(version.to_owned()) -%} - + {%- if let Some(params) = dependency_params(dependency) -%} + + {{- dependency -}} + + {%- if optional -%}?{%- endif -%} + / + {{- feature -}} + + {%- else -%} {{- dependency -}} - - {%- if optional -%}?{%- endif -%} - / - {{- feature -}} - + {%- if optional -%}?{%- endif -%} + /{{- feature -}} + {%- endif -%} {%- endmatch %} {%- if is_default %} (default){%- endif -%} diff --git a/templates/macros.html b/templates/macros.html index db8d0462c..f75d450f9 100644 --- a/templates/macros.html +++ b/templates/macros.html @@ -159,7 +159,7 @@ {% macro dependencies_list(dependencies, use_crate_details, if_empty) %} {%- for dep in dependencies -%}
  • - {%- set dependency_params = RustdocParams::new(dep.name.to_owned()).with_req_version(ReqVersion::Semver(dep.req.clone())) -%} + {%- set dependency_params = dep.rustdoc_params() -%} {%- set href -%} {%- if use_crate_details -%} {%- set href = dependency_params.crate_details_url() -%}