diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 09a9cc13..9af97a39 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -28,6 +28,8 @@ jobs: cargo build -p rustyms --no-default-features --features rand cargo build -p rustyms --no-default-features --features rayon cargo build -p rustyms --no-default-features --features mzdata + cargo build -p rustyms --no-default-features --features glycan-render + cargo build -p rustyms --no-default-features --features glycan-render-bitmap fmt: runs-on: ubuntu-latest diff --git a/.github/workflows/scripts/update-all-databases.sh b/.github/workflows/scripts/update-all-databases.sh index 06e6dbd3..51f37bf8 100644 --- a/.github/workflows/scripts/update-all-databases.sh +++ b/.github/workflows/scripts/update-all-databases.sh @@ -41,11 +41,9 @@ function make-ontologies { curl https://raw.githubusercontent.com/HUPO-PSI/mzIdentML/master/cv/XLMOD.obo \ > ${db_data}/XLMOD.obo curl -L http://purl.obolibrary.org/obo/gno.obo \ - | sed '/(property_value: GNO:00000(022|023|041|042|101|102) .*$\n)|(def: .*$\n)/d' \ - | gzip -c \ - > ${db_data}/GNOme.obo.gz + > ${db_data}/GNOme.obo curl -L https://glycosmos.org/download/glycosmos_glycans_list.csv \ - | gzip -c > ${db_data}/glycosmos_glycans_list.csv.gz + > ${db_data}/glycosmos_glycans_list.csv echo "Serializing the other databases..." 
diff --git a/.gitignore b/.gitignore index aeaa2116..259ef0c0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ Cargo.lock errors.dat *.dat.Z *.dat +*.html GNOme.obo glycosmos_glycans_list.csv .venv/ diff --git a/Cargo.toml b/Cargo.toml index 2b0e5225..f5afd39b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,15 +27,17 @@ codegen-units = 1 [workspace.dependencies] afl = "0.15" +base64 = "0.22" bincode = "1.3" clap = { version = "4.5", features = ["derive", "cargo"] } directories = "6.0" flate2 = "1.0" iai-callgrind = "0.14" itertools = "0.14" -mzdata = "0.44" +mzdata = {version="0.48", default-features = false, features = ["miniz_oxide"]} ndarray = "0.16" ordered-float = { version = "4.6", features = ["serde"] } +png = "0.17" probability = "0.20" pyo3 = "0.23" rand = "0.9" @@ -45,8 +47,10 @@ roxmltree = "0.20" serde = { version = "1.0", features = ["derive", "rc"] } serde_json = "1.0" similar = "2.7" +swash = "0.2" thin-vec = { version = "0.2", features = ["serde"] } -uom = { version = "0.36", features = ["use_serde", "usize", "isize"] } +uom = { version = "0.36", default-features = false, features = ["use_serde", "usize", "isize", "f64"] } +zeno = {version = "0.3.2" } [workspace.lints.rust] unexpected_cfgs = { level = "allow", check-cfg = [ diff --git a/examples/de-novo-align/Cargo.toml b/examples/de-novo-align/Cargo.toml index 5d6392dd..0c96605b 100644 --- a/examples/de-novo-align/Cargo.toml +++ b/examples/de-novo-align/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true publish = false [dependencies] -rustyms = { path = "../../rustyms" } +rustyms = { path = "../../rustyms", default-features=false, features = ["align", "identification"] } clap = { workspace = true } itertools = { workspace = true } rayon = { workspace = true } diff --git a/rustyms-generate-databases/src/gnome.rs b/rustyms-generate-databases/src/gnome.rs index 6c114088..09959e21 100644 --- a/rustyms-generate-databases/src/gnome.rs +++ b/rustyms-generate-databases/src/gnome.rs @@ -117,7 
+117,7 @@ impl std::str::FromStr for GnoSubsumption { } fn parse_gnome() -> HashMap { - let obo = OboOntology::from_file("rustyms-generate-databases/data/GNOme.obo.gz") + let obo = OboOntology::from_file("rustyms-generate-databases/data/GNOme.obo") .expect("Not a valid obo file"); let mut mods = HashMap::new(); @@ -221,7 +221,7 @@ fn parse_gnome_structures() -> HashMap { let mut glycans = HashMap::new(); let mut errors = 0; for line in parse_csv( - "rustyms-generate-databases/data/glycosmos_glycans_list.csv.gz", + "rustyms-generate-databases/data/glycosmos_glycans_list.csv", b',', None, ) diff --git a/rustyms/Cargo.toml b/rustyms/Cargo.toml index c15bd34c..d9ad91a1 100644 --- a/rustyms/Cargo.toml +++ b/rustyms/Cargo.toml @@ -13,7 +13,6 @@ repository = "https://github.com/snijderlab/rustyms" readme = "README.md" include = [ "src/**/*", - "databases/**/*.gz", "README.md", "build.rs", "benches/**/*", @@ -32,12 +31,17 @@ rayon = { workspace = true, optional = true } regex = { workspace = true } serde = { workspace = true } similar = { workspace = true } +swash = {workspace = true, optional = true} thin-vec = { workspace = true } uom = { workspace = true } +zeno = { workspace = true, optional = true } [dev-dependencies] +base64 = { workspace = true } iai-callgrind = { workspace = true } +png = { workspace = true } serde_json = { workspace = true } +directories = {workspace = true} [features] default = [ @@ -48,11 +52,15 @@ default = [ "isotopes", "rand", "mzdata", + "glycan-render", + "glycan-render-bitmap", ] imgt = [] align = [] identification = [] isotopes = ["probability", "ndarray"] +glycan-render = [] +glycan-render-bitmap = ["zeno", "swash", "glycan-render"] [[bench]] name = "iai" diff --git a/rustyms/README.md b/rustyms/README.md index 05b0fe3d..231e16f0 100644 --- a/rustyms/README.md +++ b/rustyms/README.md @@ -80,3 +80,5 @@ It has multiple features which allow you to slim it down if needed (all are enab * `rand` - allows the generation of random peptides. 
* `rayon` - enables parallel iterators using rayon, mostly for `imgt` but also in consecutive align. * `mzdata` - enables integration with [mzdata](https://github.com/mobiusklein/mzdata) which has more advanced raw file support. +* `glycan-render` - enables the rendering to SVGs for glycans and glycan fragments +* `glycan-render-bitmap` - enables the rendering to bitmaps for glycans, by enabling the optional dependencies zeno and swash diff --git a/rustyms/data/glycan.mgf b/rustyms/data/glycan.mgf index f57eab92..2d484f60 100644 --- a/rustyms/data/glycan.mgf +++ b/rustyms/data/glycan.mgf @@ -2,7 +2,7 @@ BEGIN IONS PEPMASS=660.2457879192369 CHARGE=1+ TITLE=MS/MS scan at 1.535 min with Intensity: 604.0 -SEQUENCE=N[GlycanStructure:Hex(Hex,HexNAc)] +SEQUENCE=N[G:G01141WK] 189.48956 5050.0 283.62076 5050.0 diff --git a/rustyms/images/glycan_root.svg b/rustyms/images/glycan_root.svg new file mode 100644 index 00000000..579ca532 --- /dev/null +++ b/rustyms/images/glycan_root.svg @@ -0,0 +1 @@ +pepNArg \ No newline at end of file diff --git a/rustyms/src/databases/gnome.dat b/rustyms/src/databases/gnome.dat index 7ddd75b6..f43397aa 100644 Binary files a/rustyms/src/databases/gnome.dat and b/rustyms/src/databases/gnome.dat differ diff --git a/rustyms/src/databases/xlmod.dat b/rustyms/src/databases/xlmod.dat index a35224ac..239d85cd 100644 Binary files a/rustyms/src/databases/xlmod.dat and b/rustyms/src/databases/xlmod.dat differ diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs index b93b362e..cfd67b9a 100644 --- a/rustyms/src/fragment.rs +++ b/rustyms/src/fragment.rs @@ -2,6 +2,7 @@ use std::{ borrow::Cow, + cmp::Ordering, fmt::{Debug, Display}, }; @@ -9,8 +10,10 @@ use itertools::Itertools; use ordered_float::OrderedFloat; use serde::{Deserialize, Serialize}; +#[cfg(feature = "glycan-render")] +use crate::glycan::GlycanSelection; use crate::{ - glycan::MonoSaccharide, + glycan::{GlycanBranchIndex, GlycanBranchMassIndex, MonoSaccharide}, model::ChargeRange, 
molecular_charge::{CachedCharge, MolecularCharge}, system::{ @@ -273,7 +276,7 @@ pub struct GlycanPosition { /// The series number (from the ion series terminal) pub series_number: usize, /// The branch naming - pub branch: Vec, + pub branch: Vec<(GlycanBranchIndex, GlycanBranchMassIndex)>, /// The aminoacid index where this glycan is attached pub attachment: Option<(AminoAcid, usize)>, } @@ -286,7 +289,7 @@ impl GlycanPosition { self.branch .iter() .enumerate() - .map(|(i, b)| { + .map(|(i, (_, b))| { if i == 0 { char::from_u32( (0x03B1..=0x03C9) @@ -334,7 +337,7 @@ pub enum DiagnosticPosition { } /// The possible types of fragments -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize, Default)] +#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize, Default)] #[expect(non_camel_case_types)] pub enum FragmentType { /// a @@ -360,7 +363,7 @@ pub enum FragmentType { // glycan A fragment (Never generated) //A(GlycanPosition), /// glycan B fragment - B(GlycanPosition), + // B(GlycanPosition), // glycan C fragment (Never generated) //C(GlycanPosition), // glycan X fragment (Never generated) @@ -369,10 +372,17 @@ pub enum FragmentType { Y(Vec), // glycan Z fragment (Never generated) // Z(GlycanPosition), - /// Internal glycan fragment, meaning both a B and Y breakages (and potentially multiple of both), resulting in a set of monosaccharides - Oxonium(Vec), + /// B glycan fragment, potentially with additional Y breakages + B { + /// The root break + b: GlycanPosition, + /// The branch breakages + y: Vec, + /// All branches that are not broken + end: Vec, + }, /// A B or internal glycan fragment for a glycan where only the composition is known, also saves the attachment (AA + sequence index) - OxoniumComposition(Vec<(MonoSaccharide, isize)>, Option<(AminoAcid, usize)>), + BComposition(Vec<(MonoSaccharide, isize)>, Option<(AminoAcid, usize)>), /// A B or internal glycan fragment for a glycan where only the composition is known, 
also saves the attachment (AA + sequence index) YComposition(Vec<(MonoSaccharide, isize)>, Option<(AminoAcid, usize)>), /// Immonium ion @@ -394,6 +404,101 @@ pub enum FragmentType { Precursor, } +impl std::cmp::Ord for FragmentType { + fn cmp(&self, other: &Self) -> Ordering { + // Sort on type first (precursor/abcxyz/dw/v), then tie-break on every remaining field so that `Equal` is only returned for values that are also `==` (required by the `Ord` contract, relied on by `BTreeMap`/`BTreeSet` and sort + dedup) + match (self, other) { + // Peptide + (Self::Precursor, Self::Precursor) => Ordering::Equal, + (Self::Precursor, _) => Ordering::Less, + (_, Self::Precursor) => Ordering::Greater, + (Self::a(s), Self::a(o)) => s.cmp(o), + (Self::a(_), _) => Ordering::Less, + (_, Self::a(_)) => Ordering::Greater, + (Self::b(s), Self::b(o)) => s.cmp(o), + (Self::b(_), _) => Ordering::Less, + (_, Self::b(_)) => Ordering::Greater, + (Self::c(s), Self::c(o)) => s.cmp(o), + (Self::c(_), _) => Ordering::Less, + (_, Self::c(_)) => Ordering::Greater, + (Self::x(s), Self::x(o)) => s.cmp(o), + (Self::x(_), _) => Ordering::Less, + (_, Self::x(_)) => Ordering::Greater, + (Self::y(s), Self::y(o)) => s.cmp(o), + (Self::y(_), _) => Ordering::Less, + (_, Self::y(_)) => Ordering::Greater, + (Self::z(s), Self::z(o)) => s.cmp(o), + (Self::z(_), _) => Ordering::Less, + (_, Self::z(_)) => Ordering::Greater, + (Self::z·(s), Self::z·(o)) => s.cmp(o), + (Self::z·(_), _) => Ordering::Less, + (_, Self::z·(_)) => Ordering::Greater, + (Self::d(s), Self::d(o)) => s.cmp(o), + (Self::d(_), _) => Ordering::Less, + (_, Self::d(_)) => Ordering::Greater, + (Self::w(s), Self::w(o)) => s.cmp(o), + (Self::w(_), _) => Ordering::Less, + (_, Self::w(_)) => Ordering::Greater, + (Self::v(s), Self::v(o)) => s.cmp(o), + (Self::v(_), _) => Ordering::Less, + (_, Self::v(_)) => Ordering::Greater, + (Self::Immonium(s, sa), Self::Immonium(o, oa)) => s.cmp(o).then_with(|| sa.cmp(oa)), + (Self::Immonium(_, _), _) => Ordering::Less, + (_, Self::Immonium(_, _)) => Ordering::Greater, + (Self::PrecursorSideChainLoss(s, sa), Self::PrecursorSideChainLoss(o, oa)) => s.cmp(o).then_with(|| sa.cmp(oa)), + (Self::PrecursorSideChainLoss(_, _), _) => Ordering::Less, + (_, 
Self::PrecursorSideChainLoss(_, _)) => Ordering::Greater, + (Self::Internal(st, sa, sb), Self::Internal(ot, oa, ob)) => { + sa.cmp(oa).then(sb.cmp(ob)).then(st.cmp(ot)) + } + (Self::Internal(_, _, _), _) => Ordering::Less, + (_, Self::Internal(_, _, _)) => Ordering::Greater, + // Glycans + (Self::B { b: sb, y: sy, end: se }, Self::B { b: ob, y: oy, end: oe }) => { + sy.len().cmp(&oy.len()).then(sb.cmp(ob)).then_with(|| sy.cmp(oy)).then_with(|| se.cmp(oe)) + } + (Self::Y(s), Self::Y(o)) => s.len().cmp(&o.len()).then_with(|| s.cmp(o)), + (Self::B { y: sy, .. }, Self::Y(o)) => { + (sy.len() + 1).cmp(&o.len()).then(Ordering::Greater) + } + (Self::Y(s), Self::B { y: oy, .. }) => { + s.len().cmp(&(oy.len() + 1)).then(Ordering::Less) + } + (Self::B { .. }, _) => Ordering::Less, + (_, Self::B { .. }) => Ordering::Greater, + (Self::Y(_), _) => Ordering::Less, + (_, Self::Y(_)) => Ordering::Greater, + (Self::BComposition(s, sl), Self::BComposition(o, ol)) + | (Self::YComposition(s, sl), Self::YComposition(o, ol)) => { + s.len().cmp(&o.len()).then(sl.cmp(ol)).then_with(|| s.cmp(o)) + } + (Self::BComposition(s, sl), Self::YComposition(o, ol)) => s + .len() + .cmp(&o.len()) + .then(sl.cmp(ol)) + .then(Ordering::Greater), + (Self::YComposition(s, sl), Self::BComposition(o, ol)) => { + s.len().cmp(&o.len()).then(sl.cmp(ol)).then(Ordering::Less) + } + (Self::BComposition(_, _), _) => Ordering::Less, + (_, Self::BComposition(_, _)) => Ordering::Greater, + (Self::YComposition(_, _), _) => Ordering::Less, + (_, Self::YComposition(_, _)) => Ordering::Greater, + // Other + (Self::Diagnostic(s), Self::Diagnostic(o)) => s.cmp(o), + (Self::Diagnostic(_), _) => Ordering::Less, + (_, Self::Diagnostic(_)) => Ordering::Greater, + (Self::Unknown(s), Self::Unknown(o)) => s.cmp(o), + } + } +} + +impl std::cmp::PartialOrd for FragmentType { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + impl FragmentType { /// Get the position of this ion (or None if it is a precursor ion) pub const fn position(&self) -> Option<&PeptidePosition> { @@ -415,10 +520,31 @@ impl 
FragmentType { } } - /// Get the glycan position of this ion (or None not applicable) + /// Get the root glycan position of this ion (or None if not applicable), Y is not defined as it does not have a root break pub const fn glycan_position(&self) -> Option<&GlycanPosition> { match self { - Self::B(n) | Self::Diagnostic(DiagnosticPosition::Glycan(n, _)) => Some(n), + Self::Diagnostic(DiagnosticPosition::Glycan(b, _)) | Self::B { b, .. } => Some(b), + _ => None, + } + } + + /// Get the glycan break positions of this ion (or None if not applicable), gives the sequence index, the root break, and the branch breaks. + /// Only available with feature 'glycan-render'. + #[cfg(feature = "glycan-render")] + pub fn glycan_break_positions(&self) -> Option<(Option, GlycanSelection<'_>)> { + match self { + Self::Diagnostic(DiagnosticPosition::Glycan(n, _)) => Some(( + n.attachment.map(|(_, p)| p), + GlycanSelection::SingleSugar(n), + )), + Self::Y(breaks) => Some(( + breaks.first().and_then(|p| p.attachment.map(|(_, p)| p)), + GlycanSelection::Subtree(None, breaks), + )), + Self::B { b, y, .. 
} => Some(( + b.attachment.map(|(_, p)| p), + GlycanSelection::Subtree(Some(b), y), + )), _ => None, } } @@ -439,15 +565,16 @@ impl FragmentType { | Self::Diagnostic(DiagnosticPosition::Peptide(n, _)) | Self::Immonium(n, _) | Self::PrecursorSideChainLoss(n, _) => Some(n.series_number.to_string()), - Self::B(n) | Self::Diagnostic(DiagnosticPosition::Glycan(n, _)) => Some(n.label()), + Self::Diagnostic(DiagnosticPosition::Glycan(n, _)) => Some(n.label()), Self::Y(bonds) => Some(bonds.iter().map(GlycanPosition::label).join("")), - Self::Oxonium(breakages) => Some( - breakages - .iter() - .map(std::string::ToString::to_string) - .join(""), + Self::B { b, y, end } => Some( + b.label() + + &y.iter() + .chain(end.iter()) + .map(GlycanPosition::label) + .join(""), ), - Self::YComposition(sugars, _) | Self::OxoniumComposition(sugars, _) => Some( + Self::YComposition(sugars, _) | Self::BComposition(sugars, _) => Some( sugars .iter() .map(|(sugar, amount)| format!("{sugar}{amount}")) @@ -479,7 +606,6 @@ impl FragmentType { Self::y(_) => Cow::Borrowed("y"), Self::z(_) => Cow::Borrowed("z"), Self::z·(_) => Cow::Borrowed("z·"), - Self::B(_) => Cow::Borrowed("B"), Self::Y(_) | Self::YComposition(_, _) => Cow::Borrowed("Y"), Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => { Cow::Owned(format!("d{}", aa.char())) @@ -490,7 +616,7 @@ impl FragmentType { DiagnosticPosition::Glycan(_, sug) | DiagnosticPosition::GlycanCompositional(sug, _), ) => Cow::Owned(format!("d{sug}")), - Self::Oxonium(_) | Self::OxoniumComposition(_, _) => Cow::Borrowed("oxonium"), + Self::B { .. 
} | Self::BComposition(_, _) => Cow::Borrowed("B"), Self::Immonium(_, aa) => Cow::Owned(format!("i{}", aa.aminoacid.char())), Self::PrecursorSideChainLoss(_, aa) => Cow::Owned(format!("p-s{}", aa.char())), Self::Precursor => Cow::Borrowed("p"), @@ -521,9 +647,8 @@ impl FragmentType { Self::Diagnostic( DiagnosticPosition::Glycan(_, _) | DiagnosticPosition::GlycanCompositional(_, _), ) - | Self::B(_) - | Self::Oxonium(_) - | Self::OxoniumComposition(_, _) => FragmentKind::Oxonium, + | Self::B { .. } + | Self::BComposition(_, _) => FragmentKind::B, Self::Diagnostic(_) => FragmentKind::diagnostic, Self::Immonium(_, _) => FragmentKind::immonium, Self::PrecursorSideChainLoss(_, _) => FragmentKind::precursor_side_chain_loss, @@ -622,7 +747,7 @@ pub enum FragmentKind { /// glycan Y fragment, generated by one or more branches broken Y, /// B or glycan diagnostic ion or Internal glycan fragment, meaning both a B and Y breakages (and potentially multiple of both), resulting in a set of monosaccharides - Oxonium, + B, /// Immonium ion immonium, /// Precursor with amino acid side chain loss @@ -653,7 +778,7 @@ impl Display for FragmentKind { Self::w => "w", Self::z => "z", Self::Y => "Y", - Self::Oxonium => "oxonium", + Self::B => "oxonium", Self::immonium => "immonium", Self::precursor_side_chain_loss => "precursor side chain loss", Self::diagnostic => "diagnostic", diff --git a/rustyms/src/glycan/glycan_structure.rs b/rustyms/src/glycan/glycan_structure.rs index c6be1d90..d12ad603 100644 --- a/rustyms/src/glycan/glycan_structure.rs +++ b/rustyms/src/glycan/glycan_structure.rs @@ -5,7 +5,10 @@ use std::{fmt::Display, hash::Hash}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use super::{glycan_parse_list, BaseSugar, MonoSaccharide, PositionedGlycanStructure}; +use super::{ + glycan_parse_list, BaseSugar, GlycanBranchIndex, GlycanBranchMassIndex, MonoSaccharide, + PositionedGlycanStructure, +}; use crate::{ error::{Context, CustomError}, formula::{Chemical, 
MolecularFormula}, @@ -115,30 +118,34 @@ impl GlycanStructure { fn internal_pos( self, inner_depth: usize, - branch: &[usize], + branch: &[(GlycanBranchIndex, GlycanBranchMassIndex)], ) -> (PositionedGlycanStructure, usize) { // Sort the branches on decreasing molecular weight - let mut branches = self.branches; - branches.sort_unstable_by(|a, b| { - b.formula() - .monoisotopic_mass() - .partial_cmp(&a.formula().monoisotopic_mass()) - .unwrap() - }); + let branches = self + .branches + .into_iter() + .enumerate() + .sorted_unstable_by(|(_, a), (_, b)| { + b.formula() + .monoisotopic_mass() + .partial_cmp(&a.formula().monoisotopic_mass()) + .unwrap() + }) + .collect_vec(); // Get the correct branch indices adding a new layer of indices when needed let branches: Vec<(PositionedGlycanStructure, usize)> = if branches.len() == 1 { branches .into_iter() - .map(|b| b.internal_pos(inner_depth + 1, branch)) + .map(|(_, b)| b.internal_pos(inner_depth + 1, branch)) .collect() } else { branches .into_iter() .enumerate() - .map(|(i, b)| { + .map(|(mass_index, (index, b))| { let mut new_branch = branch.to_vec(); - new_branch.push(i); + new_branch.push((index, mass_index)); b.internal_pos(inner_depth + 1, &new_branch) }) .collect() diff --git a/rustyms/src/glycan/mod.rs b/rustyms/src/glycan/mod.rs index 14f00d27..f4bfa520 100644 --- a/rustyms/src/glycan/mod.rs +++ b/rustyms/src/glycan/mod.rs @@ -3,7 +3,11 @@ mod glycan_structure; mod monosaccharide; mod positioned_structure; +#[cfg(feature = "glycan-render")] +mod render; pub use glycan_structure::*; pub use monosaccharide::*; pub use positioned_structure::*; +#[cfg(feature = "glycan-render")] +pub use render::{GlycanDirection, GlycanRoot, GlycanSelection, RenderedGlycan}; diff --git a/rustyms/src/glycan/monosaccharide.rs b/rustyms/src/glycan/monosaccharide.rs index c7c58e7d..2af1a818 100644 --- a/rustyms/src/glycan/monosaccharide.rs +++ b/rustyms/src/glycan/monosaccharide.rs @@ -73,7 +73,7 @@ impl MonoSaccharide { 
Charge::default(), peptidoform_ion_index, peptidoform_index, - FragmentType::OxoniumComposition(composition.clone(), attachment), + FragmentType::BComposition(composition.clone(), attachment), ) .with_charge_range(charge_carriers, model.glycan.oxonium_charge_range) .flat_map(|o| o.with_neutral_losses(&model.glycan.neutral_losses)), @@ -193,7 +193,7 @@ impl MonoSaccharide { base.with_neutral_loss(&NeutralLoss::Loss(molecular_formula!(C 2 H 6 O 3))), base.with_neutral_loss(&NeutralLoss::Loss(molecular_formula!(C 4 H 8 O 4))), ] - } else if matches!(self.base_sugar, BaseSugar::Nonose) + } else if matches!(self.base_sugar, BaseSugar::Nonose(_)) && (self.substituents == [ GlycanSubstituent::Amino, @@ -347,7 +347,9 @@ mod tests { ); assert_eq!( parse("D-Araf"), - MonoSaccharide::new(BaseSugar::Pentose(Some(PentoseIsomer::Arabinose)), &[]).furanose() + MonoSaccharide::new(BaseSugar::Pentose(Some(PentoseIsomer::Arabinose)), &[]) + .furanose() + .configuration(Configuration::D) ); assert_eq!( parse("Xyl-onic"), diff --git a/rustyms/src/glycan/positioned_structure.rs b/rustyms/src/glycan/positioned_structure.rs index 81741d0f..5db4b516 100644 --- a/rustyms/src/glycan/positioned_structure.rs +++ b/rustyms/src/glycan/positioned_structure.rs @@ -15,6 +15,11 @@ use crate::{ use crate::uom::num_traits::Zero; +/// The index in the branches as stored in the structure +pub type GlycanBranchIndex = usize; +/// The index in the branches when the branches are sorted on mass, this is used to properly render the names of the branches for human consumption +pub type GlycanBranchMassIndex = usize; + /// Rose tree representation of glycan structure #[derive(Debug, Eq, PartialEq, Clone, Hash, Serialize, Deserialize)] pub struct PositionedGlycanStructure { @@ -22,7 +27,15 @@ pub struct PositionedGlycanStructure { pub(super) branches: Vec, pub(super) inner_depth: usize, pub(super) outer_depth: usize, - pub(super) branch: Vec, + /// The branches taken to get to this location (from the root) as 
the index in the branches and the index in the branches when sorted by mass. + /// For a general glycan with a fucose on the first hexnac and a bisection after the core double + /// hexnac + hex, this variable will contain an empty list for the root hexnac. For the fucose + /// this variable will contain `[(0, 1)]` indicating it is the first branch in the structure but + /// the second branch if the branches are sorted by mass. For the monosaccharides in the left + /// bisection this variable will contain `[(1, 0), (0, 0)]`, indicating that it took the main + /// branch (and not the fucose) and that it took the left branch for the second bisection which + /// is heavier than the right branch. + pub(super) branch: Vec<(GlycanBranchIndex, GlycanBranchMassIndex)>, } impl Chemical for PositionedGlycanStructure { @@ -144,40 +157,36 @@ impl PositionedGlycanStructure { peptidoform_index: usize, attachment: Option<(AminoAcid, usize)>, ) -> Vec { - // Generate the basic single breakage B fragments - let mut base_fragments = vec![Fragment::new( - self.formula_inner(SequencePosition::default(), peptidoform_index), - Charge::zero(), - peptidoform_ion_index, - peptidoform_index, - FragmentType::B(self.position(attachment)), - )]; - // Extend with all internal fragments, meaning multiple breaking bonds - base_fragments.extend( - self.internal_break_points(peptidoform_index, attachment) - .into_iter() - .filter(|(_, breakages)| { - !breakages - .iter() - .all(|b| matches!(b, GlycanBreakPos::End(_))) - }) - .filter(|(m, _)| *m != MolecularFormula::default()) - .map(|(m, b)| { - ( - m, - [b, vec![GlycanBreakPos::B(self.position(attachment))]].concat(), - ) - }) - .map(|(formula, breakages)| { - Fragment::new( - formula, - Charge::zero(), - peptidoform_ion_index, - peptidoform_index, - FragmentType::Oxonium(breakages), - ) - }), - ); + // Find all B type fragments (with and without Y breakage) + let mut base_fragments = self + .internal_break_points(peptidoform_index, attachment) + 
.into_iter() + .filter(|(m, _)| *m != MolecularFormula::default()) + .map(|(formula, breakages)| { + Fragment::new( + formula, + Charge::zero(), + peptidoform_ion_index, + peptidoform_index, + FragmentType::B { + b: self.position(attachment), + y: breakages + .iter() + .filter(|b| matches!(b, GlycanBreakPos::Y(_))) + .map(GlycanBreakPos::position) + .cloned() + .collect(), + end: breakages + .iter() + .filter(|b| matches!(b, GlycanBreakPos::End(_))) + .map(GlycanBreakPos::position) + .cloned() + .collect(), + }, + ) + }) + .collect_vec(); + // Extend with the theoretical fragments for all branches of this position base_fragments.extend(self.branches.iter().flat_map(|b| { b.oxonium_fragments(peptidoform_ion_index, peptidoform_index, attachment) diff --git a/rustyms/src/glycan/render/absolute.rs b/rustyms/src/glycan/render/absolute.rs new file mode 100644 index 00000000..bbd85d70 --- /dev/null +++ b/rustyms/src/glycan/render/absolute.rs @@ -0,0 +1,460 @@ +use itertools::Itertools; + +use crate::{ + fragment::GlycanPosition, + glycan::{ + render::{ + element::GlycanRoot, + shape::{Colour, Shape}, + }, + GlycanBranchIndex, GlycanBranchMassIndex, GlycanStructure, RenderedGlycan, + }, + Chemical, +}; + +use super::element::GlycanSelection; + +impl GlycanStructure { + /// Render this glycan to the internal representation. This can then be rendered to SVG or a bitmap. + /// * `basis`: the text or symbol to draw at the root of the tree. + /// * `column_size`: the size (in pixels) of one block in the glycan, the full size with the padding and sugar size included. + /// * `sugar_size`: the size (in pixels) of a monosaccharide. + /// * `stroke_size`: the size (in pixels) of the strokes in the graphic. + /// * `direction`: the direction the draw the image in. + /// * `selection`: the selection of the glycan to draw, used to render fragments. + /// * `foreground`: the colour to be used for the foreground, in RGB order. 
+ /// * `background`: the colour to be used for the background, in RGB order, this is used to fill 'empty' sugars if the isomeric state is unknown. + /// * `footnotes`: used to gather modification texts that are too big to place in line. The caller will have to find their own way of displaying this to the user. + /// + /// # Returns + /// The rendered glycan, or `None` if it could not be rendered. This function returns an `Option`, not a `Result`, so no error value is reported. NOTE(review): presumably `None` means the given `selection` is not valid for this glycan; confirm against the inner `render`. + pub fn render<'a>( + &'a self, + basis: GlycanRoot, + column_size: f32, + sugar_size: f32, + stroke_size: f32, + direction: GlycanDirection, + selection: GlycanSelection<'a>, + foreground: [u8; 3], + background: [u8; 3], + footnotes: &'a mut Vec, + ) -> Option { + self.position_absolute(0, &[], footnotes).render( + basis, + column_size, + sugar_size, + stroke_size, + direction, + selection, + foreground, + background, + footnotes, + ) + } + + /// Build the rendered glycan. 
+ fn position_absolute( + &self, + depth: usize, + path: &[(GlycanBranchIndex, GlycanBranchMassIndex)], + footnotes: &mut Vec, + ) -> AbsolutePositionedGlycan { + let (shape, colour, inner_modifications, outer_modifications) = self.sugar.get_shape(); + // Automatically make footnotes out of long outer modification texts + let outer_modifications = if outer_modifications.len() > 6 { + let index = footnotes.iter().position(|e| *e == outer_modifications); + index.map_or_else( + || { + let index = footnotes.len(); + footnotes.push(outer_modifications); + OuterModifications::Footnote(index) + }, + OuterModifications::Footnote, + ) + } else if !outer_modifications.is_empty() { + OuterModifications::Text(outer_modifications) + } else { + OuterModifications::Empty + }; + + if self.branches.is_empty() { + AbsolutePositionedGlycan { + y: 0, + x: 0.0, + mid_point: 0.5, + width: 1.0, + shape, + colour, + inner_modifications, + outer_modifications, + position: GlycanPosition { + inner_depth: depth, + series_number: depth, + branch: path.to_vec(), + attachment: None, + }, + title: self.sugar.to_string(), + branch_index: 0, + branches: Vec::new(), + sides: Vec::new(), + } + } else { + let mut y_depth = 0; + let mut branches = Vec::new(); + let mut sides = Vec::new(); + for (mass_index, (branch_index, branch)) in self + .branches + .iter() + .enumerate() + .sorted_unstable_by(|(_, a), (_, b)| { + b.formula() + .monoisotopic_mass() + .partial_cmp(&a.formula().monoisotopic_mass()) + .unwrap() + }) + .enumerate() + .sorted_unstable_by(|a, b| (a.1 .0.cmp(&b.1 .0))) + { + let mut new_path = path.to_vec(); + new_path.push((branch_index, mass_index)); + let mut rendered = branch.position_absolute( + depth + 1, + if self.branches.len() > 1 { + &new_path + } else { + path + }, + footnotes, + ); + rendered.branch_index = branch_index; + if rendered.is_sideways() && sides.len() < 2 { + if sides.is_empty() && rendered.shape == Shape::Triangle { + rendered.shape = Shape::LeftPointingTriangle; 
+ } else if sides.len() == 1 && rendered.shape == Shape::Triangle { + rendered.shape = Shape::RightPointingTriangle; + } + sides.push(rendered); + } else { + y_depth = y_depth.max(rendered.y); + branches.push(rendered); + } + } + // Update all branch placements + let mut displacement = 0.0; + for branch in &mut branches { + branch.transpose(y_depth - branch.y, displacement); + displacement += branch.width; + } + if !branches.is_empty() { + y_depth += 1; + } + // Determine the center point for this sugar + let mut center = match branches.len() { + 0 => 0.5, + 1 => branches[0].mid_point, + n => { + // Find the median midpoint of the branches + (branches[n / 2 - (n + 1) % 2].x + + branches[n / 2 - (n + 1) % 2].mid_point + + branches[n / 2].x + + branches[n / 2].mid_point) + / 2.0 + } + }; + let mut width = branches.last().map_or(1.0, |b| b.x + b.width); + if !sides.is_empty() { + sides[0].transpose(y_depth, center + 0.5); + width = width.max(center + 0.5 + sides[0].width); + } + if sides.len() == 2 { + let mut x = center - 0.5 - sides[1].width; + if x < 0.0 { + let shift = -x; + center += shift; + for branch in &mut branches { + branch.transpose(0, shift); + } + sides[0].transpose(0, shift); + width += shift; + x = 0.0; + } + sides[1].transpose(y_depth, x); + } + AbsolutePositionedGlycan { + y: y_depth, + x: 0.0, + mid_point: center, + width, + shape, + colour, + inner_modifications, + outer_modifications, + position: GlycanPosition { + inner_depth: depth, + series_number: depth, + branch: path.to_vec(), + attachment: None, + }, + title: self.sugar.to_string(), + branch_index: 0, + branches, + sides, + } + } + } +} + +/// An absolute positioned glycan. 
+#[derive(Debug, Clone)] +pub(super) struct AbsolutePositionedGlycan { + /// The depth of this sugar along the main axis of the glycan, starting at 0 at the top (in the leaves) + pub(super) y: usize, + /// The sideways placement of this whole tree starting at 0 at the leftmost monosaccharide, 1.0 is the width of one monosaccharide + pub(super) x: f32, + /// The sideways placement of this sugar within this tree, for the absolute sideways placement of this sugar add this to `x` + pub(super) mid_point: f32, + /// The total width of the (sub)tree with all of its branches and sides + pub(super) width: f32, + /// The shape of the monosaccharide + pub(super) shape: Shape, + /// The colour of the monosaccharide + pub(super) colour: Colour, + /// Text to be shown inside the monosaccharide + pub(super) inner_modifications: String, + /// Text to be shown outside the monosaccharide + pub(super) outer_modifications: OuterModifications, + /// The position of this sugar + pub(super) position: GlycanPosition, + /// Full name of the glycan + pub(super) title: String, + /// The index into the branches of the parent monosaccharide + pub(super) branch_index: usize, + /// All branches that go up the tree + pub(super) branches: Vec, + /// All branches that go to the side (Fucoses) + pub(super) sides: Vec, +} + +#[derive(Debug, Clone)] +/// Modifications that are to be shown outside of the monosaccharide +pub(super) enum OuterModifications { + /// Too long of a text, or it did not fit, so show as a footnote + Footnote(usize), + /// Text + Text(String), + /// No modification + Empty, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +/// The direction of the rendered glycan +pub enum GlycanDirection { + /// A top down tree, with the root at the bottom + TopDown, + /// A left to right tree, with the root at the right + LeftToRight, +} + +/// A subtree of a rendered glycan, used to restrict the canvas for glycan fragments +#[derive(Debug, Clone)] +pub(super) struct 
SubTree<'a> { + /// The root for this sub tree + pub(super) tree: &'a AbsolutePositionedGlycan, + /// Total depth of the glycans with the breaks applied + pub(super) depth: usize, + /// The horizontal offset from the left + pub(super) left_offset: f32, + /// The horizontal offset from the right + pub(super) right_offset: f32, + /// If this fragment is topped by a breaking symbol, needed to calculate the correct height for the canvas + pub(super) break_top: bool, + /// If this fragment is bottomed by a breaking symbol, needed to calculate the correct height for the canvas + pub(super) break_bottom: bool, + /// All breaking branches, standardised to the linked root + pub(super) branch_breaks: Vec<(usize, Vec<(GlycanBranchIndex, GlycanBranchMassIndex)>)>, +} + +impl AbsolutePositionedGlycan { + /// Transpose this glycan and all of its branches + fn transpose(&mut self, y: usize, x: f32) { + self.y += y; + self.x += x; + for branch in &mut self.branches { + branch.transpose(y, x); + } + for side in &mut self.sides { + side.transpose(y, x); + } + } + + /// Check if this sugar should be rendered to the side of the parent sugar + fn is_sideways(&self) -> bool { + self.colour == Colour::Red + && self.shape == Shape::Triangle + && self.branches.is_empty() + && self.sides.is_empty() + } + + /// Get the subtree starting on the given position, return None if the starting position is not valid, it also indicates the depth of this subtree for the given branch breakages and if a break tops the structure + pub(super) fn get_subtree<'a>(&'a self, selection: GlycanSelection<'a>) -> Option> { + /// Calculate the maximal depth, break top, left and right offset + fn canvas_size( + tree: &AbsolutePositionedGlycan, + breakages: &[(usize, Vec<(GlycanBranchIndex, GlycanBranchMassIndex)>)], + ) -> (usize, bool, f32, f32) { + let lx = (tree.x + tree.mid_point - 0.5).max(0.0); + let rx = (tree.width - tree.mid_point - 0.5).max(0.0); + // The tree is cut here + if breakages.iter().any(|b| b.0 
== 0) { + return (0, true, lx, rx); + }; + + let total_branches = tree.branches.len() + tree.sides.len(); + let (depth, break_top, left_offset, right_offset) = match total_branches { + 0 => (0, false, lx, rx), + 1 => tree.branches.first().map_or((0, false, lx, rx), |branch| { + canvas_size( + branch, + &breakages + .iter() + .map(|b| (b.0 - 1, b.1.clone())) + .collect_vec(), + ) + }), + _ => tree + .branches + .iter() + .enumerate() + .map(|(i, branch)| { + ( + i, + canvas_size( + branch, + &breakages + .iter() + .filter(|b| { + b.1.first().map(|b| b.0) == Some(branch.branch_index) + }) + .map(|b| (b.0 - 1, b.1[1..].to_vec())) + .collect_vec(), + ), + ) + }) + .fold((0, false, lx, rx), |acc, (i, v)| { + ( + acc.0.max(v.0), + if v.0 >= acc.0 { v.1 } else { acc.1 }, + if i == 0 { v.2 } else { acc.2 }, + if i == tree.branches.len() - 1 { + v.3 + } else { + acc.3 + }, + ) + }), + }; + ( + depth + 1, + break_top, + if tree.sides.len() == 2 { + left_offset.min(tree.x + tree.mid_point - 1.5).max(0.0) + } else { + left_offset + }, + if tree.sides.is_empty() { + right_offset + } else { + right_offset.min(tree.width - tree.mid_point - 1.5).max(0.0) + }, + ) + } + + let (tree, rules, break_bottom) = match selection { + GlycanSelection::Subtree(root, branch_breaks) => { + let start = root.unwrap_or(&self.position); + let mut tree = self; + let mut depth = 0; + let mut branch_choices = start.branch.clone(); + branch_choices.reverse(); + while depth < start.inner_depth { + depth += 1; + + let total_branches = tree.branches.len() + tree.sides.len(); + match total_branches { + 0 => return None, + 1 => tree = tree.branches.first().or_else(|| tree.sides.first())?, + _ => { + let index = branch_choices.pop()?; + tree = tree + .branches + .iter() + .chain(tree.sides.iter()) + .find(|b| b.branch_index == index.0)?; + } + } + } + + let rules = branch_breaks + .iter() + .filter(|b| { + b.inner_depth >= start.inner_depth && b.branch.starts_with(&start.branch) + }) + .map(|b| { + ( + 
b.inner_depth - start.inner_depth, + b.branch[start.branch.len()..].to_vec(), + ) + }) + .collect_vec(); + (tree, rules, root.is_some()) + } + GlycanSelection::SingleSugar(position) => { + let mut tree = self; + let mut depth = 0; + let mut branch_choices = position.branch.clone(); + branch_choices.reverse(); + while depth < position.inner_depth { + depth += 1; + + let total_branches = tree.branches.len() + tree.sides.len(); + match total_branches { + 0 => return None, + 1 => tree = tree.branches.first().or_else(|| tree.sides.first())?, + _ => { + let index = branch_choices.pop()?; + tree = tree + .branches + .iter() + .find(|b| b.branch_index == index.0) + .or_else(|| { + tree.sides.iter().find(|b| b.branch_index == index.0) + })?; + } + } + } + + let rules = tree + .branches + .iter() + .chain(tree.sides.iter()) + .map(|b| { + (1, vec![(b.branch_index, b.branch_index)]) + // TODO: the mass_index should be stored here, but currently that is unused so for now this does not introduce incorrect behaviour + }) + .collect_vec(); + (tree, rules, true) + } + }; + let (depth, break_top, left_offset, right_offset) = canvas_size(tree, &rules); + Some(SubTree { + tree, + depth, + left_offset, + right_offset, + break_top, + break_bottom, + branch_breaks: rules, + }) + } +} diff --git a/rustyms/src/glycan/render/bitmap.rs b/rustyms/src/glycan/render/bitmap.rs new file mode 100644 index 00000000..465bb47b --- /dev/null +++ b/rustyms/src/glycan/render/bitmap.rs @@ -0,0 +1,387 @@ +use itertools::Itertools; +use swash::{ + scale::{Render, ScaleContext, Source}, + FontRef, +}; +use zeno::{Fill, Format, Mask, PathBuilder, Point, Scratch, Stroke, Vector}; + +use crate::glycan::{render::element::Element, RenderedGlycan}; + +use super::element::{TextAnchor, TextBaseline}; + +impl RenderedGlycan { + /// Render this glycan as an RGBA bitmap. + /// * `format`: the used strategy for antialiasing. + /// * `font`: the font for rendering text. 
+ /// * `context`: the context for caching rendering text. + /// # Panics + /// If the glyph renderer failed. See [`swash::scale::Render::render`]. + pub fn to_bitmap( + &self, + format: Format, + font: FontRef, + context: &mut ScaleContext, + ) -> (Vec, usize) { + let mask_factor = if format == Format::Alpha { 1 } else { 4 }; + let image_width = self.size.0.ceil() as usize; + let mut image = std::iter::repeat([ + self.background[0], + self.background[1], + self.background[2], + 0, + ]) + .take(image_width * self.size.1.ceil() as usize) + .flatten() + .collect_vec(); + + let mut scratch = Scratch::new(); + let mut stroke_mask = Vec::new(); + let mut fill_mask = Vec::new(); + for element in &self.elements { + // Draw into the mask + let (x, y, mask_width, fill, stroke) = match element { + Element::Line { + from, + to, + stroke, + stroke_size, + } => { + let xmin = (from.0.min(to.0) - stroke_size).floor(); + let xmax = (from.0.max(to.0) + stroke_size).ceil(); + let ymin = (from.1.min(to.1) - stroke_size).floor(); + let ymax = (from.1.max(to.1) + stroke_size).ceil(); + let width = (xmax - xmin) as usize; + let height = (ymax - ymin) as usize; + let commands = vec![ + zeno::Command::MoveTo(Vector::new( + from.0 - xmin + stroke_size / 2.0, + from.1 - ymin + stroke_size / 2.0, + )), + zeno::Command::LineTo(Vector::new( + to.0 - xmin + stroke_size / 2.0, + to.1 - ymin + stroke_size / 2.0, + )), + zeno::Command::Close, + ]; + stroke_mask.fill(0); + stroke_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Stroke::new(*stroke_size)) + .size(width as u32, height as u32) + .render_into(&mut stroke_mask, None); + (xmin as usize, ymin as usize, width, None, Some(*stroke)) + } + Element::Circle { + r, + center, + fill, + stroke, + stroke_size, + svg_header: _, + } => { + let width = (center.0.fract() + r * 2.0 + stroke_size).ceil() as usize; + let height = (center.1.fract() + r * 2.0 + stroke_size).ceil() as 
usize; + let mut commands = Vec::new(); + commands.add_circle( + ( + center.0.fract() + r + stroke_size / 2.0, + center.1.fract() + r + stroke_size / 2.0, + ), + *r, + ); + if fill.is_some() { + fill_mask.fill(0); + fill_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Fill::NonZero) + .size(width as u32, height as u32) + .render_into(&mut fill_mask, None); + } + stroke_mask.fill(0); + stroke_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Stroke::new(*stroke_size)) + .size(width as u32, height as u32) + .render_into(&mut stroke_mask, None); + ( + (center.0 - r) as usize, + (center.1 - r) as usize, + width, + *fill, + Some(*stroke), + ) + } + Element::Rectangle { + top, + w, + h, + fill, + stroke, + stroke_size, + svg_header: _, + } => { + let width = (top.0.fract() + w + stroke_size).ceil() as usize; + let height = (top.1.fract() + h + stroke_size).ceil() as usize; + let mut commands = Vec::new(); + commands.add_rect( + ( + top.0.fract() + stroke_size / 2.0, + top.1.fract() + stroke_size / 2.0, + ), + *w, + *h, + ); + fill_mask.fill(0); + fill_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Fill::NonZero) + .size(width as u32, height as u32) + .render_into(&mut fill_mask, None); + stroke_mask.fill(0); + stroke_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Stroke::new(*stroke_size)) + .size(width as u32, height as u32) + .render_into(&mut stroke_mask, None); + ( + (top.0 - stroke_size / 2.0) as usize, + (top.1 - stroke_size / 2.0) as usize, + width, + Some(*fill), + Some(*stroke), + ) + } + Element::Polygon { + points, + fill, + stroke, + stroke_size, + svg_header: _, + bevel, + } => { + let (xmin, xmax, ymin, ymax) = points + .iter() + .fold((f32::MAX, f32::MIN, f32::MAX, f32::MIN), 
|acc, (x, y)| { + (acc.0.min(*x), acc.1.max(*x), acc.2.min(*y), acc.3.max(*y)) + }); + let xmin = (xmin - stroke_size).floor(); + let xmax = (xmax + stroke_size).ceil(); + let ymin = (ymin - stroke_size).floor(); + let ymax = (ymax + stroke_size).ceil(); + let width = (xmax - xmin) as usize; + let height = (ymax - ymin) as usize; + let mut commands = Vec::with_capacity(points.len() + 2); + commands.push(zeno::Command::MoveTo(Point::new( + points[0].0 - xmin + stroke_size / 2.0, + points[0].1 - ymin + stroke_size / 2.0, + ))); + for point in points { + commands.push(zeno::Command::LineTo(Point::new( + point.0 - xmin + stroke_size / 2.0, + point.1 - ymin + stroke_size / 2.0, + ))); + } + commands.push(zeno::Command::Close); + fill_mask.fill(0); + fill_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Fill::NonZero) + .size(width as u32, height as u32) + .render_into(&mut fill_mask, None); + stroke_mask.fill(0); + stroke_mask.resize(height * width * mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Stroke::new(*stroke_size).join(if *bevel { + zeno::Join::Bevel + } else { + zeno::Join::Miter + })) + .size(width as u32, height as u32) + .render_into(&mut stroke_mask, None); + ( + xmin as usize, + ymin as usize, + width, + Some(*fill), + Some(*stroke), + ) + } + Element::Text { + text, + position, + anchor, + baseline, + fill, + size, + italic: _, // Needs a separate font + } => { + let mut scaler = context.builder(font).size(*size).hint(true).build(); + let metrics = font.metrics(&[]); + let normalisation_factor = size / f32::from(metrics.units_per_em); + let y_offset = (match baseline { + TextBaseline::Hanging => metrics.ascent, + TextBaseline::Middle => metrics.ascent - metrics.x_height / 2.0, + TextBaseline::Ideographic => metrics.ascent + metrics.descent, + }) + .mul_add(-normalisation_factor, position.1); + let mut width = 0.0; + for c in text.chars() { + 
let id = font.charmap().map(c); + width += font.glyph_metrics(&[]).advance_width(id); + } + + let x_offset = (match anchor { + TextAnchor::Start => 0.0, + TextAnchor::Middle => width / 2.0, + TextAnchor::End => width, + }) + .mul_add(-normalisation_factor, position.0); + + let mut offset = 0.0; + for c in text.chars() { + let id = font.charmap().map(c); + let glyph_metrics = font.glyph_metrics(&[]); + let mask = Render::new(&[Source::Outline]) + .format(format) + .offset(Vector::new( + (x_offset + offset).fract(), + y_offset.fract() - 1.0, + )) + .render(&mut scaler, id) + .unwrap(); + draw_mask( + (&mut image, image_width), + (&mask.data, mask.placement.width as usize), + (x_offset + offset + mask.placement.left as f32) as usize, + (y_offset + mask.placement.top as f32) as usize, + *fill, + format, + ); + + offset += glyph_metrics.advance_width(id) * normalisation_factor; + } + (0, 0, 0, None, None) + } + Element::Curve { + start, + points, + stroke, + stroke_size, + } => { + let (xmin, xmax, ymin, ymax) = points.iter().fold( + (start.0, start.0, start.1, start.1), // Seed with the start point so the mask also covers the beginning of the curve + |acc, (a, b, x, y)| { + ( + acc.0.min(*x).min(*a), + acc.1.max(*x).max(*a), + acc.2.min(*y).min(*b), + acc.3.max(*y).max(*b), + ) + }, + ); + let xmin = (xmin - stroke_size).floor(); + let xmax = (xmax + stroke_size).ceil(); + let ymin = (ymin - stroke_size).floor(); + let ymax = (ymax + stroke_size).ceil(); + let width = (xmax - xmin) as usize; + let height = (ymax - ymin) as usize; + let mut commands = Vec::with_capacity(points.len() + 1); + commands.push(zeno::Command::MoveTo(Point::new( + start.0 - xmin + stroke_size / 2.0, + start.1 - ymin + stroke_size / 2.0, + ))); + for point in points { + commands.push(zeno::Command::QuadTo( + Point::new( + point.0 - xmin + stroke_size / 2.0, + point.1 - ymin + stroke_size / 2.0, + ), + Point::new( + point.2 - xmin + stroke_size / 2.0, + point.3 - ymin + stroke_size / 2.0, + ), + )); + } + stroke_mask.fill(0); + stroke_mask.resize(height * width * 
mask_factor, 0); + Mask::with_scratch(&commands, &mut scratch) + .format(format) + .style(Stroke::new(*stroke_size)) + .size(width as u32, height as u32) + .render_into(&mut stroke_mask, None); + (xmin as usize, ymin as usize, width, None, Some(*stroke)) + } + }; + if let Some(fill) = fill { + draw_mask( + (&mut image, image_width), + (&fill_mask, mask_width), + x, + y, + fill, + format, + ); + } + if let Some(stroke) = stroke { + draw_mask( + (&mut image, image_width), + (&stroke_mask, mask_width), + x, + y, + stroke, + format, + ); + } + } + (image, image_width) + } +} + +/// Draw the specified mask onto the specified image with its top left corner at (`x`, `y`). A zero width mask is ignored and mask pixels that fall outside of the image width are skipped instead of wrapping around to the next row. +#[allow(clippy::identity_op, clippy::needless_pass_by_value)] // I like the + 0 in position calculations for symmetry reasons and the image tuple makes no sense to pass by reference +fn draw_mask( + image: (&mut [u8], usize), + mask: (&[u8], usize), + x: usize, + y: usize, + colour: [u8; 3], + format: Format, +) { + let mask_factor = if format == Format::Alpha { 1 } else { 4 }; + let mask_height = mask.0.len().checked_div(mask_factor * mask.1).unwrap_or(0); + for r in 0..mask_height { + for w in 0..mask.1 { + let image_pos = ((r + y) * image.1 + (w + x)) * 4; + let mask_pos = (r * mask.1 + w) * mask_factor; + + if w + x >= image.1 || image_pos >= image.0.len() || mask_pos >= mask.0.len() { + continue; + } + + if format == Format::Alpha { + image.0[image_pos + 0] = blend(mask.0[mask_pos], colour[0], image.0[image_pos + 0]); + image.0[image_pos + 1] = blend(mask.0[mask_pos], colour[1], image.0[image_pos + 1]); + image.0[image_pos + 2] = blend(mask.0[mask_pos], colour[2], image.0[image_pos + 2]); + } else { + image.0[image_pos + 0] = + blend(mask.0[mask_pos + 0], colour[0], image.0[image_pos + 0]); + image.0[image_pos + 1] = + blend(mask.0[mask_pos + 1], colour[1], image.0[image_pos + 1]); + image.0[image_pos + 2] = + blend(mask.0[mask_pos + 2], colour[2], image.0[image_pos + 2]); + } + image.0[image_pos + 3] = 255; + } + } +} + +const fn blend(alpha: u8, foreground: u8, background: u8) 
-> u8 { + (((alpha as u16 * foreground as u16) + (255 - alpha) as u16 * background as u16) / 255) as u8 +} diff --git a/rustyms/src/glycan/render/element.rs b/rustyms/src/glycan/render/element.rs new file mode 100644 index 00000000..19b60732 --- /dev/null +++ b/rustyms/src/glycan/render/element.rs @@ -0,0 +1,1162 @@ +use std::f32::consts::PI; + +use itertools::Itertools; + +use crate::{ + fragment::GlycanPosition, + glycan::{ + render::{ + absolute::{AbsolutePositionedGlycan, OuterModifications}, + shape::{Colour, Shape}, + }, + GlycanBranchIndex, GlycanBranchMassIndex, GlycanDirection, + }, +}; + +/// A rendered glycan, contains all information needed to render this to svg or a bitmap. +pub struct RenderedGlycan { + /// The size of the canvas + pub(super) size: (f32, f32), + /// All elements to be rendered + pub(super) elements: Vec, + /// The background colour + pub(super) background: [u8; 3], + /// Midpoint in pixels from the right for a top down glycan or in pixels from the top for a left to right glycan + pub midpoint: f32, +} + +#[derive(Debug, Clone)] +pub(super) enum Element { + Line { + from: (f32, f32), + to: (f32, f32), + stroke: [u8; 3], + stroke_size: f32, + }, + Circle { + r: f32, + center: (f32, f32), + fill: Option<[u8; 3]>, + stroke: [u8; 3], + stroke_size: f32, + svg_header: String, + }, + Rectangle { + top: (f32, f32), + w: f32, + h: f32, + fill: [u8; 3], + stroke: [u8; 3], + stroke_size: f32, + svg_header: String, + }, + Polygon { + points: Vec<(f32, f32)>, + fill: [u8; 3], + stroke: [u8; 3], + stroke_size: f32, + svg_header: String, + bevel: bool, + }, + Curve { + start: (f32, f32), + points: Vec<(f32, f32, f32, f32)>, + stroke: [u8; 3], + stroke_size: f32, + }, + Text { + text: String, + position: (f32, f32), + anchor: TextAnchor, + baseline: TextBaseline, + fill: [u8; 3], + size: f32, + italic: bool, + }, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub(super) enum TextAnchor { + Start, + Middle, + End, +} + +#[derive(Debug, Clone, 
Copy, Eq, PartialEq)] +pub(super) enum TextBaseline { + Hanging, + Middle, + Ideographic, +} + +/// The symbol or text to use at the base of a glycan. +/// +#[doc = include_str!("../../../images/glycan_root.svg")] +/// +/// _Glycan [G01670UQ](http://glytoucan.org/Structures/Glycans/G01670UQ) using the different root types: None, Line, Symbol, Text("pep"), Text("N"), Text("Arg")_ +/// +/// ```rust +/// # use rustyms::glycan::{GlycanStructure, GlycanDirection, GlycanRoot, GlycanSelection}; +/// const COLUMN_SIZE: f32 = 30.0; +/// const SUGAR_SIZE: f32 = 15.0; +/// const STROKE_SIZE: f32 = 1.5; +/// let mut output = String::new(); +/// let mut footnotes = Vec::new(); +/// let short_iupac = "Neu5Ac(a2-6)Gal(b1-4)GlcNAc(b1-2)Man(a1-3)[Gal(b1-4)GlcNAc(b1-2)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc(?1-"; // Definition for G01670UQ +/// let structure = GlycanStructure::from_short_iupac(short_iupac, 0..short_iupac.len(), 0).unwrap(); +/// for root in [ +/// GlycanRoot::None, +/// GlycanRoot::Line, +/// GlycanRoot::Symbol, +/// GlycanRoot::Text("pep".to_string()), +/// GlycanRoot::Text("N".to_string()), +/// GlycanRoot::Text("Arg".to_string()), +/// ] { +/// let rendered = structure +/// .render( +/// root, +/// COLUMN_SIZE, +/// SUGAR_SIZE, +/// STROKE_SIZE, +/// GlycanDirection::TopDown, +/// GlycanSelection::FULL, +/// [0, 0, 0], +/// [255, 255, 255], +/// &mut footnotes, +/// ) +/// .unwrap(); +/// rendered.to_svg(&mut output).unwrap(); +/// } +/// ``` +/// This example shows how to generate SVGs for all the different root types as seen in the above picture. +/// Note that this writes all SVGs after each other to the variable `output`. Also note that this writes +/// all modifications that did not fit inside the image in the variable `footnotes` and this will need to +/// be dealt with by the caller, as indicated in [`GlycanStructure::render`](crate::glycan::GlycanStructure::render). 
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub enum GlycanRoot { + /// No symbol, this will also not draw a line from the root sugar + #[default] + None, + /// No symbol, but this will draw a line from the root sugar + Line, + /// A tilde ('~') like symbol to indicate the full peptidoform + Symbol, + /// A piece of text, take care to not make this too big as it will be cut off in the image. + /// Commonly used options are 'pep' to indicate the full peptidoform, or to indicate the + /// attached amino acid any of 'Arg', or 'N'. + Text(String), +} + +/// The selected (part) of a glycan to render, using [`Self::FULL`] is a shortcut to get the full glycan. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum GlycanSelection<'a> { + /// A subtree of the glycan, with potentially a break of the root of the subtree and breaks in the branches. + /// If no breaks are specified the full glycan is shown. The root is the first monosaccharide to be included + /// in the rendering. The fragment will not include the indicated glycan positions for the branch breaks. + Subtree(Option<&'a GlycanPosition>, &'a [GlycanPosition]), + /// A single sugar, all its branches will be shown as broken. + SingleSugar(&'a GlycanPosition), +} + +impl GlycanSelection<'static> { + /// A shorthand for a full glycan. + pub const FULL: Self = Self::Subtree(None, &[]); +} + +impl AbsolutePositionedGlycan { + /// Render this glycan to the internal rendering representation, returns None if the root break contains an invalid position. 
+ pub(super) fn render<'a>( + &'a self, + basis: GlycanRoot, + column_size: f32, + sugar_size: f32, + stroke_size: f32, + direction: GlycanDirection, + selection: GlycanSelection<'a>, + foreground: [u8; 3], + background: [u8; 3], + footnotes: &'a mut Vec, + ) -> Option { + fn render_element( + buffer: &mut Vec, + element: &AbsolutePositionedGlycan, + column_size: f32, + sugar_size: f32, + stroke_size: f32, + direction: GlycanDirection, + x_offset: f32, + y_offset: f32, + breaks: &[(usize, Vec<(GlycanBranchIndex, GlycanBranchMassIndex)>)], + foreground: [u8; 3], + background: [u8; 3], + incoming_stroke: (f32, f32, f32, f32), + footnotes: &mut Vec, + ) { + let raw_x = element.x - x_offset; + let raw_y = element.y as f32 - y_offset; + + let total_branches = element.branches.len() + element.sides.len(); + let mut strokes = vec![incoming_stroke]; + // First all lines to get good stacking behaviour + for (side, branch) in element + .branches + .iter() + .map(|b| (false, b)) + .chain(element.sides.iter().map(|b| (true, b))) + { + let origin_x = (raw_x + element.mid_point) * column_size; + let origin_y = (raw_y + 0.5) * column_size; + let base_x = (branch.x + branch.mid_point - x_offset) * column_size; + if (total_branches == 1 && breaks.iter().any(|b| b.0 == 1)) + || breaks + .iter() + .any(|b| b.0 == 1 && b.1.first().map(|b| b.0) == Some(branch.branch_index)) + { + let base_y = + (raw_y - 0.5 + f32::from(side)).mul_add(column_size, stroke_size * 0.5); + let angle = f32::atan2(base_y - origin_y, base_x - origin_x); + buffer.push(Element::Line { + from: pick_point((origin_x, origin_y), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + let x1 = (sugar_size / 2.0).mul_add(0.5f32.mul_add(PI, -angle).cos(), base_x); + let y1 = (sugar_size / 2.0).mul_add(-0.5f32.mul_add(PI, -angle).sin(), base_y); + let x2 = (sugar_size / 2.0).mul_add(-0.5f32.mul_add(PI, -angle).cos(), base_x); + let y2 = (sugar_size / 
2.0).mul_add(0.5f32.mul_add(PI, -angle).sin(), base_y); + buffer.push(Element::Line { + from: pick_point((x1, y1), direction), + to: pick_point((x2, y2), direction), + stroke: foreground, + stroke_size, + }); + let x3 = (stroke_size * 2.0).mul_add(-angle.cos(), x1); + let y3 = (stroke_size * 2.0).mul_add(-angle.sin(), y1); + buffer.push(Element::Line { + from: pick_point((x1, y1), direction), + to: pick_point((x3, y3), direction), + stroke: foreground, + stroke_size, + }); + let offset = 0.25f32.mul_add(column_size, stroke_size); + let r = 0.25f32 + .mul_add(column_size, -stroke_size) + .min(sugar_size * 0.25); + let adjusted_x = offset.mul_add(-angle.cos(), base_x); + let adjusted_y = offset.mul_add(-angle.sin(), base_y); + buffer.push(Element::Circle { + r, + center: pick_point((adjusted_x, adjusted_y), direction), + fill: None, + stroke: foreground, + stroke_size, + svg_header: String::new(), + }); + strokes.push(pick_box( + ( + origin_x.min(base_x), + base_y.min(origin_y), + origin_x.max(base_x), + base_y.max(origin_y), + ), + direction, + )); + } else { + let base_y = ((branch.y as f32) - y_offset + 0.5) * column_size; + buffer.push(Element::Line { + from: pick_point((origin_x, origin_y), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + strokes.push(pick_box( + ( + origin_x.min(base_x), + base_y.min(origin_y), + origin_x.max(base_x), + base_y.max(origin_y), + ), + direction, + )); + } + } + + // Render the sugar + let fill = if element.colour == Colour::Background { + background + } else { + element.colour.rgb() + }; + let title = format!( + " data-sugar=\"{}\" data-position=\"{}-{}\"", + element.title, + element.position.inner_depth, + element.position.branch.iter().map(|b| b.0).join(",") + ); + match element.shape { + Shape::Circle => buffer.push(Element::Circle { + r: sugar_size / 2.0, + center: pick_point( + ( + (raw_x + element.mid_point) * column_size, + (raw_y + 0.5) * column_size, + ), + direction, 
+ ), + fill: Some(fill), + stroke: foreground, + stroke_size, + svg_header: title, + }), + Shape::Square => buffer.push(Element::Rectangle { + top: pick_point( + ( + (raw_x + element.mid_point).mul_add(column_size, -sugar_size / 2.0), + (raw_y + 0.5).mul_add(column_size, -sugar_size / 2.0), + ), + direction, + ), + w: sugar_size, + h: sugar_size, + fill, + stroke: foreground, + stroke_size, + svg_header: title, + }), + Shape::Rectangle => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + buffer.push(Element::Rectangle { + top: (base_x - sugar_size / 2.0, base_y - sugar_size / 4.0), + w: sugar_size, + h: sugar_size / 2.0, + fill, + stroke: foreground, + stroke_size, + svg_header: title, + }); + } + Shape::Triangle => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + sugar_size / 2.0; + let x3 = x1 + sugar_size; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x2, y1), (x3, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::LeftPointingTriangle => { + let x1 = (raw_x + element.mid_point).mul_add(column_size, -sugar_size / 2.0); + let x2 = x1 + sugar_size; + let y1 = (raw_y + 0.5).mul_add(column_size, -sugar_size / 2.0); + let y2 = y1 + sugar_size / 2.0; + let y3 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![ + pick_point((x1, y2), direction), + pick_point((x2, y1), direction), + pick_point((x2, y3), direction), + ], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::RightPointingTriangle => { + let x1 = (raw_x + element.mid_point).mul_add(column_size, -sugar_size / 2.0); + let x2 = x1 + sugar_size; + let y1 = (raw_y + 
0.5).mul_add(column_size, -sugar_size / 2.0); + let y2 = y1 + sugar_size / 2.0; + let y3 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![ + pick_point((x1, y1), direction), + pick_point((x2, y2), direction), + pick_point((x1, y3), direction), + ], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::Diamond => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + sugar_size / 2.0; + let x3 = x1 + sugar_size; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size / 2.0; + let y3 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![(x2, y1), (x3, y2), (x2, y3), (x1, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::FlatDiamond => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + sugar_size / 2.0; + let x3 = x1 + sugar_size; + let y1 = base_y - sugar_size / 4.0; + let y2 = y1 + sugar_size / 4.0; + let y3 = y1 + sugar_size / 2.0; + + buffer.push(Element::Polygon { + points: vec![(x2, y1), (x3, y2), (x2, y3), (x1, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::Hexagon => { + let a = sugar_size / 2.0 / 3.0_f32.sqrt(); + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + a; + let x3 = x1 + sugar_size - a; + let x4 = x1 + sugar_size; + let y1 = base_y - sugar_size / 4.0; + let y2 = y1 + sugar_size / 4.0; + let y3 = y1 + sugar_size / 2.0; + + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x2, y1), (x3, y1), (x4, y2), (x3, y3), 
(x2, y3)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::Pentagon => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let a = (18.0 / 360.0 * 2.0 * PI).cos() * sugar_size / 2.0; + let b = (18.0 / 360.0 * 2.0 * PI).sin() * sugar_size / 2.0; + let c = (36.0 / 360.0 * 2.0 * PI).cos() * sugar_size / 2.0; + let d = (36.0 / 360.0 * 2.0 * PI).sin() * sugar_size / 2.0; + let x1 = base_x - a; + let x2 = base_x - d; + let x3 = base_x; + let x4 = base_x + d; + let x5 = base_x + a; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size / 2.0 - b; + let y3 = y1 + sugar_size / 2.0 + c; + + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x3, y1), (x5, y2), (x4, y3), (x2, y3)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::Star => { + // The Phi constant, the ratio for the "golden ratio" + const PHI: f32 = 1.618_034_f32; + // Calculate sizes of parts of the pentagram + let a = (18.0 / 360.0 * 2.0 * PI).cos() * sugar_size / 2.0; + let b = (18.0 / 360.0 * 2.0 * PI).sin() * sugar_size / 2.0; + let c = (36.0 / 360.0 * 2.0 * PI).cos() * sugar_size / 2.0; + let d = (36.0 / 360.0 * 2.0 * PI).sin() * sugar_size / 2.0; + let e = 2.0 * a / (54.0 / 360.0 * 2.0 * PI).sin() / (1.0 + 1.0 / PHI); + let f = (18.0 / 360.0 * 2.0 * PI) + .cos() + .mul_add(e, -(sugar_size / 2.0)); + let g = (18.0 / 360.0 * 2.0 * PI).sin() * e; + let h = (sugar_size / 2.0 - b) * (18.0 / 360.0 * 2.0 * PI).tan(); + let j = (18.0 / 360.0 * 2.0 * PI).tan() * g; + // Calculate the positions of the pentagram points + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - a; + let x2 = base_x - d; + let x3 = base_x - g; + let x4 = base_x - h; + let x5 = base_x; + let x6 = base_x + h; + let x7 = 
base_x + g; + let x8 = base_x + d; + let x9 = base_x + a; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size / 2.0 - b; + let y3 = y1 + sugar_size / 2.0 + j; + let y4 = y1 + sugar_size / 2.0 + f; + let y5 = y1 + sugar_size / 2.0 + c; + + buffer.push(Element::Polygon { + points: vec![ + (x1, y2), + (x4, y2), + (x5, y1), + (x6, y2), + (x9, y2), + (x7, y3), + (x8, y5), + (x5, y4), + (x2, y5), + (x3, y3), + ], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::CrossedSquare => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let y1 = base_y - sugar_size / 2.0; + let x2 = x1 + sugar_size; + let y2 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![(x1, y1), (x2, y1), (x2, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x1, y1), (x1, y2), (x2, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x1, y1), (x2, y1), (x2, y2), (x1, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::DividedDiamond => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + sugar_size / 2.0; + let x3 = x1 + sugar_size; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size / 2.0; + let y3 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x2, y1), (x3, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x2, y3), (x3, y2)], + fill, + stroke: 
foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x1, y2), (x2, y1), (x3, y2), (x2, y3)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + Shape::DividedTriangle => { + let (base_x, base_y) = pick_point( + ( + ((raw_x + element.mid_point) * column_size), + (raw_y + 0.5) * column_size, + ), + direction, + ); + let x1 = base_x - sugar_size / 2.0; + let x2 = x1 + sugar_size / 2.0; + let x3 = x1 + sugar_size; + let y1 = base_y - sugar_size / 2.0; + let y2 = y1 + sugar_size; + + buffer.push(Element::Polygon { + points: vec![(x2, y1), (x3, y2), (x2, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x2, y1), (x1, y2), (x2, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: String::new(), + bevel: true, + }); + buffer.push(Element::Polygon { + points: vec![(x2, y1), (x3, y2), (x1, y2)], + fill, + stroke: foreground, + stroke_size, + svg_header: title, + bevel: false, + }); + } + } + if !element.inner_modifications.is_empty() { + buffer.push(Element::Text { + text: element.inner_modifications.clone(), + position: pick_point( + ( + (raw_x + element.mid_point) * column_size, + (raw_y + 0.5) * column_size, + ), + direction, + ), + anchor: TextAnchor::Middle, + baseline: TextBaseline::Middle, + fill: foreground, + size: sugar_size / 2.0, + italic: true, + }); + } + if let Some((pos_x, pos_y, anchor, text)) = text_location( + &element.outer_modifications, + element.shape, + pick_point( + ( + (raw_x + element.mid_point - 0.5) * column_size, + raw_y * column_size, + ), + direction, + ), + column_size, + sugar_size, + stroke_size, + &strokes, + footnotes, + ) { + buffer.push(Element::Text { + text, + position: (pos_x, pos_y), + anchor, + baseline: TextBaseline::Hanging, + fill: foreground, + size: sugar_size / 2.0, + italic: false, + }); + } + // 
Render all connected sugars + for (index, branch) in element + .branches + .iter() + .chain(element.sides.iter()) + .enumerate() + { + if !((total_branches == 1 && breaks.iter().any(|b| b.0 == 1)) + || breaks + .iter() + .any(|b| b.0 == 1 && b.1.first().map(|b| b.0) == Some(branch.branch_index))) + { + render_element( + buffer, + branch, + column_size, + sugar_size, + stroke_size, + direction, + x_offset, + y_offset, + &breaks + .iter() + .filter(|b| { + (total_branches > 1 + && b.1.first().map(|b| b.0) == Some(branch.branch_index) + || total_branches == 1) + && b.0 > 0 + }) + .map(|b| (b.0 - 1, b.1[usize::from(total_branches > 1)..].to_vec())) + .collect_vec(), + foreground, + background, + strokes[index + 1], + footnotes, + ); + } + } + } + + let sub_tree = self.get_subtree(selection)?; + + let width = + (sub_tree.tree.x + sub_tree.tree.width - sub_tree.left_offset - sub_tree.right_offset) + * column_size; + let depth = sub_tree.depth as f32 + if sub_tree.break_top { 0.75 } else { 0.0 }; + let height = depth * column_size + + if sub_tree.break_bottom { + 3.5 * stroke_size + } else { + (match basis { + GlycanRoot::None => 0.0_f32, + GlycanRoot::Line | GlycanRoot::Symbol => 0.5, + GlycanRoot::Text(_) => 1.0, + }) * column_size + }; + + let size = pick_point((width, height), direction); + + let mut buffer = Vec::new(); + let stroke = if sub_tree.break_bottom { + let base_x = + (sub_tree.tree.x + sub_tree.tree.mid_point - sub_tree.left_offset) * column_size; + let base_y = depth.mul_add(column_size, stroke_size * 3.0); + buffer.push(Element::Line { + from: pick_point((base_x, (depth - 0.5) * column_size), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + buffer.push(Element::Line { + from: pick_point((base_x - sugar_size / 2.0, base_y), direction), + to: pick_point((base_x + sugar_size / 2.0, base_y), direction), + stroke: foreground, + stroke_size, + }); + let bs_x = base_x - sugar_size / 2.0; + 
buffer.push(Element::Line { + from: pick_point((bs_x, stroke_size.mul_add(-2.0, base_y)), direction), + to: pick_point((bs_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + (base_x, (depth - 0.5) * column_size, base_x, base_y) + } else { + match basis { + GlycanRoot::None => ( + sub_tree.tree.x + sub_tree.tree.mid_point, + (depth + 0.5) * column_size, + sub_tree.tree.x + sub_tree.tree.mid_point, + (depth + 0.5) * column_size, + ), + GlycanRoot::Line => { + let base_x = (sub_tree.tree.x + sub_tree.tree.mid_point - sub_tree.left_offset) + * column_size; + let base_y = depth.mul_add(column_size, (column_size - sugar_size) / 2.0); + buffer.push(Element::Line { + from: pick_point((base_x, (depth - 0.5) * column_size), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + (base_x, (depth - 0.5) * column_size, base_x, base_y) + } + GlycanRoot::Symbol => { + let base_x = (sub_tree.tree.x + sub_tree.tree.mid_point - sub_tree.left_offset) + * column_size; + let base_y = depth.mul_add(column_size, (column_size - sugar_size) / 2.0); + buffer.push(Element::Line { + from: pick_point((base_x, (depth - 0.5) * column_size), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + buffer.push(Element::Curve { + start: pick_point( + (base_x - (sugar_size * 0.75).min(column_size * 0.5), base_y), + direction, + ), + points: vec![ + pick_double_point( + ( + sugar_size.mul_add(-0.5, base_x), + sugar_size.mul_add(0.5, base_y), + base_x, + base_y, + ), + direction, + ), + pick_double_point( + ( + sugar_size.mul_add(0.5, base_x), + sugar_size.mul_add(-0.5, base_y), + base_x + (sugar_size * 0.75).min(column_size * 0.5), + base_y, + ), + direction, + ), + ], + stroke: foreground, + stroke_size, + }); + (base_x, (depth - 0.5) * column_size, base_x, base_y) + } + GlycanRoot::Text(basis) => { + let base_x = (sub_tree.tree.x + sub_tree.tree.mid_point - sub_tree.left_offset) + * 
column_size; + let base_y = depth.mul_add(column_size, (column_size - sugar_size) / 2.0); + buffer.push(Element::Line { + from: pick_point((base_x, (depth - 0.5) * column_size), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + if direction == GlycanDirection::TopDown { + buffer.push(Element::Text { + text: basis, + position: (base_x, base_y + sugar_size), + anchor: TextAnchor::Middle, + baseline: TextBaseline::Ideographic, + fill: foreground, + size: sugar_size, + italic: false, + }); + } else { + buffer.push(Element::Text { + text: basis, + position: ((depth + 1.0) * column_size, base_x), + anchor: TextAnchor::End, + baseline: TextBaseline::Middle, + fill: foreground, + size: sugar_size, + italic: false, + }); + } + (base_x, (depth - 0.5) * column_size, base_x, base_y) + } + } + }; + + // If the full glycan has broken off immediately draw the break symbol + if sub_tree.branch_breaks.iter().any(|r| r.0 == 0) { + let origin_y = depth.mul_add(column_size, (column_size - sugar_size) / 2.0); + let base_x = + (sub_tree.tree.x + sub_tree.tree.mid_point - sub_tree.left_offset) * column_size; + let base_y = (depth - 0.5).mul_add(column_size, stroke_size * 0.5); + let angle = -PI / 2.0; // Always straight + buffer.push(Element::Line { + from: pick_point((base_x, origin_y), direction), + to: pick_point((base_x, base_y), direction), + stroke: foreground, + stroke_size, + }); + let x1 = (sugar_size / 2.0).mul_add(0.5f32.mul_add(PI, -angle).cos(), base_x); + let y1 = (sugar_size / 2.0).mul_add(-0.5f32.mul_add(PI, -angle).sin(), base_y); + let x2 = (sugar_size / 2.0).mul_add(-0.5f32.mul_add(PI, -angle).cos(), base_x); + let y2 = (sugar_size / 2.0).mul_add(0.5f32.mul_add(PI, -angle).sin(), base_y); + buffer.push(Element::Line { + from: pick_point((x1, y1), direction), + to: pick_point((x2, y2), direction), + stroke: foreground, + stroke_size, + }); + let x3 = (stroke_size * 2.0).mul_add(-angle.cos(), x1); + let y3 = 
(stroke_size * 2.0).mul_add(-angle.sin(), y1); + buffer.push(Element::Line { + from: pick_point((x1, y1), direction), + to: pick_point((x3, y3), direction), + stroke: foreground, + stroke_size, + }); + let offset = 0.25f32.mul_add(column_size, stroke_size); + let r = 0.25f32 + .mul_add(column_size, -stroke_size) + .min(sugar_size * 0.25); + let adjusted_x = offset.mul_add(-angle.cos(), base_x); + let adjusted_y = offset.mul_add(-angle.sin(), base_y); + buffer.push(Element::Circle { + r, + center: pick_point((adjusted_x, adjusted_y), direction), + fill: None, + stroke: foreground, + stroke_size, + svg_header: String::new(), + }); + } else { + render_element( + &mut buffer, + sub_tree.tree, + column_size, + sugar_size, + stroke_size, + direction, + sub_tree.left_offset, + sub_tree.tree.y as f32 - (depth - 1.0), + &sub_tree.branch_breaks, + foreground, + background, + pick_box(stroke, direction), + footnotes, + ); + } + + Some(RenderedGlycan { + size, + elements: buffer, + background, + midpoint: (sub_tree.tree.mid_point - sub_tree.left_offset) * column_size, + }) + } +} + +/// Determine the best location for text, returns the x, y, text-anchor, and the contents +fn text_location( + outer_modifications: &OuterModifications, + shape: Shape, + position: (f32, f32), + column_size: f32, + sugar_size: f32, + stroke_size: f32, + strokes: &[(f32, f32, f32, f32)], // x1, y1, x2, y2 + footnotes: &mut Vec, +) -> Option<(f32, f32, TextAnchor, String)> { + let text = match outer_modifications { + OuterModifications::Empty => return None, + OuterModifications::Footnote(index) => (index + 1).to_string(), // Human numbering + OuterModifications::Text(text) => text.clone(), + }; + + // Stay within box, dodge strokes, if not fitting fall back to adding to footnotes + let text_height = sugar_size / 2.0; + let text_width = (text.len() as f32) * text_height; // Rule of thumb, on average text is thinner than square, so this should be a good upper limit + let vertical_padding = 
shape.height().mul_add(-sugar_size, column_size) / 2.0; + + if vertical_padding >= text_height { + let mut options = vec![ + ( + ( + (column_size - text_width).mul_add(0.5, position.0), + position.1, + (column_size + text_width).mul_add(0.5, position.0), + position.1 + text_height, + ), + TextAnchor::Middle, + ), + ( + ( + (column_size - text_width).mul_add(0.5, position.0), + position.1 + column_size - text_height, + (column_size + text_width).mul_add(0.5, position.0), + position.1 + column_size, + ), + TextAnchor::Middle, + ), + ( + ( + position.0 + column_size.mul_add(0.5, -stroke_size) - text_width, + position.1, + position.0 + column_size.mul_add(0.5, -stroke_size), + position.1 + text_height, + ), + TextAnchor::End, + ), + ( + ( + stroke_size.mul_add(2.0, column_size.mul_add(0.5, position.0)), + position.1 + column_size - text_height, + stroke_size.mul_add(2.0, column_size.mul_add(0.5, position.0) + text_width), + position.1 + column_size, + ), + TextAnchor::Start, + ), + ( + ( + position.0, + position.1, + position.0 + text_width, + position.1 + text_height, + ), + TextAnchor::Start, + ), + ( + ( + position.0 + column_size - text_width, + position.1, + position.0 + column_size, + position.1 + text_height, + ), + TextAnchor::End, + ), + ( + ( + position.0, + position.1 + column_size - text_height, + position.0 + text_width, + position.1 + column_size, + ), + TextAnchor::Start, + ), + ( + ( + position.0 + column_size - text_width, + position.1 + column_size - text_height, + position.0 + column_size, + position.1 + column_size, + ), + TextAnchor::End, + ), + ]; + + // Remove any options that put text outside of the box + options.retain(|(option, _)| { + option.0 >= position.0 + && option.2 <= position.0 + column_size + && option.1 >= position.1 + && option.3 <= position.1 + column_size + }); + + for stroke in strokes { + options.retain(|(option, _)| !hitbox_test(*option, *stroke)); + } + if let Some((option, anchor)) = options.first() { + let (x, y) = match 
*anchor { + TextAnchor::Start => (option.0, option.1), + TextAnchor::Middle => ((option.0 + option.2) / 2.0, option.1), + TextAnchor::End => (option.2, option.1), + }; + return Some((x, y, *anchor, text)); + } + } + + if let OuterModifications::Text(text) = outer_modifications { + let index = footnotes + .iter() + .position(|p| *p == *text) + .unwrap_or_else(|| { + footnotes.push(text.clone()); + footnotes.len() - 1 + }); + + text_location( + &OuterModifications::Footnote(index), + shape, + position, + column_size, + sugar_size, + stroke_size, + strokes, + footnotes, + ) + } else { + Some(( + position.0 + column_size, + position.1 + column_size - text_height, + TextAnchor::End, + text, + )) + } +} + +/// Test if two boxes hit +fn hitbox_test(box1: (f32, f32, f32, f32), box2: (f32, f32, f32, f32)) -> bool { + debug_assert!(box1.0 <= box1.2, "Invalid boxes: {box1:?} {box2:?}"); + debug_assert!(box2.0 <= box2.2, "Invalid boxes: {box1:?} {box2:?}"); + debug_assert!(box1.1 <= box1.3, "Invalid boxes: {box1:?} {box2:?}"); + debug_assert!(box2.1 <= box2.3, "Invalid boxes: {box1:?} {box2:?}"); + box1.2 > box2.0 && box1.0 < box2.2 && box1.3 > box2.1 && box1.1 < box2.3 +} + +fn pick_point(a: (T, T), direction: GlycanDirection) -> (T, T) { + if direction == GlycanDirection::TopDown { + a + } else { + (a.1, a.0) + } +} + +fn pick_double_point(a: (T, T, T, T), direction: GlycanDirection) -> (T, T, T, T) { + if direction == GlycanDirection::TopDown { + a + } else { + (a.1, a.0, a.3, a.2) + } +} + +fn pick_box(a: (T, T, T, T), direction: GlycanDirection) -> (T, T, T, T) { + if direction == GlycanDirection::TopDown { + a + } else { + (a.1, a.0, a.3, a.2) + } +} diff --git a/rustyms/src/glycan/render/mod.rs b/rustyms/src/glycan/render/mod.rs new file mode 100644 index 00000000..157466ac --- /dev/null +++ b/rustyms/src/glycan/render/mod.rs @@ -0,0 +1,11 @@ +mod absolute; +#[cfg(feature = "glycan-render-bitmap")] +mod bitmap; +mod element; +mod shape; +mod svg; +#[cfg(all(test, 
not(github_action)))] +mod test; + +pub use absolute::GlycanDirection; +pub use element::{GlycanRoot, GlycanSelection, RenderedGlycan}; diff --git a/rustyms/src/glycan/render/shape.rs b/rustyms/src/glycan/render/shape.rs new file mode 100644 index 00000000..fe066416 --- /dev/null +++ b/rustyms/src/glycan/render/shape.rs @@ -0,0 +1,479 @@ +use crate::glycan::{ + BaseSugar, Configuration, GlycanSubstituent, HeptoseIsomer, HexoseIsomer, MonoSaccharide, + NonoseIsomer, PentoseIsomer, +}; + +impl MonoSaccharide { + /// Get the shape, colour, inner modifications, and outer modifications for this monosaccharide. + pub(super) fn get_shape(&self) -> (Shape, Colour, String, String) { + // Common substitutions + let mut nacetyl = 0; + let mut acid = 0; + let mut amino = 0; + let mut deoxy = 0; + // Additional needed substitutions + let mut acetyl = 0; + let mut glycolyl = 0; + let mut nglycolyl = 0; + let mut o_carboxy_ethyl = 0; + let mut inner_modifications = if self.furanose { + "f".to_string() + } else { + String::new() + }; + if let Some(c) = &self.configuration { + inner_modifications.push_str(match *c { + Configuration::D => "D", + Configuration::L => "L", + Configuration::DD => "DD", + Configuration::LL => "LL", + Configuration::DL => "DL", + Configuration::LD => "LD", + }); + } + let mut outer_modifications = String::new(); + for m in &self.substituents { + match m { + GlycanSubstituent::NAcetyl => nacetyl += 1, + GlycanSubstituent::Acid => acid += 1, + GlycanSubstituent::Amino => amino += 1, + GlycanSubstituent::Deoxy => deoxy += 1, + GlycanSubstituent::Acetyl => acetyl += 1, + GlycanSubstituent::Glycolyl => glycolyl += 1, + GlycanSubstituent::OCarboxyEthyl => o_carboxy_ethyl += 1, + GlycanSubstituent::NGlycolyl => nglycolyl += 1, + GlycanSubstituent::Didehydro => inner_modifications.push_str("en"), + GlycanSubstituent::Alcohol => inner_modifications.push('o'), // Missing symbols: an for anhydro, on for lactone, am for lactam + _ => 
outer_modifications.push_str(m.notation()), + } + } + let outer_mods = |nacetyl: usize, + acid: usize, + amino: usize, + deoxy: usize, + acetyl: usize, + glycolyl: usize, + nglycolyl: usize, + o_carboxy_ethyl: usize| { + [ + GlycanSubstituent::NAcetyl.notation().repeat(nacetyl), + GlycanSubstituent::Acid.notation().repeat(acid), + GlycanSubstituent::Amino.notation().repeat(amino), + GlycanSubstituent::Deoxy.notation().repeat(deoxy), + GlycanSubstituent::Acetyl.notation().repeat(acetyl), + GlycanSubstituent::Glycolyl.notation().repeat(glycolyl), + GlycanSubstituent::NGlycolyl.notation().repeat(nglycolyl), + GlycanSubstituent::OCarboxyEthyl + .notation() + .repeat(o_carboxy_ethyl), + outer_modifications, + ] + .join("") + }; + match &self.base_sugar { + BaseSugar::Pentose(isomer) => ( + Shape::Star, + match isomer { + None | Some(PentoseIsomer::Xylulose) => Colour::Background, + Some(PentoseIsomer::Arabinose) => Colour::Green, + Some(PentoseIsomer::Lyxose) => Colour::Yellow, + Some(PentoseIsomer::Xylose) => Colour::Orange, + Some(PentoseIsomer::Ribose) => Colour::Pink, + }, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ), + BaseSugar::Hexose(isomer) => { + if o_carboxy_ethyl > 0 && nacetyl > 0 { + ( + Shape::Hexagon, + Colour::Purple, + inner_modifications, + outer_mods( + nacetyl - 1, + acid, + amino, + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl - 1, + ), + ) + } else if o_carboxy_ethyl > 0 && nglycolyl > 0 { + ( + Shape::Hexagon, + Colour::LightBlue, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy, + acetyl, + glycolyl, + nglycolyl - 1, + o_carboxy_ethyl - 1, + ), + ) + } else if o_carboxy_ethyl > 0 && amino > 0 { + ( + Shape::Hexagon, + Colour::Brown, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino - 1, + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl - 1, + ), + ) + } else if deoxy > 1 { + let c = match 
isomer { + Some(HexoseIsomer::Glucose) => Colour::Blue, + Some(HexoseIsomer::Mannose) => Colour::Green, + Some(HexoseIsomer::Galactose) => Colour::Orange, + Some(HexoseIsomer::Altrose) => Colour::Pink, + Some(HexoseIsomer::Allose) => Colour::Purple, + Some(HexoseIsomer::Talose) => Colour::LightBlue, + Some(_) | None => Colour::Background, + }; + ( + Shape::Rectangle, + c, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy - 2, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else if amino > 1 && deoxy > 0 { + ( + Shape::Hexagon, + Colour::Blue, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino - 2, + deoxy - 1, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else if nacetyl > 0 && deoxy > 0 { + let c = match isomer { + Some(HexoseIsomer::Glucose) => Colour::Blue, + Some(HexoseIsomer::Mannose) => Colour::Green, + Some(HexoseIsomer::Galactose) => Colour::Red, + Some(HexoseIsomer::Altrose) => Colour::Pink, + Some(HexoseIsomer::Talose) => Colour::LightBlue, + Some(_) | None => Colour::Background, + }; + ( + Shape::DividedTriangle, + c, + inner_modifications, + outer_mods( + nacetyl - 1, + acid, + amino, + deoxy - 1, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else if deoxy > 0 { + let c = match isomer { + Some(HexoseIsomer::Glucose) => Colour::Blue, + Some(HexoseIsomer::Mannose) => Colour::Green, + Some(HexoseIsomer::Galactose) => Colour::Red, + Some(HexoseIsomer::Gulose) => Colour::Orange, + Some(HexoseIsomer::Altrose) => Colour::Pink, + Some(HexoseIsomer::Talose) => Colour::LightBlue, + Some(_) | None => Colour::Background, + }; + ( + Shape::Triangle, + c, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy - 1, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else if acid > 0 || amino > 0 || nacetyl > 0 { + let c = match isomer { + Some(HexoseIsomer::Glucose) => Colour::Blue, + Some(HexoseIsomer::Mannose) => Colour::Green, + 
Some(HexoseIsomer::Galactose) => Colour::Yellow, + Some(HexoseIsomer::Gulose) => Colour::Orange, + Some(HexoseIsomer::Altrose) => Colour::Pink, + Some(HexoseIsomer::Allose) => Colour::Purple, + Some(HexoseIsomer::Talose) => Colour::LightBlue, + Some(HexoseIsomer::Idose) => Colour::Brown, + Some(_) | None => Colour::Background, + }; + let shape = if acid > 0 { + Shape::DividedDiamond + } else if amino > 0 { + Shape::CrossedSquare + } else { + Shape::Square + }; + ( + shape, + c, + inner_modifications, + outer_mods( + nacetyl - usize::from(shape == Shape::Square), + acid - usize::from(shape == Shape::DividedDiamond), + amino - usize::from(shape == Shape::CrossedSquare), + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else { + let (s, c) = match isomer { + None => (Shape::Circle, Colour::Background), + Some(HexoseIsomer::Glucose) => (Shape::Circle, Colour::Blue), + Some(HexoseIsomer::Mannose) => (Shape::Circle, Colour::Green), + Some(HexoseIsomer::Galactose) => (Shape::Circle, Colour::Yellow), + Some(HexoseIsomer::Gulose) => (Shape::Circle, Colour::Orange), + Some(HexoseIsomer::Altrose) => (Shape::Circle, Colour::Pink), + Some(HexoseIsomer::Allose) => (Shape::Circle, Colour::Purple), + Some(HexoseIsomer::Talose) => (Shape::Circle, Colour::LightBlue), + Some(HexoseIsomer::Idose) => (Shape::Circle, Colour::Brown), + Some(HexoseIsomer::Psicose) => (Shape::Pentagon, Colour::Pink), + Some(HexoseIsomer::Fructose) => (Shape::Pentagon, Colour::Green), + Some(HexoseIsomer::Sorbose) => (Shape::Pentagon, Colour::Orange), + Some(HexoseIsomer::Tagatose) => (Shape::Pentagon, Colour::Yellow), + }; + ( + s, + c, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } + } + BaseSugar::Heptose(Some(HeptoseIsomer::GlyceroMannoHeptopyranose)) => ( + Shape::Hexagon, + Colour::Green, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy, + acetyl, + glycolyl, 
+ nglycolyl, + o_carboxy_ethyl, + ), + ), + BaseSugar::Heptose(None) if acid > 1 && deoxy > 0 => ( + Shape::Hexagon, + Colour::Orange, + inner_modifications, + outer_mods( + nacetyl, + acid - 2, + amino, + deoxy - 1, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ), + BaseSugar::Octose if acid > 0 && deoxy > 0 => ( + Shape::Hexagon, + Colour::Yellow, + inner_modifications, + outer_mods( + nacetyl, + acid - 1, + amino, + deoxy - 1, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ), + BaseSugar::Nonose(isomer) if acid > 0 && amino > 0 => { + if amino > 1 && deoxy > 1 { + ( + Shape::FlatDiamond, + match isomer { + Some(NonoseIsomer::Pse) => Colour::Green, + Some(NonoseIsomer::Leg) => Colour::Yellow, + Some(NonoseIsomer::ELeg) => Colour::LightBlue, + Some(NonoseIsomer::Aci) => Colour::Pink, + _ => Colour::Background, + }, + inner_modifications, + outer_mods( + nacetyl, + acid - 1, + amino - 2, + deoxy - 2, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ) + } else { + let colour = if deoxy > 0 { + if *isomer == Some(NonoseIsomer::Kdn) { + Colour::Green + } else { + Colour::Red + } + } else if acetyl > 0 { + Colour::Purple + } else if glycolyl > 0 { + Colour::LightBlue + } else { + Colour::Brown + }; + ( + Shape::Diamond, + colour, + inner_modifications, + outer_mods( + nacetyl, + acid - 1, + amino - 1, + deoxy - usize::from(colour == Colour::Red || colour == Colour::Green), + acetyl - usize::from(colour == Colour::Purple), + glycolyl - usize::from(colour == Colour::LightBlue), + nglycolyl, + o_carboxy_ethyl, + ), + ) + } + } + _ => ( + Shape::Hexagon, + Colour::Background, + inner_modifications, + outer_mods( + nacetyl, + acid, + amino, + deoxy, + acetyl, + glycolyl, + nglycolyl, + o_carboxy_ethyl, + ), + ), + } + } +} + +/// All colours from Symbol Nomenclature For Glycans (SNFG) +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub(super) enum Colour { + Background, + Blue, + Green, + Yellow, + Orange, + Pink, + Purple, + 
LightBlue, + Brown, + Red, +} + +impl Colour { + /// Represented as bytes 0..=255 + pub(super) const fn rgb(self) -> [u8; 3] { + match self { + Self::Background => [255, 255, 255], + Self::Blue => [0, 144, 188], + Self::Green => [0, 166, 81], + Self::Yellow => [255, 212, 0], + Self::Orange => [244, 121, 32], + Self::Pink => [246, 158, 161], + Self::Purple => [165, 67, 153], + Self::LightBlue => [143, 204, 233], + Self::Brown => [161, 122, 77], + Self::Red => [237, 28, 36], + } + } +} + +/// All symbols from Symbol Nomenclature For Glycans (SNFG) +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub(super) enum Shape { + Circle, + Square, + CrossedSquare, + DividedDiamond, + Triangle, + LeftPointingTriangle, + RightPointingTriangle, + DividedTriangle, + Rectangle, + Star, + Diamond, + FlatDiamond, + Hexagon, + Pentagon, +} + +impl Shape { + /// The height of a symbol as ratio to the width + pub(super) const fn height(self) -> f32 { + match self { + Self::Rectangle | Self::FlatDiamond | Self::Hexagon => 0.5, + _ => 1.0, + } + } +} diff --git a/rustyms/src/glycan/render/svg.rs b/rustyms/src/glycan/render/svg.rs new file mode 100644 index 00000000..7869dc15 --- /dev/null +++ b/rustyms/src/glycan/render/svg.rs @@ -0,0 +1,110 @@ +use std::fmt::Write; + +use itertools::Itertools; + +use crate::glycan::{ + render::element::{Element, TextAnchor, TextBaseline}, + RenderedGlycan, +}; + +impl RenderedGlycan { + /// Render this glycan as SVG. The SVG will be appended to the given buffer. + /// * `output`: the buffer to append the SVG to. + /// + /// # Errors + /// If the underlying buffer errors the error is returned. 
+ pub fn to_svg(&self, mut output: impl Write) -> Result<(), std::fmt::Error> { + fn clr(clr: Option<&[u8; 3]>) -> String { + if let Some([r, g, b]) = clr { + format!("rgb({r},{g},{b})") + } else { + "transparent".to_string() + } + } + + write!( + output, + "", + self.size.0, self.size.1, self.midpoint + )?; + for element in &self.elements { + match element { + Element::Line { + from, + to, + stroke, + stroke_size, + } => write!(output, + "", + from.0, + from.1, + to.0, + to.1, + clr(Some(stroke)))?, + Element::Circle { + r, + center, + fill, + stroke, + stroke_size, + svg_header, + } => write!(output, + "", + center.0, + center.1, + clr(fill.as_ref()), + clr(Some(stroke)))?, + Element::Rectangle { + top, + w, + h, + fill, + stroke, + stroke_size, + svg_header, + } => write!(output, + "", + top.0, + top.1, + clr(Some(fill)), + clr(Some(stroke)))?, + Element::Polygon { + points, + fill, + stroke, + stroke_size, + svg_header, + bevel, + } => write!(output, + "", + points.iter().map(|(a, b)| format!("{a} {b}")).join(" "), + clr(Some(fill)), + clr(Some(stroke)), + if *bevel {" stroke-linejoin=\"bevel\""} else {""})?, + Element::Text { + text, + position, + anchor, + baseline, + fill, + size, + italic, + } => write!(output, + "{text}", + position.0, + position.1, + clr(Some(fill)), + match anchor {TextAnchor::Start => "start", TextAnchor::Middle => "middle", TextAnchor::End => "End"}, + match baseline {TextBaseline::Hanging => "hanging", TextBaseline::Middle => "middle", TextBaseline::Ideographic => "ideographic"}, + if *italic {" font-style=\"italic\""} else {""})?, + Element::Curve { start, points, stroke, stroke_size } => write!(output, + "", + start.0, start.1, + points.iter().map(|(a, b, c, d)| format!("Q {a} {b} {c} {d}")).join(" "), + clr(Some(stroke)))?, + } + } + write!(output, "")?; + Ok(()) + } +} diff --git a/rustyms/src/glycan/render/test.rs b/rustyms/src/glycan/render/test.rs new file mode 100644 index 00000000..d1294078 --- /dev/null +++ 
b/rustyms/src/glycan/render/test.rs @@ -0,0 +1,561 @@ +#![allow(clippy::missing_panics_doc)] +use base64::Engine; +use swash::{scale::ScaleContext, CacheKey, FontRef}; + +use crate::{ + fragment::GlycanPosition, + glycan::{render::element::GlycanSelection, GlycanDirection, GlycanRoot, GlycanStructure}, +}; +use std::{ + fmt::Write, + io::BufWriter, + path::{Path, PathBuf}, +}; + +pub struct Font { + // Full content of the font file + data: Vec, + // Offset to the table directory + offset: u32, + // Cache key + key: CacheKey, +} + +impl Font { + pub fn from_file(path: PathBuf, index: usize) -> Option { + // Read the full font file + let data = std::fs::read(path).ok()?; + // Create a temporary font reference for the first font in the file. + // This will do some basic validation, compute the necessary offset + // and generate a fresh cache key for us. + let font = FontRef::from_index(&data, index)?; + let (offset, key) = (font.offset, font.key); + // Return our struct with the original file data and copies of the + // offset and key from the font reference + Some(Self { data, offset, key }) + } + + // Create the transient font reference for accessing this crate's + // functionality. + pub fn as_ref(&self) -> FontRef { + // Note that you'll want to initialize the struct directly here as + // using any of the FontRef constructors will generate a new key which, + // while completely safe, will nullify the performance optimizations of + // the caching mechanisms used in this crate. 
+ FontRef { + data: &self.data, + offset: self.offset, + key: self.key, + } + } +} + +#[test] +fn test_rendering() { + const COLUMN_SIZE: f32 = 30.0; + const SUGAR_SIZE: f32 = 15.0; + const STROKE_SIZE: f32 = 1.5; + + let font = Font::from_file( + std::fs::read_dir( + directories::UserDirs::font_dir( + &directories::UserDirs::new().expect("Could not find user directories"), + ) + .unwrap_or_else(|| Path::new("C:/WINDOWS/Fonts")), // Font directory not defined for windows + ) + .expect("Could not open font directory") + .find(|p| { + p.as_ref() + .is_ok_and(|p| p.file_name().eq_ignore_ascii_case("times.ttf")) + }) + .expect("No font files") + .expect("Could not open font file") + .path(), + 0, + ) + .expect("Invalid font"); + + let mut html = String::new(); + let mut footnotes = Vec::new(); + write!(&mut html, "Glycan render test").unwrap(); + + let codes = [ + ("G01670UQ", "Neu5Ac(a2-6)Gal(b1-4)GlcNAc(b1-2)Man(a1-3)[Gal(b1-4)GlcNAc(b1-2)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc(?1-"), + ("G13523IF", "Fuc(?1-?)Gal(?1-?)GalNAc(?1-"), + ("G00613DO", "GlcN(b1-4)GlcNAc(b1-4)GlcNAc(b1-4)GlcNAc6S(?1-"), + ("G00621IU", "Neu5Gc(a2-3/6)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-2)[Gal(a1-3)Gal(b1-4)GlcNAc(b1-4)]Man(a1-3)[Neu5Ac(a2-8)Neu5Ac(a2-3/6)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)GlcNAc(b1-2)[Neu5Ac(a2-3/6)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-6)]Man(a1-6)]Man(b1-4)GlcNAc(b1-4)[Fuc(a1-6)]GlcNAc(?1-"), + ("G01464QV", "Rha2,3,4Ac3(a1-2)[Xyl(b1-3)]Ara(a1-"), + ("G04421VO", "Fruf(b2-1a)[Glc(a1-2)Glc(a1-2)Glc(a1-2)Glc(a1-2)Glc(a1-2)Glc(a1-2)]Glc"), + ("G04458LN", "Kdn(a2-3)Gal(b1-4)ManNAc(b1-2)[Kdn(a2-3)Gal(b1-4)GlcNAc(b1-4)]Man(a1-3)[GlcNAc(b1-4)][Kdn(a2-3)Gal(b1-4)GlcNAc(b1-2)[Neu5Gc(a2-3)Gal(b1-4)GlcNAc(b1-6)]Man(a1-6)]Man(b1-4)GlcNAc(b1-4)[Fuc(a1-6)]GlcNAc(b1-"), + ("G69524KC", "Xyl(?1-?)Ara(?1-?)[Gal(?1-?)]GlcA"), + ("G37707YH", 
"Fuc(a1-2)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-2)[Gal(a1-3)Gal(b1-4)GlcNAc(b1-4)]Man(a1-3)[GlcNAc(b1-4)][Neu5Gc(a2-3/6)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-3)Gal(b1-4)GlcNAc(b1-2)[Neu5Ac(a2-3/6)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-6)]Man(a1-6)]Man(b1-4)GlcNAc(b1-4)[Fuc(a1-6)]GlcNAc(?1-"), + ("G07370RP", "Rha(a1-3)Qui(b1-4)Rha(a1-2)Glc(b1-2)[Rha(a1-6)]Glc(b1-"), + ("G11504PZ", "Dig3CMe(b1-3)Oli(b1-3)Oli(b1-"), + ("G64699IM", "GlcA(b1-3)GalNAc(b1-4)4eLeg?5,7Ac2(a2-"), + ("G14402AU", "D-Araf(b1-5)Dha(?2-3)[GalA(a1-4)GalA(a1-4)]GalA(a1-4)GalA"), + ("G08395BZ", "Glc(b1-2a)[Ido(b1-3)]Psif"), + ("G49642ZT", "Man(?1-?)[Man(?1-?)]Man(?1-?)[Man(?1-?)]Man(?1-?)GlcNAc(?1-?)[Fuc(?1-?)][Fuc(?1-?)]GlcNAc(?1-"), + ("G59426OB", "Hex(?1-?)HexNAc(?1-?)HexA(?1-?)Gal(?1-?)GalNAc-ol"), + ("G75424NV", "Hex?(?1-?)Hex?NAc(?1-?)[Hex?NAc(?1-?)]Hex?(?1-?)[Hex?(?1-?)[Hex?(?1-?)]Hex?(?1-?)][Hex?NAc(?1-?)]Hex?(?1-?)Hex?NAc(?1-?)Hex?NAc(?1-"), + ("G36128WO", "Ido(b1-3)ManNAc(?1-3)[Ido(b1-3)L-AllNAc(b1-3)Ido(b1-4)AltNAc(b1-6)]Tal(b1-4)D-Ido(?1-"), + ("G83422GV", "L-6dTal(a1-3)[Fuc(a1-2)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)]GlcNAc(b1-3)Gal(b1-3)[Neu5Ac(a2-3)Gal(b1-4)[Fuc(a1-3)]GlcNAc(b1-6)]GalNAc(a1-"), + ("G09073GJ","GalNAc(?1-?)GlcA2,3NAc2(?1-?)D-FucNAc"), + ("G00069DT","Neu(a2-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc(b1-"), + ("G00468KU","GlcNAc(b1-2)Man(a1-3)[GlcNAc(b1-4)][Man(a1-?)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc(?1-"), + ("G75079FY","Neu5Ac(?2-?)Gal(?1-?)GlcNAc(?1-?)Man(?1-?)[Neu5Ac(?2-?)Gal(?1-?)GlcNAc(?1-?)Man(?1-?)][GlcNAc(?1-?)]Man(?1-?)GlcNAc(?1-?)[Fuc(?1-?)]GlcNAc"), + ]; + + let mut context = ScaleContext::new(); + for (index, (_, iupac)) in codes.iter().enumerate() { + let structure = GlycanStructure::from_short_iupac(iupac, 0..iupac.len(), 0).unwrap(); + let rendered = structure + .render( + crate::glycan::render::GlycanRoot::Text("pep".to_string()), + COLUMN_SIZE, + SUGAR_SIZE, + STROKE_SIZE, + if index % 3 == 0 { + GlycanDirection::LeftToRight + } else { + GlycanDirection::TopDown + }, + 
GlycanSelection::FULL, + [66, 66, 66], + [255, 255, 255], + &mut footnotes, + ) + .unwrap(); + rendered.to_svg(&mut html).unwrap(); + let (bitmap, width) = rendered.to_bitmap( + if index % 2 == 0 { + zeno::Format::subpixel_bgra() + } else { + zeno::Format::Alpha + }, + font.as_ref(), + &mut context, + ); + let mut buffer = Vec::new(); + let mut w = BufWriter::new(&mut buffer); + let mut encoder = + png::Encoder::new(&mut w, width as u32, (bitmap.len() / 4 / width) as u32); + encoder.set_color(png::ColorType::Rgba); + encoder.set_depth(png::BitDepth::Eight); + let mut writer = encoder.write_header().unwrap(); + writer.write_image_data(&bitmap).unwrap(); + drop(writer); + drop(w); + + write!(&mut html, "").unwrap(); + } + + for (index, selection) in [ + ( + 0, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 2, + series_number: 2, + branch: Vec::new(), + attachment: None, + }), + &[], + ), + ), + ( + 0, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 2, + series_number: 2, + branch: Vec::new(), + attachment: None, + }), + &[GlycanPosition { + inner_depth: 4, + series_number: 4, + branch: vec![(1, 1)], + attachment: None, + }], + ), + ), + ( + 0, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 2, + series_number: 2, + branch: Vec::new(), + attachment: None, + }), + &[ + GlycanPosition { + inner_depth: 5, + series_number: 5, + branch: vec![(0, 0)], + attachment: None, + }, + GlycanPosition { + inner_depth: 3, + series_number: 3, + branch: vec![(1, 1)], + attachment: None, + }, + ], + ), + ), + ( + 0, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 4, + series_number: 4, + branch: vec![(1, 1)], + attachment: None, + }), + &[], + ), + ), + ( + 14, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 0, + series_number: 0, + branch: Vec::new(), + attachment: None, + }), + &[GlycanPosition { + inner_depth: 1, + series_number: 1, + branch: vec![(0, 0)], + attachment: None, + }], + 
), + ), + ( + 14, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 1, + series_number: 1, + branch: vec![(0, 0)], + attachment: None, + }), + &[GlycanPosition { + inner_depth: 2, + series_number: 2, + branch: vec![(0, 0)], + attachment: None, + }], + ), + ), + ( + 16, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 1, + series_number: 1, + branch: Vec::new(), + attachment: None, + }), + &[ + GlycanPosition { + inner_depth: 3, + series_number: 3, + branch: vec![(0, 0)], + attachment: None, + }, + GlycanPosition { + inner_depth: 4, + series_number: 4, + branch: vec![(1, 1), (1, 1)], + attachment: None, + }, + ], + ), + ), + ( + 18, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 1, + series_number: 1, + branch: vec![(0, 0)], + attachment: None, + }), + &[ + GlycanPosition { + inner_depth: 3, + series_number: 3, + branch: vec![(0, 0), (0, 0)], + attachment: None, + }, + GlycanPosition { + inner_depth: 6, + series_number: 6, + branch: vec![(0, 0), (1, 1)], + attachment: None, + }, + ], + ), + ), + ( + 1, + GlycanSelection::Subtree( + None, + &[GlycanPosition { + inner_depth: 1, + series_number: 1, + branch: Vec::new(), + attachment: None, + }], + ), + ), + ( + 1, + GlycanSelection::Subtree( + None, + &[GlycanPosition { + inner_depth: 2, + series_number: 2, + branch: Vec::new(), + attachment: None, + }], + ), + ), + ( + // Y5 + 21, + GlycanSelection::Subtree( + None, + &[GlycanPosition { + inner_depth: 0, + series_number: 5, + branch: Vec::new(), + attachment: None, + }], + ), + ), + ( + // B3Y1gY2bY2a + 21, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 2, + series_number: 3, + branch: vec![], + attachment: None, + }), + &[ + GlycanPosition { + inner_depth: 3, + series_number: 1, + branch: vec![(1, 2)], + attachment: None, + }, + GlycanPosition { + inner_depth: 3, + series_number: 2, + branch: vec![(2, 1)], + attachment: None, + }, + GlycanPosition { + inner_depth: 3, + series_number: 2, + 
branch: vec![(0, 0)], + attachment: None, + }, + ], + ), + ), + ( + // B2aY1a + 21, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 3, + series_number: 2, + branch: vec![(0, 0)], + attachment: None, + }), + &[GlycanPosition { + inner_depth: 4, + series_number: 1, + branch: vec![(0, 0)], + attachment: None, + }], + ), + ), + ( + // B2bY1b + 21, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 3, + series_number: 2, + branch: vec![(2, 1)], + attachment: None, + }), + &[GlycanPosition { + inner_depth: 4, + series_number: 1, + branch: vec![(2, 1)], + attachment: None, + }], + ), + ), + ( + // B1b + 21, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 4, + series_number: 1, + branch: vec![(2, 1)], + attachment: None, + }), + &[], + ), + ), + ( + // dHex3 + 21, + GlycanSelection::SingleSugar(&GlycanPosition { + inner_depth: 2, + series_number: 3, + branch: vec![], + attachment: None, + }), + ), + ( + // dHex2a + 21, + GlycanSelection::SingleSugar(&GlycanPosition { + inner_depth: 3, + series_number: 2, + branch: vec![(0, 0)], + attachment: None, + }), + ), + ( + // dHex2b + 21, + GlycanSelection::SingleSugar(&GlycanPosition { + inner_depth: 3, + series_number: 2, + branch: vec![(2, 1)], + attachment: None, + }), + ), + ( + // dHex1b + 21, + GlycanSelection::SingleSugar(&GlycanPosition { + inner_depth: 4, + series_number: 1, + branch: vec![(2, 1)], + attachment: None, + }), + ), + ( + // B4a' + 22, + GlycanSelection::Subtree( + Some(&GlycanPosition { + inner_depth: 3, + series_number: 4, + branch: vec![(0, 0), (1, 1)], + attachment: None, + }), + &[], + ), + ), + ] { + let structure = + GlycanStructure::from_short_iupac(codes[index].1, 0..codes[index].1.len(), 0).unwrap(); + if let Some(rendered) = structure.render( + crate::glycan::render::GlycanRoot::Symbol, + COLUMN_SIZE, + SUGAR_SIZE, + STROKE_SIZE, + GlycanDirection::TopDown, + selection, + [0, 0, 0], + [255, 255, 255], + &mut footnotes, + ) { + 
rendered.to_svg(&mut html).unwrap(); + let (bitmap, width) = rendered.to_bitmap( + if index % 2 == 0 { + zeno::Format::subpixel_bgra() + } else { + zeno::Format::Alpha + }, + font.as_ref(), + &mut context, + ); + let mut buffer = Vec::new(); + let mut w = BufWriter::new(&mut buffer); + let mut encoder = + png::Encoder::new(&mut w, width as u32, (bitmap.len() / 4 / width) as u32); + encoder.set_color(png::ColorType::Rgba); + encoder.set_depth(png::BitDepth::Eight); + let mut writer = encoder.write_header().unwrap(); + writer.write_image_data(&bitmap).unwrap(); + drop(writer); + drop(w); + + write!(&mut html, "").unwrap(); + } else { + write!(&mut html, "Render error: invalid root").unwrap(); + } + } + + let structure = GlycanStructure::from_short_iupac(codes[0].1, 0..codes[0].1.len(), 0).unwrap(); + for root in [ + GlycanRoot::None, + GlycanRoot::Line, + GlycanRoot::Symbol, + GlycanRoot::Text("pep".to_string()), + GlycanRoot::Text("N".to_string()), + GlycanRoot::Text("Arg".to_string()), + ] { + let rendered = structure + .render( + root, + COLUMN_SIZE, + SUGAR_SIZE, + STROKE_SIZE, + GlycanDirection::TopDown, + GlycanSelection::FULL, + [0, 0, 0], + [255, 255, 255], + &mut footnotes, + ) + .unwrap(); + rendered.to_svg(&mut html).unwrap(); + } + + write!(&mut html, "
").unwrap(); + if !footnotes.is_empty() { + write!(&mut html, "
    ").unwrap(); + for note in footnotes { + write!(&mut html, "
  1. {note}
  2. ").unwrap(); + } + write!(&mut html, "

").unwrap(); + } + for (code, _) in &codes { + write!( + &mut html, + "" + ) + .unwrap(); + } + write!(&mut html, "").unwrap(); + std::fs::write("../rendered_glycans.html", html).unwrap(); +} diff --git a/rustyms/src/modification.rs b/rustyms/src/modification.rs index d0024055..cc37523a 100644 --- a/rustyms/src/modification.rs +++ b/rustyms/src/modification.rs @@ -37,7 +37,7 @@ impl ModificationId { )), Ontology::Gnome => Some(format!( "http://glytoucan.org/Structures/Glycans/{}", - self.name + self.name.to_ascii_uppercase() )), Ontology::Resid => Some(format!( "https://proteininformationresource.org/cgi-bin/resid?id=AA{:04}", @@ -48,6 +48,21 @@ impl ModificationId { } } +impl Display for ModificationId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.ontology == Ontology::Gnome { + write!( + f, + "{}:{}", + self.ontology.char(), + self.name.to_ascii_uppercase() + ) + } else { + write!(f, "{}:{}", self.ontology.char(), self.name) + } + } +} + /// The result of checking if a modification can be placed somewhere. #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)] pub enum RulePossible { @@ -321,19 +336,8 @@ impl SimpleModificationInner { } if specification_compliant => { write!(f, "Formula:{formula}|INFO:Custom:{name}")?; } - Self::Database { - id: - ModificationId { - name, - ontology: Ontology::Custom, - .. - }, - .. - } if specification_compliant => { - write!(f, "C:{name}")?; - } Self::Database { id, .. } | Self::Gno { id, .. } | Self::Linker { id, .. 
} => { - write!(f, "{}:{}", id.ontology.char(), id.name)?; + write!(f, "{id}")?; } } Ok(()) diff --git a/rustyms/src/rand.rs b/rustyms/src/rand.rs index 79d68d6e..bae36ddd 100644 --- a/rustyms/src/rand.rs +++ b/rustyms/src/rand.rs @@ -74,50 +74,42 @@ impl Distribution for StandardUniform { impl Distribution for StandardUniform { fn sample(&self, rng: &mut R) -> GlycanSubstituent { - match rng.random_range(1..=44) { + match rng.random_range(1..=36) { 1 => GlycanSubstituent::Acetimidoyl, 2 => GlycanSubstituent::Acetyl, - 3 => GlycanSubstituent::AcetylAlanyl, - 4 => GlycanSubstituent::AcetylGlutaminyl, - 5 => GlycanSubstituent::Acid, - 6 => GlycanSubstituent::Alanyl, - 7 => GlycanSubstituent::Alcohol, - 8 => GlycanSubstituent::Amino, - 9 => GlycanSubstituent::Aric, - 10 => GlycanSubstituent::CargoxyEthylidene, - 11 => GlycanSubstituent::Deoxy, - 12 => GlycanSubstituent::Didehydro, - 13 => GlycanSubstituent::DiHydroxyButyryl, - 14 => GlycanSubstituent::DiMethyl, - 15 => GlycanSubstituent::DiMethylAcetimidoyl, - 16 => GlycanSubstituent::DiMethylGlyceryl, - 17 => GlycanSubstituent::Element(rng.random()), - 18 => GlycanSubstituent::Ethanolamine, - 19 => GlycanSubstituent::EtOH, - 20 => GlycanSubstituent::Formyl, - 21 => GlycanSubstituent::Glyceryl, - 22 => GlycanSubstituent::Glycolyl, - 23 => GlycanSubstituent::Glycyl, - 24 => GlycanSubstituent::HydroxyButyryl, - 25 => GlycanSubstituent::HydroxyMethyl, - 26 => GlycanSubstituent::Lac, - 27 => GlycanSubstituent::Lactyl, - 28 => GlycanSubstituent::Methyl, - 29 => GlycanSubstituent::MethylAcetimidoyl, - 30 => GlycanSubstituent::MethylGlutamyl, - 31 => GlycanSubstituent::NAcetyl, - 32 => GlycanSubstituent::NDiMe, - 33 => GlycanSubstituent::NFo, - 34 => GlycanSubstituent::NGlycolyl, - 35 => GlycanSubstituent::OCarboxyEthyl, - 36 => GlycanSubstituent::PCholine, - 37 => GlycanSubstituent::Phosphate, - 38 => GlycanSubstituent::Pyruvyl, - 39 => GlycanSubstituent::Suc, - 40 => GlycanSubstituent::Sulfate, - 41 => 
GlycanSubstituent::Tauryl, - 42 => GlycanSubstituent::Ulo, - 43 => GlycanSubstituent::Ulof, + 3 => GlycanSubstituent::Acid, + 4 => GlycanSubstituent::Alanyl, + 5 => GlycanSubstituent::Alcohol, + 6 => GlycanSubstituent::Amino, + 7 => GlycanSubstituent::Aric, + 8 => GlycanSubstituent::CargoxyEthylidene, + 9 => GlycanSubstituent::Deoxy, + 10 => GlycanSubstituent::Didehydro, + 11 => GlycanSubstituent::DiMethyl, + 12 => GlycanSubstituent::Element(rng.random()), + 13 => GlycanSubstituent::Ethanolamine, + 14 => GlycanSubstituent::EtOH, + 15 => GlycanSubstituent::Formyl, + 16 => GlycanSubstituent::Glyceryl, + 17 => GlycanSubstituent::Glycolyl, + 18 => GlycanSubstituent::Glycyl, + 19 => GlycanSubstituent::HydroxyButyryl, + 20 => GlycanSubstituent::Lac, + 21 => GlycanSubstituent::Lactyl, + 22 => GlycanSubstituent::Methyl, + 23 => GlycanSubstituent::NAcetyl, + 24 => GlycanSubstituent::NDiMe, + 25 => GlycanSubstituent::NFo, + 26 => GlycanSubstituent::NGlycolyl, + 27 => GlycanSubstituent::OCarboxyEthyl, + 28 => GlycanSubstituent::PCholine, + 29 => GlycanSubstituent::Phosphate, + 30 => GlycanSubstituent::Pyruvyl, + 31 => GlycanSubstituent::Suc, + 32 => GlycanSubstituent::Sulfate, + 33 => GlycanSubstituent::Tauryl, + 34 => GlycanSubstituent::Ulo, + 35 => GlycanSubstituent::Ulof, _ => GlycanSubstituent::Water, } } @@ -134,7 +126,7 @@ impl Distribution for StandardUniform { 5 => BaseSugar::Hexose(None), 6 => BaseSugar::Heptose(None), 7 => BaseSugar::Octose, - 8 => BaseSugar::Nonose, + 8 => BaseSugar::Nonose(None), _ => BaseSugar::Decose, } } diff --git a/rustyms/src/shared/element.rs b/rustyms/src/shared/element.rs index fb564a60..1be81b52 100644 --- a/rustyms/src/shared/element.rs +++ b/rustyms/src/shared/element.rs @@ -505,135 +505,137 @@ impl TryFrom for Element { } } +impl Element { + /// Get the symbol for this element + pub const fn symbol(self) -> &'static str { + match self { + Self::H => "H", + Self::He => "He", + Self::Li => "Li", + Self::Be => "Be", + Self::B => "B", + 
Self::C => "C", + Self::N => "N", + Self::O => "O", + Self::F => "F", + Self::Ne => "Ne", + Self::Na => "Na", + Self::Mg => "Mg", + Self::Al => "Al", + Self::Si => "Si", + Self::P => "P", + Self::S => "S", + Self::Cl => "Cl", + Self::Ar => "Ar", + Self::K => "K", + Self::Ca => "Ca", + Self::Sc => "Sc", + Self::Ti => "Ti", + Self::V => "V", + Self::Cr => "Cr", + Self::Mn => "Mn", + Self::Fe => "Fe", + Self::Co => "Co", + Self::Ni => "Ni", + Self::Cu => "Cu", + Self::Zn => "Zn", + Self::Ga => "Ga", + Self::Ge => "Ge", + Self::As => "As", + Self::Se => "Se", + Self::Br => "Br", + Self::Kr => "Kr", + Self::Rb => "Rb", + Self::Sr => "Sr", + Self::Y => "Y", + Self::Zr => "Zr", + Self::Nb => "Nb", + Self::Mo => "Mo", + Self::Tc => "Tc", + Self::Ru => "Ru", + Self::Rh => "Rh", + Self::Pd => "Pd", + Self::Ag => "Ag", + Self::Cd => "Cd", + Self::In => "In", + Self::Sn => "Sn", + Self::Sb => "Sb", + Self::Te => "Te", + Self::I => "I", + Self::Xe => "Xe", + Self::Cs => "Cs", + Self::Ba => "Ba", + Self::La => "La", + Self::Ce => "Ce", + Self::Pr => "Pr", + Self::Nd => "Nd", + Self::Pm => "Pm", + Self::Sm => "Sm", + Self::Eu => "Eu", + Self::Gd => "Gd", + Self::Tb => "Tb", + Self::Dy => "Dy", + Self::Ho => "Ho", + Self::Er => "Er", + Self::Tm => "Tm", + Self::Yb => "Yb", + Self::Lu => "Lu", + Self::Hf => "Hf", + Self::Ta => "Ta", + Self::W => "W", + Self::Re => "Re", + Self::Os => "Os", + Self::Ir => "Ir", + Self::Pt => "Pt", + Self::Au => "Au", + Self::Hg => "Hg", + Self::Tl => "Tl", + Self::Pb => "Pb", + Self::Bi => "Bi", + Self::Po => "Po", + Self::At => "At", + Self::Rn => "Rn", + Self::Fr => "Fr", + Self::Ra => "Ra", + Self::Ac => "Ac", + Self::Th => "Th", + Self::Pa => "Pa", + Self::U => "U", + Self::Np => "Np", + Self::Pu => "Pu", + Self::Am => "Am", + Self::Cm => "Cm", + Self::Bk => "Bk", + Self::Cf => "Cf", + Self::Es => "Es", + Self::Fm => "Fm", + Self::Md => "Md", + Self::No => "No", + Self::Lr => "Lr", + Self::Rf => "Rf", + Self::Db => "Db", + Self::Sg => "Sg", + 
Self::Bh => "Bh", + Self::Hs => "Hs", + Self::Mt => "Mt", + Self::Ds => "Ds", + Self::Rg => "Rg", + Self::Cn => "Cn", + Self::Nh => "Nh", + Self::Fl => "Fl", + Self::Mc => "Mc", + Self::Lv => "Lv", + Self::Ts => "Ts", + Self::Og => "Og", + // Self::Proton => "Proton", + Self::Electron => "e", + } + } +} + impl std::fmt::Display for Element { - #[expect(clippy::too_many_lines)] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - Self::H => "H", - Self::He => "He", - Self::Li => "Li", - Self::Be => "Be", - Self::B => "B", - Self::C => "C", - Self::N => "N", - Self::O => "O", - Self::F => "F", - Self::Ne => "Ne", - Self::Na => "Na", - Self::Mg => "Mg", - Self::Al => "Al", - Self::Si => "Si", - Self::P => "P", - Self::S => "S", - Self::Cl => "Cl", - Self::Ar => "Ar", - Self::K => "K", - Self::Ca => "Ca", - Self::Sc => "Sc", - Self::Ti => "Ti", - Self::V => "V", - Self::Cr => "Cr", - Self::Mn => "Mn", - Self::Fe => "Fe", - Self::Co => "Co", - Self::Ni => "Ni", - Self::Cu => "Cu", - Self::Zn => "Zn", - Self::Ga => "Ga", - Self::Ge => "Ge", - Self::As => "As", - Self::Se => "Se", - Self::Br => "Br", - Self::Kr => "Kr", - Self::Rb => "Rb", - Self::Sr => "Sr", - Self::Y => "Y", - Self::Zr => "Zr", - Self::Nb => "Nb", - Self::Mo => "Mo", - Self::Tc => "Tc", - Self::Ru => "Ru", - Self::Rh => "Rh", - Self::Pd => "Pd", - Self::Ag => "Ag", - Self::Cd => "Cd", - Self::In => "In", - Self::Sn => "Sn", - Self::Sb => "Sb", - Self::Te => "Te", - Self::I => "I", - Self::Xe => "Xe", - Self::Cs => "Cs", - Self::Ba => "Ba", - Self::La => "La", - Self::Ce => "Ce", - Self::Pr => "Pr", - Self::Nd => "Nd", - Self::Pm => "Pm", - Self::Sm => "Sm", - Self::Eu => "Eu", - Self::Gd => "Gd", - Self::Tb => "Tb", - Self::Dy => "Dy", - Self::Ho => "Ho", - Self::Er => "Er", - Self::Tm => "Tm", - Self::Yb => "Yb", - Self::Lu => "Lu", - Self::Hf => "Hf", - Self::Ta => "Ta", - Self::W => "W", - Self::Re => "Re", - Self::Os => "Os", - Self::Ir => 
"Ir", - Self::Pt => "Pt", - Self::Au => "Au", - Self::Hg => "Hg", - Self::Tl => "Tl", - Self::Pb => "Pb", - Self::Bi => "Bi", - Self::Po => "Po", - Self::At => "At", - Self::Rn => "Rn", - Self::Fr => "Fr", - Self::Ra => "Ra", - Self::Ac => "Ac", - Self::Th => "Th", - Self::Pa => "Pa", - Self::U => "U", - Self::Np => "Np", - Self::Pu => "Pu", - Self::Am => "Am", - Self::Cm => "Cm", - Self::Bk => "Bk", - Self::Cf => "Cf", - Self::Es => "Es", - Self::Fm => "Fm", - Self::Md => "Md", - Self::No => "No", - Self::Lr => "Lr", - Self::Rf => "Rf", - Self::Db => "Db", - Self::Sg => "Sg", - Self::Bh => "Bh", - Self::Hs => "Hs", - Self::Mt => "Mt", - Self::Ds => "Ds", - Self::Rg => "Rg", - Self::Cn => "Cn", - Self::Nh => "Nh", - Self::Fl => "Fl", - Self::Mc => "Mc", - Self::Lv => "Lv", - Self::Ts => "Ts", - Self::Og => "Og", - // Self::Proton => "Proton", - Self::Electron => "e", - } - ) + write!(f, "{}", self.symbol()) } } diff --git a/rustyms/src/shared/glycan.rs b/rustyms/src/shared/glycan.rs index 96e1dec4..de25777f 100644 --- a/rustyms/src/shared/glycan.rs +++ b/rustyms/src/shared/glycan.rs @@ -9,12 +9,30 @@ use crate::{ Element, SequencePosition, ELEMENT_PARSE_LIST, }; +/// Glycan absolute configuration +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] +pub enum Configuration { + /// D configuration + D, + /// L configuration + L, + /// Double configuration D and D + DD, + /// Double configuration L and L + LL, + /// Double configuration D and L + DL, + /// Double configuration L and D + LD, +} + /// A monosaccharide with all its complexity #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] pub struct MonoSaccharide { pub(super) base_sugar: BaseSugar, pub(super) substituents: Vec, pub(super) furanose: bool, + pub(super) configuration: Option, pub(super) proforma_name: Option, } @@ -25,6 +43,7 @@ impl MonoSaccharide { base_sugar: sugar, substituents: substituents.to_owned(), furanose: false, + 
configuration: None, proforma_name: None, } } @@ -49,6 +68,16 @@ impl MonoSaccharide { } } + /// Set this saccharide up to be a certain configuration + #[must_use] + #[allow(dead_code)] + pub fn configuration(self, configuration: Configuration) -> Self { + Self { + configuration: Some(configuration), + ..self + } + } + /// Simplify a glycan composition to be sorted and deduplicated. /// Returns None if overflow occurred, meaning that there where more than `isize::MAX` or less then `isize::MIN` monosaccharides for one species. pub(crate) fn simplify_composition( @@ -116,9 +145,21 @@ impl MonoSaccharide { let line = original_line.to_ascii_lowercase(); let bytes = line.as_bytes(); let mut substituents = Vec::new(); + let mut configuration = None; + let mut epi = false; // ignore stuff - index += line[index..].ignore(&["keto-", "d-", "l-", "?-"]); + index += line[index..].ignore(&["keto-"]); + if line[index..].starts_with("d-") { + configuration = Some(Configuration::D); + index += 2; + } else if line[index..].starts_with("l-") { + configuration = Some(Configuration::L); + index += 2; + } else if line[index..].starts_with("?-") { + configuration = None; + index += 2; + } // Prefix mods let mut amount = 1; if bytes[index].is_ascii_digit() { @@ -160,19 +201,31 @@ impl MonoSaccharide { } index += line[index..].ignore(&["-"]); } - // Detect & ignore epi state - index += line[index..].ignore(&["e"]); + // Detect epi state + if line[index..].starts_with('e') { + epi = true; + index += 1; + } // Get the prefix mods if !line[index..].starts_with("dig") && !line[index..].starts_with("dha") { if let Some(o) = line[index..].take_any(PREFIX_SUBSTITUENTS, |e| { - substituents.extend(std::iter::repeat(e.clone()).take(amount)); + substituents.extend(std::iter::repeat(*e).take(amount)); }) { index += o; } index += line[index..].ignore(&["-"]); } // Another optional isomeric state - index += line[index..].ignore(&["d-", "l-", "?-"]); + if line[index..].starts_with("d-") { + 
configuration = Some(Configuration::D); + index += 2; + } else if line[index..].starts_with("l-") { + configuration = Some(Configuration::L); + index += 2; + } else if line[index..].starts_with("?-") { + configuration = None; + index += 2; + } // Base sugar let mut sugar = None; for sug in BASE_SUGARS { @@ -185,12 +238,18 @@ impl MonoSaccharide { let mut sugar = sugar .map(|(b, s)| { let mut alo = Self { - base_sugar: b, + base_sugar: match b { + BaseSugar::Nonose(Some(NonoseIsomer::Leg)) if epi => { + BaseSugar::Nonose(Some(NonoseIsomer::ELeg)) + } + other => other, + }, substituents, furanose: false, + configuration, proforma_name: None, }; - alo.substituents.extend(s.iter().cloned()); + alo.substituents.extend(s.iter().copied()); alo }) .ok_or_else(|| { @@ -243,14 +302,14 @@ impl MonoSaccharide { sugar.substituents.extend( e.iter() .flat_map(|s| std::iter::repeat(s).take(double_amount)) - .cloned(), + .copied(), ); if single_amount > 0 { sugar.substituents.extend( e.iter() .filter(|s| **s != GlycanSubstituent::Water) .flat_map(|s| std::iter::repeat(s).take(single_amount)) - .cloned(), + .copied(), ); } }) { @@ -272,7 +331,7 @@ impl MonoSaccharide { if let Some(o) = line[index..].take_any(POSTFIX_SUBSTITUENTS, |e| { sugar .substituents - .extend(std::iter::repeat(e.clone()).take(amount)); + .extend(std::iter::repeat(*e).take(amount)); }) { index += o; } else if let Some(o) = line[index..].take_any(ELEMENT_PARSE_LIST, |e| { @@ -455,7 +514,7 @@ pub enum BaseSugar { /// 8 carbon base sugar Octose, /// 9 carbon base sugar - Nonose, + Nonose(Option), /// 10 carbon base sugar Decose, } @@ -474,7 +533,7 @@ impl Display for BaseSugar { Self::Hexose(_) => "Hex", Self::Heptose(_) => "Hep", Self::Octose => "Oct", - Self::Nonose => "Non", + Self::Nonose(_) => "Non", Self::Decose => "Dec", } ) @@ -496,7 +555,7 @@ impl Chemical for BaseSugar { Self::Hexose(_) => molecular_formula!(H 10 C 6 O 5), Self::Heptose(_) => molecular_formula!(H 12 C 7 O 6), Self::Octose => 
molecular_formula!(H 14 C 8 O 7), - Self::Nonose => molecular_formula!(H 16 C 9 O 8), + Self::Nonose(_) => molecular_formula!(H 16 C 9 O 8), Self::Decose => molecular_formula!(H 18 C 10 O 9), } } @@ -568,19 +627,34 @@ pub enum HeptoseIsomer { Sedoheptulose, } +/// Any 9 carbon glycan, these isomers are modification specific (need the correct substituents +/// applied to be meaningful). These are to be used only to store isomeric state that was inferred +/// from other sources that cannot be tracked in other ways in the current structure. Any isomer +/// used that does not have the correct monosaccharide substituents applied is meaningless. +#[allow(dead_code)] +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] +pub enum NonoseIsomer { + /// 3-Deoxy-D-glycero-D-galacto-non-2-ulopyranosonic acid + Kdn, + /// 5,7-Diamino-3,5,7,9-tetradeoxy-L-glycero-L-manno-non-2-ulopyranosonic acid + Pse, + /// 5,7-Diamino-3,5,7,9-tetradeoxy-D-glycero-D-galacto-non-2-ulopyranosonic acid + Leg, + /// 4 or 8 eLeg + ELeg, + /// 5,7-Diamino-3,5,7,9-tetradeoxy-L-glycero-L-altro-non-2-ulopyranosonic acid + Aci, +} + /// Any substituent on a monosaccharide. /// Source: table 3. 
#[allow(dead_code)] -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] +#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] pub enum GlycanSubstituent { ///`Am` N-acetimidoyl Acetimidoyl, ///`Ac` acetyl Acetyl, - ///`Ala2Ac` N-acetyl-D-alanyl - AcetylAlanyl, - ///`Gln2Ac` N-acetyl-glutaminyl - AcetylGlutaminyl, ///`A` acid Acid, ///`Ala` D-alanyl @@ -595,14 +669,8 @@ pub enum GlycanSubstituent { CargoxyEthylidene, ///`d` Deoxy Deoxy, - ///`3,4Hb` 3,4-dihydroxybutyryl - DiHydroxyButyryl, ///`DiMe` two methyl DiMethyl, - ///`AmMe2` N-(N,N-dimethyl-acetimidoyl) - DiMethylAcetimidoyl, - ///`Gr2,3Me2` 2,3-di-O-methyl-glyceryl - DiMethylGlyceryl, ///`en` didehydro an addition of a double bond Didehydro, ///`An` element that replaces a side chain @@ -629,10 +697,6 @@ pub enum GlycanSubstituent { Lactyl, ///`Me` methyl Methyl, - ///`AmMe` N-(N-methyl-acetimidoyl) - MethylAcetimidoyl, - ///5Glu2Me N-methyl-5-glutamyl - MethylGlutamyl, ///`NAc` N-acetyl NAcetyl, ///`N2DiMe` N linked double methyl @@ -663,58 +727,54 @@ pub enum GlycanSubstituent { Water, } +impl GlycanSubstituent { + /// Get the symbol used to denote this substituent + pub const fn notation(self) -> &'static str { + match self { + Self::Acetimidoyl => "Am", + Self::Acetyl => "Ac", + Self::Acid => "A", + Self::Alanyl => "Ala", + Self::Alcohol => "ol", + Self::Amino => "N", + Self::Aric => "aric", + Self::CargoxyEthylidene => "Pyr", + Self::Deoxy => "d", + Self::Didehydro => "en", + Self::DiMethyl => "Me2", + Self::Ethanolamine => "Etn", + Self::Element(el) => el.symbol(), + Self::EtOH => "EtOH", + Self::Formyl => "Fo", + Self::Glyceryl => "Gr", + Self::Glycolyl => "Gc", + Self::Glycyl => "Gly", + Self::HydroxyButyryl => "Hb", + Self::HydroxyMethyl => "HMe", + Self::Lac => "Lac", + Self::Lactyl => "Lt", + Self::Methyl => "Me", + Self::NAcetyl => "NAc", + Self::NDiMe => "NDiMe", + Self::NFo => "NFo", + Self::NGlycolyl => "NGc", + 
Self::OCarboxyEthyl => "carboxyethyl", + Self::PCholine => "PCho", + Self::Phosphate => "P", + Self::Pyruvyl => "Py", + Self::Suc => "Suc", + Self::Sulfate => "S", + Self::Tauryl => "Tau", + Self::Ulo => "ulo", + Self::Ulof => "ulof", + Self::Water => "water_loss", + } + } +} + impl Display for GlycanSubstituent { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - Self::Acetimidoyl => "Am".to_string(), - Self::Acetyl => "Ac".to_string(), - Self::AcetylAlanyl => "Ala2Ac".to_string(), - Self::AcetylGlutaminyl => "Gln2Ac".to_string(), - Self::Acid => "A".to_string(), - Self::Alanyl => "Ala".to_string(), - Self::Alcohol => "ol".to_string(), - Self::Amino => "N".to_string(), - Self::Aric => "aric".to_string(), - Self::CargoxyEthylidene => "Pyr".to_string(), - Self::Deoxy => "d".to_string(), - Self::Didehydro => "en".to_string(), - Self::DiHydroxyButyryl => "3,4Hb".to_string(), - Self::DiMethyl => "DiMe".to_string(), - Self::DiMethylAcetimidoyl => "AmMe2".to_string(), - Self::DiMethylGlyceryl => "Gr2,3Me2".to_string(), - Self::Ethanolamine => "Etn".to_string(), - Self::Element(el) => el.to_string(), - Self::EtOH => "EtOH".to_string(), - Self::Formyl => "Fo".to_string(), - Self::Glyceryl => "Gr".to_string(), - Self::Glycolyl => "Gc".to_string(), - Self::Glycyl => "Gly".to_string(), - Self::HydroxyButyryl => "Hb".to_string(), - Self::HydroxyMethyl => "HMe".to_string(), - Self::Lac => "Lac".to_string(), - Self::Lactyl => "Lt".to_string(), - Self::Methyl => "Me".to_string(), - Self::MethylAcetimidoyl => "AmMe".to_string(), - Self::MethylGlutamyl => "5Glu2Me".to_string(), - Self::NAcetyl => "NAc".to_string(), - Self::NDiMe => "NDiMe".to_string(), - Self::NFo => "NFo".to_string(), - Self::NGlycolyl => "NGc".to_string(), - Self::OCarboxyEthyl => "carboxyethyl".to_string(), - Self::PCholine => "PCho".to_string(), - Self::Phosphate => "P".to_string(), - Self::Pyruvyl => "Py".to_string(), - Self::Suc => "Suc".to_string(), - 
Self::Sulfate => "S".to_string(), - Self::Tauryl => "Tau".to_string(), - Self::Ulo => "ulo".to_string(), - Self::Ulof => "ulof".to_string(), - Self::Water => "water_loss".to_string(), - } - ) + write!(f, "{}", self.notation()) } } @@ -727,8 +787,6 @@ impl Chemical for GlycanSubstituent { let side = match self { Self::Acetimidoyl => molecular_formula!(H 5 C 2 N 1), Self::Acetyl => molecular_formula!(H 3 C 2 O 1), - Self::AcetylAlanyl => molecular_formula!(H 8 C 5 N 1 O 2), - Self::AcetylGlutaminyl => molecular_formula!(H 11 C 7 N 2 O 3), Self::Acid => molecular_formula!(H -1 O 2), // Together with the replacement below this is H-2 O+1 Self::Alanyl => molecular_formula!(H 6 C 3 N 1 O 1), Self::Alcohol => molecular_formula!(H 3 O 1), // Together with the replacement below this is H+2 @@ -737,10 +795,7 @@ impl Chemical for GlycanSubstituent { Self::CargoxyEthylidene => molecular_formula!(H 3 C 3 O 3), // double substituent, calculated to work with the additional side chain deletion Self::Deoxy => molecular_formula!(H 1), // Together with the replacement below this is O-1 Self::Didehydro => molecular_formula!(H -1 O 1), // Together with the replacement below this is H-2 - Self::DiHydroxyButyryl => molecular_formula!(H 7 C 4 O 3), Self::DiMethyl => molecular_formula!(H 5 C 2), // assumed to replace the both the OH and H on a single carbon - Self::DiMethylAcetimidoyl => molecular_formula!(H 9 C 4 N 1), - Self::DiMethylGlyceryl => molecular_formula!(H 9 C 5 O 3), Self::Ethanolamine => molecular_formula!(H 6 C 2 N 1 O 1), Self::EtOH => molecular_formula!(H 5 C 2 O 2), Self::Element(el) => MolecularFormula::new(&[(*el, None, 1)], &[]).unwrap(), @@ -752,8 +807,6 @@ impl Chemical for GlycanSubstituent { Self::HydroxyMethyl | Self::Ulo => molecular_formula!(H 3 C 1 O 2), // Ulo: replaces H, together with replacement below this is H2C1O1 Self::Lactyl => molecular_formula!(H 5 C 3 O 2), Self::Methyl => molecular_formula!(H 3 C 1), - Self::MethylAcetimidoyl => molecular_formula!(H 
7 C 3 N 1), - Self::MethylGlutamyl => molecular_formula!(H 10 C 6 N 1 O 3), Self::NDiMe => molecular_formula!(H 6 C 2 N 1), Self::NFo => molecular_formula!(H 2 C 1 N 1 O 1), Self::NGlycolyl => molecular_formula!(H 4 C 2 N 1 O 2), diff --git a/rustyms/src/shared/glycan_lists.rs b/rustyms/src/shared/glycan_lists.rs index cbb5dd6b..b45dfb28 100644 --- a/rustyms/src/shared/glycan_lists.rs +++ b/rustyms/src/shared/glycan_lists.rs @@ -34,12 +34,12 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ), ( "neu", - BaseSugar::Nonose, + BaseSugar::Nonose(None), &[GlycanSubstituent::Amino, GlycanSubstituent::Acid], ), ( "sia", - BaseSugar::Nonose, + BaseSugar::Nonose(None), &[ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -48,7 +48,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ), ( "kdn", - BaseSugar::Nonose, + BaseSugar::Nonose(Some(NonoseIsomer::Kdn)), &[ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -167,7 +167,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ), ( "pse", - BaseSugar::Nonose, + BaseSugar::Nonose(Some(NonoseIsomer::Pse)), &[ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -178,7 +178,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ), ( "leg", - BaseSugar::Nonose, + BaseSugar::Nonose(Some(NonoseIsomer::Leg)), &[ GlycanSubstituent::Acid, GlycanSubstituent::Amino, @@ -189,7 +189,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ), ( "aci", - BaseSugar::Nonose, + BaseSugar::Nonose(Some(NonoseIsomer::Aci)), &[ GlycanSubstituent::Acid, GlycanSubstituent::Amino, @@ -205,23 +205,16 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ // * Add an additional level which defines the leaving group, to make the chemical formula difference easier const POSTFIX_SUBSTITUENTS: &[(&str, GlycanSubstituent)] = &[ ("ac", GlycanSubstituent::Acetyl), - ("ala2ac", GlycanSubstituent::AcetylAlanyl), ("ala", 
GlycanSubstituent::Alanyl), - ("amme2", GlycanSubstituent::DiMethylAcetimidoyl), - ("amme", GlycanSubstituent::MethylAcetimidoyl), ("am", GlycanSubstituent::Acetimidoyl), ("en", GlycanSubstituent::Didehydro), ("fo", GlycanSubstituent::Formyl), ("gc", GlycanSubstituent::Glycolyl), - ("gln2ac", GlycanSubstituent::AcetylGlutaminyl), - ("5glu2me", GlycanSubstituent::MethylGlutamyl), ("gly", GlycanSubstituent::Glycyl), ("gr", GlycanSubstituent::Glyceryl), - ("gr2,3Me2", GlycanSubstituent::DiMethylGlyceryl), ("4hb", GlycanSubstituent::HydroxyButyryl), ("3rhb", GlycanSubstituent::HydroxyButyryl), ("3shb", GlycanSubstituent::HydroxyButyryl), - ("3,4Hb", GlycanSubstituent::DiHydroxyButyryl), ("lt", GlycanSubstituent::Lactyl), ("lac", GlycanSubstituent::Lac), ("me", GlycanSubstituent::Methyl), @@ -277,6 +270,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Phosphate], proforma_name: Some("phosphate".to_string()), furanose: false, + configuration: None, }, ), ( @@ -286,6 +280,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Sulfate], proforma_name: Some("sulfate".to_string()), furanose: false, + configuration: None, }, ), ( @@ -295,6 +290,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Sug".to_string()), furanose: false, + configuration: None, }, ), ( @@ -304,6 +300,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Tri".to_string()), furanose: false, + configuration: None, }, ), ( @@ -313,6 +310,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Tet".to_string()), furanose: false, + configuration: None, }, ), ( @@ -322,6 +320,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: 
Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -331,6 +330,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -340,6 +340,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -349,6 +350,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -358,6 +360,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -367,6 +370,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Pen".to_string()), furanose: false, + configuration: None, }, ), ( @@ -376,6 +380,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Acid], proforma_name: Some("a-Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -389,6 +394,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("en,a-Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -398,6 +404,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("d-Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -407,6 +414,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("d-Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -416,6 +424,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { 
substituents: vec![GlycanSubstituent::NAcetyl, GlycanSubstituent::Sulfate], proforma_name: Some("HexNAc(S)".to_string()), furanose: false, + configuration: None, }, ), ( @@ -425,6 +434,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -434,6 +444,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -443,6 +454,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -452,6 +464,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -461,6 +474,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -470,6 +484,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -479,6 +494,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -488,6 +504,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: 
None, }, ), ( @@ -497,6 +514,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Amino, GlycanSubstituent::Sulfate], proforma_name: Some("HexNS".to_string()), furanose: false, + configuration: None, }, ), ( @@ -506,6 +524,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Amino], proforma_name: Some("HexN".to_string()), furanose: false, + configuration: None, }, ), ( @@ -515,6 +534,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Sulfate], proforma_name: Some("HexS".to_string()), furanose: false, + configuration: None, }, ), ( @@ -524,6 +544,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Phosphate], proforma_name: Some("HexP".to_string()), furanose: false, + configuration: None, }, ), ( @@ -533,6 +554,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -542,6 +564,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -551,6 +574,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -560,6 +584,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -569,6 +594,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -578,6 +604,7 @@ pub fn 
glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -587,6 +614,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -596,6 +624,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -605,6 +634,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -614,6 +644,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hep".to_string()), furanose: false, + configuration: None, }, ), ( @@ -623,6 +654,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Oct".to_string()), furanose: false, + configuration: None, }, ), ( @@ -632,21 +664,23 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Deoxy, GlycanSubstituent::Acid], proforma_name: Some("Oct".to_string()), furanose: false, + configuration: None, }, ), ( "non".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![], proforma_name: Some("Non".to_string()), furanose: false, + configuration: None, }, ), ( "kdn".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(Some(NonoseIsomer::Kdn)), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -654,12 +688,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Non".to_string()), furanose: 
false, + configuration: None, }, ), ( "sia".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -667,6 +702,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Non".to_string()), furanose: false, + configuration: None, }, ), ( @@ -676,12 +712,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Dec".to_string()), furanose: false, + configuration: None, }, ), ( "neu5ac".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Acetyl, @@ -689,12 +726,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Ac".to_string()), furanose: false, + configuration: None, }, ), ( "neuac".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Acetyl, @@ -702,12 +740,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Ac".to_string()), furanose: false, + configuration: None, }, ), ( "neu5gc".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Glycolyl, @@ -715,12 +754,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Gc".to_string()), furanose: false, + configuration: None, }, ), ( "neugc".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Glycolyl, @@ -728,12 +768,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], 
proforma_name: Some("Neu5Gc".to_string()), furanose: false, + configuration: None, }, ), ( "neu".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Deoxy, @@ -741,6 +782,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu".to_string()), furanose: false, + configuration: None, }, ), ( @@ -750,6 +792,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Deoxy], proforma_name: Some("Fuc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -759,6 +802,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Xxx".to_string()), furanose: false, + configuration: None, }, ), ( @@ -768,6 +812,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Alcohol], proforma_name: None, furanose: false, + configuration: None, }, ), ( @@ -777,6 +822,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Methyl], proforma_name: None, furanose: false, + configuration: None, }, ), // Single letter codes, by defining them like this they will be read but exported to the standard ProForma codes @@ -787,6 +833,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Acetyl], proforma_name: None, furanose: false, + configuration: None, }, ), ( @@ -796,6 +843,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Phosphate], proforma_name: Some("Hexphosphate".to_string()), // TODO: technically maybe not working when multiple are in there, think it through, should be two different elements, both getting counts after them furanose: false, + configuration: None, }, ), ( 
@@ -805,6 +853,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![], proforma_name: Some("Hex".to_string()), furanose: false, + configuration: None, }, ), ( @@ -814,6 +863,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::NAcetyl], proforma_name: Some("HexNAc".to_string()), furanose: false, + configuration: None, }, ), ( @@ -823,12 +873,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { substituents: vec![GlycanSubstituent::Deoxy], proforma_name: Some("Fuc".to_string()), furanose: false, + configuration: None, }, ), ( "s".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Acetyl, @@ -836,12 +887,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Ac".to_string()), furanose: false, + configuration: None, }, ), ( "a".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Acetyl, @@ -849,12 +901,13 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Ac".to_string()), furanose: false, + configuration: None, }, ), ( "g".to_string(), MonoSaccharide { - base_sugar: BaseSugar::Nonose, + base_sugar: BaseSugar::Nonose(None), substituents: vec![ GlycanSubstituent::Amino, GlycanSubstituent::Glycolyl, @@ -862,6 +915,7 @@ pub fn glycan_parse_list() -> &'static Vec<(String, MonoSaccharide)> { ], proforma_name: Some("Neu5Gc".to_string()), furanose: false, + configuration: None, }, ), ] diff --git a/rustyms/src/spectrum/fragmentation.rs b/rustyms/src/spectrum/fragmentation.rs index 2f5c052c..15b2195e 100644 --- a/rustyms/src/spectrum/fragmentation.rs +++ b/rustyms/src/spectrum/fragmentation.rs @@ -41,7 +41,12 @@ pub trait 
AnnotatableSpectrum { // Get the index of the element closest to this value if let Some(index) = Self::search(self, mz, tolerance) { - annotated.spectrum[index].annotation.push(fragment.clone()); + // Keep the theoretical fragments sorted to have the highest theoretical likelihood on top + match annotated.spectrum[index].annotation.binary_search(fragment) { + Ok(ai) | Err(ai) => annotated.spectrum[index] + .annotation + .insert(ai, fragment.clone()), + } } } } diff --git a/rustyms/src/spectrum/scores.rs b/rustyms/src/spectrum/scores.rs index f43253f9..46506f1f 100644 --- a/rustyms/src/spectrum/scores.rs +++ b/rustyms/src/spectrum/scores.rs @@ -270,7 +270,7 @@ impl AnnotatedSpectrum { .chain( [ FragmentKind::Y, - FragmentKind::Oxonium, + FragmentKind::B, FragmentKind::immonium, FragmentKind::precursor_side_chain_loss, FragmentKind::diagnostic,