diff --git a/rustyms-generate-imgt/src/structs.rs b/rustyms-generate-imgt/src/structs.rs index 3f1bb907..f6901152 100644 --- a/rustyms-generate-imgt/src/structs.rs +++ b/rustyms-generate-imgt/src/structs.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use crate::imgt_gene::IMGTGene; use crate::shared::{AnnotatedSequence, Gene, Species}; -use rustyms::AminoAcid; +use rustyms::{AminoAcid, IsAminoAcid}; #[derive(Debug)] pub struct DataItem { @@ -57,7 +57,12 @@ impl Display for Region { // self.found_seq.0, self.found_seq .as_ref() - .map(|seq| seq.1 .0.iter().map(|a| a.char()).collect::()) + .map(|seq| seq + .1 + .0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::()) .unwrap_or_else(|e| format!(": {e}")), ) } @@ -237,7 +242,10 @@ impl std::fmt::Debug for AASequence { write!( f, "[{}]", - self.0.iter().map(|a| a.char()).collect::() + self.0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::() ) } } diff --git a/rustyms-py/src/lib.rs b/rustyms-py/src/lib.rs index 13c23e5f..3d0822c2 100644 --- a/rustyms-py/src/lib.rs +++ b/rustyms-py/src/lib.rs @@ -6,7 +6,7 @@ use std::num::NonZeroU16; use ordered_float::OrderedFloat; use pyo3::{exceptions::PyValueError, prelude::*, types::PyType}; -use rustyms::{AnnotatableSpectrum, Chemical, Linked, MultiChemical}; +use rustyms::{AnnotatableSpectrum, Chemical, IsAminoAcid, Linked, MultiChemical}; /// Mass mode enum. #[pyclass(eq, eq_int)] @@ -440,7 +440,7 @@ impl AminoAcid { } fn __str__(&self) -> String { - self.0.char().to_string() + self.0.pro_forma_definition().to_string() } fn __repr__(&self) -> String { @@ -1124,7 +1124,7 @@ impl Peptidoform { self.0 .sequence() .iter() - .map(|x| x.aminoacid.char()) + .map(|x| x.aminoacid.pro_forma_definition()) .collect() } diff --git a/rustyms/src/align/multi_alignment.rs b/rustyms/src/align/multi_alignment.rs index 24e72a8f..6359de3d 100644 --- a/rustyms/src/align/multi_alignment.rs +++ b/rustyms/src/align/multi_alignment.rs @@ -36,7 +36,7 @@ impl MultiAlignmentLine<'_, Complexity> { { print!( "{}{}", - piece.1.aminoacid.char(), + piece.1.aminoacid, "·".repeat(piece.0.step as usize - 1) ); } diff --git a/rustyms/src/aminoacids.rs b/rustyms/src/aminoacids.rs index 687ec960..c81ad472 100644 --- a/rustyms/src/aminoacids.rs +++ b/rustyms/src/aminoacids.rs @@ -12,7 +12,7 @@ use crate::{ use std::borrow::Cow; /// A general trait to define amino acids. -pub trait IsAminoAcid { +pub trait IsAminoAcid: MultiChemical { /// The full name for this amino acid. fn name(&self) -> Cow<'_, str>; /// The three letter code for this amino acid. Or None if there is no common three letter @@ -26,9 +26,6 @@ pub trait IsAminoAcid { /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be /// used if Arginine was not defined as `R` in ProForma. fn pro_forma_definition(&self) -> Cow<'_, str>; - /// The full molecular formula for this amino acid. It allows multiple molecular formulas to - /// allow ambiguous amino acids such as B and Z. - fn formulas(&self) -> Cow<'_, Multi>; /// The monoisotopic mass of this amino acid. Should be redefined for better performance. fn monoisotopic_mass(&self) -> Cow<'_, Multi> { Cow::Owned( @@ -51,79 +48,224 @@ pub trait IsAminoAcid { fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) } - /// The molecular formula of the side chain of the amino acid. - fn side_chain(&self) -> Cow<'_, Multi>; + /// The molecular formula of the side chain of the amino acid. The `sequence_index` and + /// `peptidoform_index` are used to keep track of ambiguous amino acids. + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi>; /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned - /// if no satellite ions are possible. - fn satellite_ion_fragments(&self) -> Option>>; + /// if no satellite ions are possible. The `sequence_index` and `peptidoform_index` are used to + /// keep track of ambiguous amino acids. + fn satellite_ion_fragments( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Option>>; /// Common neutral losses for the immonium ion of this amino acid. fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; } +impl std::fmt::Display for dyn IsAminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) + } +} + include!("shared/aminoacid.rs"); -impl AminoAcid { - /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) - pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::AmbiguousLeucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - Self::Selenocysteine, - Self::Pyrrolysine, - ]; +impl std::fmt::Display for AminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) + } +} - /// All 20 canonical amino acids - pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::Leucine, - Self::Isoleucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - ]; +impl IsAminoAcid for AminoAcid { + /// Get the single letter representation of the amino acid + fn one_letter_code(&self) -> Option { + Some(match self { + Self::Alanine => 'A', + Self::AmbiguousAsparagine => 'B', + Self::Cysteine => 'C', + Self::AsparticAcid => 'D', + Self::GlutamicAcid => 'E', + Self::Phenylalanine => 'F', + Self::Glycine => 'G', + Self::Histidine => 'H', + Self::Isoleucine => 'I', + Self::AmbiguousLeucine => 'J', + Self::Lysine => 'K', + Self::Leucine => 'L', + Self::Methionine => 'M', + Self::Asparagine => 'N', + Self::Pyrrolysine => 'O', + Self::Proline => 'P', + Self::Glutamine => 'Q', + Self::Arginine => 'R', + Self::Serine => 'S', + Self::Threonine => 'T', + Self::Selenocysteine => 'U', + Self::Valine => 'V', + Self::Tryptophan => 'W', + Self::Unknown => 'X', + Self::Tyrosine => 'Y', + Self::AmbiguousGlutamine => 'Z', + }) + } + + fn pro_forma_definition(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "A", + Self::AmbiguousAsparagine => "B", + Self::Cysteine => "C", + Self::AsparticAcid => "D", + Self::GlutamicAcid => "E", + Self::Phenylalanine => "F", + Self::Glycine => "G", + Self::Histidine => "H", + Self::Isoleucine => "I", + Self::AmbiguousLeucine => "J", + Self::Lysine => "K", + Self::Leucine => "L", + Self::Methionine => "M", + Self::Asparagine => "N", + Self::Pyrrolysine => "O", + Self::Proline => "P", + Self::Glutamine => "Q", + Self::Arginine => "R", + Self::Serine => "S", + Self::Threonine => "T", + Self::Selenocysteine => "U", + Self::Valine => "V", + Self::Tryptophan => "W", + Self::Unknown => "X", + Self::Tyrosine => "Y", + Self::AmbiguousGlutamine => "Z", + }) + } + + /// Get the 3 letter code for the amino acid + fn three_letter_code(&self) -> Option> { + Some(Cow::Borrowed(match self { + Self::Alanine => "Ala", + Self::AmbiguousAsparagine => "Asx", + Self::Cysteine => "Cys", + Self::AsparticAcid => "Asp", + Self::GlutamicAcid => "Glu", + Self::Phenylalanine => "Phe", + Self::Glycine => "Gly", + Self::Histidine => "His", + Self::Isoleucine => "Ile", + Self::AmbiguousLeucine => "Xle", + Self::Lysine => "Lys", + Self::Leucine => "Leu", + Self::Methionine => "Met", + Self::Asparagine => "Asn", + Self::Pyrrolysine => "Pyl", + Self::Proline => "Pro", + Self::Glutamine => "Gln", + Self::Arginine => "Arg", + Self::Serine => "Ser", + Self::Threonine => "Thr", + Self::Selenocysteine => "Sec", + Self::Valine => "Val", + Self::Tryptophan => "Trp", + Self::Unknown => "Xaa", + Self::Tyrosine => "Tyr", + Self::AmbiguousGlutamine => "Glx", + })) + } + + /// Get the full name for the amino acid + fn name(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "Alanine", + Self::AmbiguousAsparagine => "AmbiguousAsparagine", + Self::Cysteine => "Cysteine", + Self::AsparticAcid => "AsparticAcid", + Self::GlutamicAcid => "GlutamicAcid", + Self::Phenylalanine => "Phenylalanine", + Self::Glycine => "Glycine", + Self::Histidine => "Histidine", + Self::Isoleucine => "Isoleucine", + Self::AmbiguousLeucine => "AmbiguousLeucine", + Self::Lysine => "Lysine", + Self::Leucine => "Leucine", + Self::Methionine => "Methionine", + Self::Asparagine => "Asparagine", + Self::Pyrrolysine => "Pyrrolysine", + Self::Proline => "Proline", + Self::Glutamine => "Glutamine", + Self::Arginine => "Arginine", + Self::Serine => "Serine", + Self::Threonine => "Threonine", + Self::Selenocysteine => "Selenocysteine", + Self::Valine => "Valine", + Self::Tryptophan => "Tryptophan", + Self::Unknown => "Unknown", + Self::Tyrosine => "Tyrosine", + Self::AmbiguousGlutamine => "AmbiguousGlutamine", + }) + } + + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi> { + let crate::SequencePosition::Index(sequence_index) = sequence_index else { + return Cow::Owned(Multi::default()); + }; + Cow::Owned(match self { + Self::Alanine => molecular_formula!(H 3 C 1).into(), + Self::Arginine => molecular_formula!(H 10 C 4 N 3).into(), // One of the H's counts as the charge carrier and is added later + Self::Asparagine => molecular_formula!(H 4 C 2 O 1 N 1).into(), + Self::AsparticAcid => molecular_formula!(H 3 C 2 O 2).into(), + Self::AmbiguousAsparagine => vec![ + molecular_formula!(H 4 C 2 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Asparagine, sequence_index, peptidoform_index})), + molecular_formula!(H 3 C 2 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::AsparticAcid, sequence_index, peptidoform_index})), + ] + .into(), + Self::Cysteine => molecular_formula!(H 3 C 1 S 1).into(), + Self::Glutamine => molecular_formula!(H 6 C 3 O 1 N 1).into(), + Self::GlutamicAcid => molecular_formula!(H 5 C 3 O 2).into(), + Self::AmbiguousGlutamine => vec![ + molecular_formula!(H 6 C 3 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Glutamine, sequence_index, peptidoform_index})), + molecular_formula!(H 5 C 3 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::GlutamicAcid, sequence_index, peptidoform_index})), + ] + .into(), + Self::Glycine => molecular_formula!(H 1).into(), + Self::Histidine => molecular_formula!(H 5 C 4 N 2).into(), + Self::AmbiguousLeucine | Self::Isoleucine | Self::Leucine => { + molecular_formula!(H 9 C 4).into() + } + Self::Lysine => molecular_formula!(H 10 C 4 N 1).into(), + Self::Methionine => molecular_formula!(H 7 C 3 S 1).into(), + Self::Phenylalanine => molecular_formula!(H 7 C 7).into(), + Self::Proline => molecular_formula!(H 5 C 3).into(), + Self::Pyrrolysine => molecular_formula!(H 17 C 9 O 1 N 2).into(), + Self::Selenocysteine => molecular_formula!(H 3 C 1 Se 1).into(), + Self::Serine => molecular_formula!(H 3 C 1 O 1).into(), + Self::Threonine => molecular_formula!(H 5 C 2 O 1).into(), + Self::Tryptophan => molecular_formula!(H 8 C 9 N 1).into(), + Self::Tyrosine => molecular_formula!(H 7 C 7 O 1).into(), + Self::Valine => molecular_formula!(H 7 C 3).into(), + Self::Unknown => molecular_formula!().into(), + }) + } // TODO: Take side chain mutations into account (maybe define pyrrolysine as a mutation) - /// # Panics - /// When the sequence index is terminal. - pub(crate) fn satellite_ion_fragments( - self, + fn satellite_ion_fragments( + &self, sequence_index: SequencePosition, peptidoform_index: usize, - ) -> Multi { + ) -> Option>> { let crate::SequencePosition::Index(sequence_index) = sequence_index else { - panic!("Not allowed to call satellite ion fragments with a terminal sequence index") + return None; }; + Some(Cow::Owned( match self { Self::Alanine | Self::Glycine @@ -172,7 +314,7 @@ impl AminoAcid { ] .into(), Self::Valine => molecular_formula!(H 3 C 1).into(), // Technically two options, but both have the same mass - } + })) } /// All losses from the base immonium ions. Compiled from the sources below. @@ -251,9 +393,9 @@ impl AminoAcid { /// | | 55 | | | 55 | | 55 | | | | | 55.0548 | | | | | | 4 | 55.0548 | | 17.0263 | | H3N1 | | H3N1 | /// | | 44 | | | | | | | | | | | | | | | | 1 | 44 | | 28.0811 | | C1H2N1 | | C1H2N1 | /// | | | | | 41 | | 41 | | | | | 41.0391 | | | | | | 3 | 41.0391 | | 31.0420 | | C1H5N1 | | C1H5N1 | - fn immonium_losses(self) -> Vec { + fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]> { // TODO: For B/Z there are common immonium ions, but the mass is the same (meaning the loss is different), find a way of representing that - match self { + Cow::Owned(match self { Self::Arginine => vec![ NeutralLoss::Gain(molecular_formula!(C 2 O 2)), NeutralLoss::Loss(molecular_formula!(C 1 H 2)), @@ -321,8 +463,59 @@ impl AminoAcid { NeutralLoss::Loss(molecular_formula!(C 1 H 5 N 1)), ], _ => Vec::new(), - } + }) } +} + +impl AminoAcid { + /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) + pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::AmbiguousLeucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + Self::Selenocysteine, + Self::Pyrrolysine, + ]; + + /// All 20 canonical amino acids + pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::Leucine, + Self::Isoleucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + ]; // TODO: generalise over used storage type, so using molecularformula, monoisotopic mass, or average mass, also make sure that AAs can return these numbers in a const fashion #[expect(clippy::too_many_lines, clippy::too_many_arguments)] @@ -383,19 +576,23 @@ impl AminoAcid { )); } if ions.d.0 && allow_terminal.0 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - * self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 1 C 1 O 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::d(n_pos), - n_term, - ions.d.1, - charge_carriers, - ions.d.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 1 C 1 O 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::d(n_pos), + n_term, + ions.d.1, + charge_carriers, + ions.d.2, + )); + } } if ions.v.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -410,19 +607,23 @@ impl AminoAcid { )); } if ions.w.0 && allow_terminal.1 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - * self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 2 N 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::w(c_pos), - c_term, - ions.w.1, - charge_carriers, - ions.w.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 2 N 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::w(c_pos), + c_term, + ions.w.1, + charge_carriers, + ions.w.2, + )); + } } if ions.x.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -483,7 +684,7 @@ impl AminoAcid { peptidoform_index, &FragmentType::Immonium(n_pos, self.into()), // TODO: get the actual sequenceelement here &Multi::default(), - self.immonium_losses().as_slice(), + self.immonium_losses().as_ref(), charge_carriers, ions.immonium.1, )); @@ -491,102 +692,6 @@ impl AminoAcid { base_fragments } - /// Get the single letter representation of the amino acid - pub const fn char(self) -> char { - match self { - Self::Alanine => 'A', - Self::AmbiguousAsparagine => 'B', - Self::Cysteine => 'C', - Self::AsparticAcid => 'D', - Self::GlutamicAcid => 'E', - Self::Phenylalanine => 'F', - Self::Glycine => 'G', - Self::Histidine => 'H', - Self::Isoleucine => 'I', - Self::AmbiguousLeucine => 'J', - Self::Lysine => 'K', - Self::Leucine => 'L', - Self::Methionine => 'M', - Self::Asparagine => 'N', - Self::Pyrrolysine => 'O', - Self::Proline => 'P', - Self::Glutamine => 'Q', - Self::Arginine => 'R', - Self::Serine => 'S', - Self::Threonine => 'T', - Self::Selenocysteine => 'U', - Self::Valine => 'V', - Self::Tryptophan => 'W', - Self::Unknown => 'X', - Self::Tyrosine => 'Y', - Self::AmbiguousGlutamine => 'Z', - } - } - - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - match self { - Self::Alanine => "Ala", - Self::AmbiguousAsparagine => "Asx", - Self::Cysteine => "Cys", - Self::AsparticAcid => "Asp", - Self::GlutamicAcid => "Glu", - Self::Phenylalanine => "Phe", - Self::Glycine => "Gly", - Self::Histidine => "His", - Self::Isoleucine => "Ile", - Self::AmbiguousLeucine => "Xle", - Self::Lysine => "Lys", - Self::Leucine => "Leu", - Self::Methionine => "Met", - Self::Asparagine => "Asn", - Self::Pyrrolysine => "Pyl", - Self::Proline => "Pro", - Self::Glutamine => "Gln", - Self::Arginine => "Arg", - Self::Serine => "Ser", - Self::Threonine => "Thr", - Self::Selenocysteine => "Sec", - Self::Valine => "Val", - Self::Tryptophan => "Trp", - Self::Unknown => "Xaa", - Self::Tyrosine => "Tyr", - Self::AmbiguousGlutamine => "Glx", - } - } - - /// Get the full name for the amino acid - pub const fn name(self) -> &'static str { - match self { - Self::Alanine => "Alanine", - Self::AmbiguousAsparagine => "AmbiguousAsparagine", - Self::Cysteine => "Cysteine", - Self::AsparticAcid => "AsparticAcid", - Self::GlutamicAcid => "GlutamicAcid", - Self::Phenylalanine => "Phenylalanine", - Self::Glycine => "Glycine", - Self::Histidine => "Histidine", - Self::Isoleucine => "Isoleucine", - Self::AmbiguousLeucine => "AmbiguousLeucine", - Self::Lysine => "Lysine", - Self::Leucine => "Leucine", - Self::Methionine => "Methionine", - Self::Asparagine => "Asparagine", - Self::Pyrrolysine => "Pyrrolysine", - Self::Proline => "Proline", - Self::Glutamine => "Glutamine", - Self::Arginine => "Arginine", - Self::Serine => "Serine", - Self::Threonine => "Threonine", - Self::Selenocysteine => "Selenocysteine", - Self::Valine => "Valine", - Self::Tryptophan => "Tryptophan", - Self::Unknown => "Unknown", - Self::Tyrosine => "Tyrosine", - Self::AmbiguousGlutamine => "AmbiguousGlutamine", - } - } - /// Check if two amino acids are considered identical. X is identical to anything, J to IL, B to ND, Z to EQ. pub(crate) fn canonical_identical(self, rhs: Self) -> bool { match (self, rhs) { @@ -604,12 +709,6 @@ impl AminoAcid { } } -impl std::fmt::Display for AminoAcid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) - } -} - #[cfg(test)] #[expect(clippy::unreadable_literal, clippy::missing_panics_doc)] mod tests { @@ -666,7 +765,7 @@ mod tests { ); println!( "{}: {} {} {} {}", - aa.char(), + aa.pro_forma_definition(), mono, mono_mass, weight, diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs index f709c828..5d8036d1 100644 --- a/rustyms/src/checked_aminoacid.rs +++ b/rustyms/src/checked_aminoacid.rs @@ -3,7 +3,8 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ - AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous, + aminoacids::IsAminoAcid, AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, + SemiAmbiguous, UnAmbiguous, }; /// A checked amino acid. This wraps an [`AminoAcid`] to keep track of the maximal complexity of @@ -279,24 +280,48 @@ impl CheckedAminoAcid { self.aminoacid.canonical_identical(rhs.aminoacid) } - /// Get the description of the amino acid as a single character - pub const fn char(self) -> char { - self.aminoacid.char() + /// Get the underlying (unchecked) amino acid + pub const fn aminoacid(self) -> AminoAcid { + self.aminoacid } +} - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - self.aminoacid.code() +impl IsAminoAcid for CheckedAminoAcid { + fn name(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.name() } - /// Get the full name of the amino acid - pub const fn name(self) -> &'static str { - self.aminoacid.name() + fn three_letter_code(&self) -> Option> { + self.aminoacid.three_letter_code() } - /// Get the underlying (unchecked) amino acid - pub const fn aminoacid(self) -> AminoAcid { + fn one_letter_code(&self) -> Option { + self.aminoacid.one_letter_code() + } + + fn pro_forma_definition(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.pro_forma_definition() + } + + fn immonium_losses(&self) -> std::borrow::Cow<'_, [crate::NeutralLoss]> { + self.aminoacid.immonium_losses() + } + + fn satellite_ion_fragments( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> Option>> { self.aminoacid + .satellite_ion_fragments(sequence_index, peptidoform_index) + } + + fn side_chain( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> std::borrow::Cow<'_, Multi> { + self.aminoacid.side_chain(sequence_index, peptidoform_index) } } @@ -398,7 +423,7 @@ impl Default for CheckedAminoAcid { impl std::fmt::Display for CheckedAminoAcid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) + write!(f, "{}", self.pro_forma_definition()) } } diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs index b93b362e..6e63bf65 100644 --- a/rustyms/src/fragment.rs +++ b/rustyms/src/fragment.rs @@ -481,9 +481,7 @@ impl FragmentType { Self::z·(_) => Cow::Borrowed("z·"), Self::B(_) => Cow::Borrowed("B"), Self::Y(_) | Self::YComposition(_, _) => Cow::Borrowed("Y"), - Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => { - Cow::Owned(format!("d{}", aa.char())) - } + Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => Cow::Owned(format!("d{aa}")), Self::Diagnostic(DiagnosticPosition::Reporter) => Cow::Borrowed("r"), Self::Diagnostic(DiagnosticPosition::Labile(m)) => Cow::Owned(format!("d{m}")), Self::Diagnostic( @@ -491,8 +489,8 @@ impl FragmentType { | DiagnosticPosition::GlycanCompositional(sug, _), ) => Cow::Owned(format!("d{sug}")), Self::Oxonium(_) | Self::OxoniumComposition(_, _) => Cow::Borrowed("oxonium"), - Self::Immonium(_, aa) => Cow::Owned(format!("i{}", aa.aminoacid.char())), - Self::PrecursorSideChainLoss(_, aa) => Cow::Owned(format!("p-s{}", aa.char())), + Self::Immonium(_, aa) => Cow::Owned(format!("i{}", aa.aminoacid)), + Self::PrecursorSideChainLoss(_, aa) => Cow::Owned(format!("p-s{aa}")), Self::Precursor => Cow::Borrowed("p"), Self::Internal(fragmentation, _, _) => Cow::Owned(format!( "m{}", diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index 96d410f9..ce3a09e5 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -85,7 +85,7 @@ pub use crate::sequence_element::SequenceElement; pub use crate::sequence_position::*; pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum}; pub use crate::tolerance::*; -pub use aminoacids::AminoAcid; +pub use aminoacids::{AminoAcid, IsAminoAcid}; pub use checked_aminoacid::CheckedAminoAcid; pub use fragment::Fragment; pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon}; diff --git a/rustyms/src/sequence_element.rs b/rustyms/src/sequence_element.rs index e53ce32a..9c843973 100644 --- a/rustyms/src/sequence_element.rs +++ b/rustyms/src/sequence_element.rs @@ -117,7 +117,7 @@ impl SequenceElement { if self.ambiguous.is_some() && last_ambiguous != self.ambiguous { write!(f, "(?")?; } - write!(f, "{}", self.aminoacid.char())?; + write!(f, "{}", self.aminoacid)?; for m in &self.modifications { let mut display_ambiguous = false; if let Modification::Ambiguous { id, .. } = m {