Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion psm_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Common utilities for parsing and handling PSMs, and search engine results."""

__version__ = "1.5.0.post1"
__version__ = "1.5.1"
__all__ = ["Peptidoform", "PSM", "PSMList"]

from warnings import filterwarnings
Expand Down
10 changes: 9 additions & 1 deletion psm_utils/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,16 @@ def _supports_write_psm(writer: type[WriterBase]) -> bool:
temp_file.close()
Path(temp_file.name).unlink()
example_psm = PSM(peptidoform="ACDE", spectrum_id="0")

# Prepare writer-specific kwargs for writers that need them
writer_kwargs = {}
if writer == percolator.PercolatorTabWriter:
writer_kwargs["style"] = "pin"

try:
with writer(temp_file.name, example_psm=example_psm) as writer_instance:
with writer(
temp_file.name, example_psm=example_psm, **writer_kwargs
) as writer_instance:
writer_instance.write_psm(example_psm)
except NotImplementedError:
supports_write_psm = False
Expand Down
38 changes: 31 additions & 7 deletions psm_utils/io/idxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@
import pyopenms as oms # type: ignore[import]

_has_openms = True
# Check if we have pyOpenMS 3.5+ with PeptideIdentificationList
_has_peptide_id_list = hasattr(oms, "PeptideIdentificationList")
except ImportError:
_has_openms = False
_has_peptide_id_list = False
oms = None # type: ignore[assignment]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -157,8 +160,17 @@ def _parse_idxml(self) -> tuple[Any, Any]:

"""
protein_ids: Any = [] # list[oms.ProteinIdentification]
peptide_ids: Any = [] # list[oms.PeptideIdentification]
oms.IdXMLFile().load(str(self.filename), protein_ids, peptide_ids) # type: ignore
# In pyOpenMS 3.5+, peptide_ids must be a PeptideIdentificationList
if _has_peptide_id_list:
peptide_ids: Any = oms.PeptideIdentificationList() # type: ignore
else:
peptide_ids = [] # list[oms.PeptideIdentification] for pyOpenMS <3.5

# Load the idXML file - the lists will be populated by pyOpenMS
idxml_file = oms.IdXMLFile() # type: ignore
# Ensure filename is a string, not a Path object
filename_str: str = str(self.filename)
idxml_file.load(filename_str, protein_ids, peptide_ids)

if len(protein_ids) == 0:
raise IdXMLReaderEmptyListException(
Expand Down Expand Up @@ -564,7 +576,10 @@ def _update_existing_ids(

peptide_id.setHits(updated_peptide_hits)

oms.IdXMLFile().store(str(self.filename), self.protein_ids, self.peptide_ids) # type: ignore
# Store the idXML file
idxml_file = oms.IdXMLFile() # type: ignore
filename_str: str = str(self.filename)
idxml_file.store(filename_str, self.protein_ids, self.peptide_ids)

def _update_peptide_hit(self, peptide_hit: Any, psm: PSM) -> None:
"""Inplace update of PeptideHit with novel predicted features information from PSM."""
Expand Down Expand Up @@ -594,7 +609,11 @@ def _create_ids_for_collection(
) -> None:
"""Create ProteinIdentification and PeptideIdentification objects for a single collection."""
self.protein_ids = [oms.ProteinIdentification()] # type: ignore
self.peptide_ids = []
# In pyOpenMS 3.5+, peptide_ids must be a PeptideIdentificationList
if _has_peptide_id_list:
self.peptide_ids = oms.PeptideIdentificationList() # type: ignore
else:
self.peptide_ids = [] # list[oms.PeptideIdentification] for pyOpenMS <3.5

# Set msrun filename with spectra_data meta value
msrun_reference = [str(run).encode() for run in runs.keys()]
Expand All @@ -617,14 +636,19 @@ def _create_ids_for_collection(
# Create PeptideHits
peptide_hits = [self._create_peptide_hit(psm) for psm in psms]
peptide_id.setHits(peptide_hits)
self.peptide_ids.append(peptide_id)
# Use push_back for pyOpenMS 3.5+, append for older versions
if _has_peptide_id_list:
self.peptide_ids.push_back(peptide_id) # type: ignore
else:
self.peptide_ids.append(peptide_id) # type: ignore[union-attr]

# Create protein hits
self._create_protein_hits(protein_list)

# Write idXML file
filename = "/".join(filter(None, [collection, str(self.filename)]))
oms.IdXMLFile().store(filename, self.protein_ids, self.peptide_ids) # type: ignore
filename: str = "/".join(filter(None, [collection, str(self.filename)]))
idxml_file = oms.IdXMLFile() # type: ignore
idxml_file.store(filename, self.protein_ids, self.peptide_ids) # type: ignore

def _create_peptide_identification(
self,
Expand Down
3 changes: 2 additions & 1 deletion psm_utils/io/peptide_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,8 @@ def from_dataframe(peprec_df: pd.DataFrame) -> PSMList:
"""
psm_list = []
for _, row in peprec_df.iterrows():
entry = _PeprecEntry(**row.to_dict())
row_dict = {str(k): v for k, v in row.to_dict().items()}
entry = _PeprecEntry(**row_dict)
psm_list.append(PeptideRecordReader._entry_to_psm(entry, filename=""))
return PSMList(psm_list=psm_list)

Expand Down
46 changes: 28 additions & 18 deletions psm_utils/io/pepxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
"mzFidelity",
]

# Optional search-hit fields that ``PepXMLReader._parse_psm`` copies into the PSM
# metadata (stringified) whenever they are present in the parsed search hit.
KNOWN_METADATA_KEYS = [
"num_matched_ions",
"tot_num_ions",
"num_missed_cleavages",
]


class PepXMLReader(ReaderBase):
"""Reader for pepXML PSM files."""
Expand Down Expand Up @@ -127,47 +133,51 @@ def _parse_peptidoform(

def _parse_psm(self, spectrum_query: dict[str, Any], search_hit: dict[str, Any]) -> PSM:
    """
    Parse a pepXML spectrum query and one of its search hits to a PSM.

    Parameters
    ----------
    spectrum_query
        Parsed pepXML spectrum query. ``assumed_charge`` and
        ``precursor_neutral_mass`` are required; ``spectrumNativeID``, ``spectrum``,
        ``retention_time_sec``, ``ion_mobility``, ``index``, ``start_scan`` and
        ``end_scan`` are read when present.
    search_hit
        Parsed pepXML search hit. ``peptide``, ``modifications`` and ``search_score``
        are required; ``proteins``, ``hit_rank`` and the fields listed in
        ``KNOWN_METADATA_KEYS`` are read when present.

    Returns
    -------
    PSM
        PSM built from the spectrum query and search hit.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``spectrum_query`` or
        ``search_hit``.

    """
    # Build metadata from optional search hit fields
    metadata = {key: str(search_hit[key]) for key in KNOWN_METADATA_KEYS if key in search_hit}

    # Add all search scores to metadata, prefixed and lower-cased
    metadata.update(
        {
            f"search_score_{key.lower()}": str(value)
            for key, value in search_hit["search_score"].items()
        }
    )

    # Build provenance data from optional spectrum query fields; absent fields are
    # left out instead of being stored as the string "None"
    provenance_data = {
        k: str(v)
        for k, v in {
            "pepxml_index": spectrum_query.get("index"),
            "start_scan": spectrum_query.get("start_scan"),
            "end_scan": spectrum_query.get("end_scan"),
        }.items()
        if v is not None
    }

    return PSM(
        peptidoform=self._parse_peptidoform(
            search_hit["peptide"],
            search_hit["modifications"],
            spectrum_query["assumed_charge"],
        ),
        # Prefer the native spectrum ID; fall back to the spectrum title
        spectrum_id=spectrum_query.get("spectrumNativeID", spectrum_query.get("spectrum")),
        run=None,
        collection=None,
        spectrum=None,
        is_decoy=None,
        # None when the configured score key is not among the search scores
        score=search_hit["search_score"].get(self.score_key),
        qvalue=None,
        pep=None,
        precursor_mz=mass_to_mz(
            spectrum_query["precursor_neutral_mass"], spectrum_query["assumed_charge"]
        ),
        retention_time=spectrum_query.get("retention_time_sec"),
        ion_mobility=spectrum_query.get("ion_mobility"),
        protein_list=[p["protein"] for p in search_hit.get("proteins", [])],
        rank=search_hit.get("hit_rank"),
        source=None,
        provenance_data=provenance_data,
        metadata=metadata,
        rescoring_features={},
    )
6 changes: 4 additions & 2 deletions psm_utils/io/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,10 @@ def write_file(self, psm_list: PSMList) -> None:
f, fieldnames=self._columns, delimiter="\t", extrasaction="ignore"
)
writer.writeheader()
for psm in psm_list:
writer.writerow(self._psm_to_entry(psm))
for i, psm in enumerate(psm_list):
entry = self._psm_to_entry(psm)
entry["ScanNr"] = i
writer.writerow(entry)

def _psm_to_entry(self, psm: PSM) -> dict[str, Any]:
"""Parse PSM to Percolator Tab entry."""
Expand Down
Loading