From bf693f344b06b7902de3662758424ac1568d2587 Mon Sep 17 00:00:00 2001 From: AderikVoorspoels Date: Tue, 29 Jul 2025 14:44:20 +0100 Subject: [PATCH 1/6] added BASEPAIRING option for atom type used to determine contacts --- conkit/io/pdb.py | 14 ++++++++++++++ conkit/plot/modelvalidation.py | 8 ++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py index 7799c472..5c5eb42b 100644 --- a/conkit/io/pdb.py +++ b/conkit/io/pdb.py @@ -112,6 +112,20 @@ def _chain_contacts(self, chain1, chain2): def _remove_atom(self, chain, type): """Tidy up a chain removing all HETATM entries""" + + if type == 'BASEPAIRING': + #handle special request for contacts/distances based on basepairing atoms in NA rather than backbone atoms + for residue in chain.copy(): + for atom in residue.copy(): + if atom.is_disordered(): + chain[residue.id].detach_child(atom.id) + elif atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG']: + continue + elif atom.id == 'N9' and residue.resname in ['C', 'T', 'U' 'DC', 'DT','DU']: + continue + else: + chain[residue.id].detach_child(atom.id) + for residue in chain.copy(): for atom in residue.copy(): if atom.is_disordered(): diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py index 1bbc4d0b..3397208a 100644 --- a/conkit/plot/modelvalidation.py +++ b/conkit/plot/modelvalidation.py @@ -303,7 +303,7 @@ def _parse_data(self, predicted_dict, *metrics): self.data['SCORE'] = 0 self.data['CONTACTS'] = 0 self.data['PLDDT'] = 0 - self.data['Q_IN_ERROR'] = '' + self.data['Q_IN_ERROR'] = '' @@ -428,9 +428,13 @@ def Run_gesamt_filter(self, experimentfile, predictionfile, gesamt_exe, moltype= chain_experiment = chain.get_id() for region in flagged_regions: + print(f'running gesamt on region {region}') Q_region = tools.Gesamt_Q_score(predictionfile,experimentfile,region,gesamt_exe=gesamt_exe, chain_experiment = chain_experiment, chain_prediction = 'A', moltype=moltype) self.data.loc[ 
(self.data['RESNUM'] <= region[1]) & (self.data['RESNUM'] >= region[0]), 'Q_IN_ERROR'] = Q_region + + Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict() + print(Qs) return 0 @@ -487,7 +491,7 @@ def draw(self,RUN_SVM=True,RUN_MAP_ALIGN=True,RUN_FILTERS=True,n_contacts_per_re if 'Q_IN_ERROR' in self.data.columns: Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict() - + print(Qs) color_scheme = tools.ColorDefinitions.Q_COLORS thresholds = list(color_scheme.keys()) thresholds.sort(reverse=True) From ccabd26994ae1658dd262c3c5b549242edacdab4 Mon Sep 17 00:00:00 2001 From: AderikVoorspoels Date: Tue, 29 Jul 2025 14:47:19 +0100 Subject: [PATCH 2/6] re removed printstatments that snuck in during rebasing --- conkit/plot/modelvalidation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py index 3397208a..7c4fb143 100644 --- a/conkit/plot/modelvalidation.py +++ b/conkit/plot/modelvalidation.py @@ -428,13 +428,11 @@ def Run_gesamt_filter(self, experimentfile, predictionfile, gesamt_exe, moltype= chain_experiment = chain.get_id() for region in flagged_regions: - print(f'running gesamt on region {region}') + Q_region = tools.Gesamt_Q_score(predictionfile,experimentfile,region,gesamt_exe=gesamt_exe, chain_experiment = chain_experiment, chain_prediction = 'A', moltype=moltype) self.data.loc[ (self.data['RESNUM'] <= region[1]) & (self.data['RESNUM'] >= region[0]), 'Q_IN_ERROR'] = Q_region - Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict() - print(Qs) return 0 @@ -491,7 +489,6 @@ def draw(self,RUN_SVM=True,RUN_MAP_ALIGN=True,RUN_FILTERS=True,n_contacts_per_re if 'Q_IN_ERROR' in self.data.columns: Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict() - print(Qs) color_scheme = tools.ColorDefinitions.Q_COLORS thresholds = list(color_scheme.keys()) thresholds.sort(reverse=True) From 99a9102b45492e277be626ff68732204073c72ba Mon Sep 17 00:00:00 2001 From: AderikVoorspoels 
Date: Tue, 29 Jul 2025 15:52:04 +0100 Subject: [PATCH 3/6] fixed bugs introduced by adding the BASEPAIRING option --- conkit/command_line/conkit_validate.py | 2 +- conkit/io/pdb.py | 26 +++++++++++++------------- conkit/plot/modelvalidation.py | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py index 83eb201a..db33e67b 100644 --- a/conkit/command_line/conkit_validate.py +++ b/conkit/command_line/conkit_validate.py @@ -320,7 +320,7 @@ def main(): if args.RUN_FILTERS=='yes': logger.info(os.linesep + "Running Filters.") - validation.count_contacts() + validation.count_contacts(cutoff=cutoff) if (prediction.plddt != None) and (args.PLDDT_IN_DISTFILE == 'yes'): ##turn into check for plddt diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py index 5c5eb42b..57e49d25 100644 --- a/conkit/io/pdb.py +++ b/conkit/io/pdb.py @@ -71,6 +71,7 @@ def _build_plddts(self, chain): for residue in chain: for atom in residue.get_atoms(): plddts[residue.get_id()[1]] = atom.get_bfactor() + return plddts @@ -119,21 +120,20 @@ def _remove_atom(self, chain, type): for atom in residue.copy(): if atom.is_disordered(): chain[residue.id].detach_child(atom.id) - elif atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG']: - continue - elif atom.id == 'N9' and residue.resname in ['C', 'T', 'U' 'DC', 'DT','DU']: - continue else: + atom_needed = (atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG']) + atom_needed = atom_needed or (atom.id == 'N3' and residue.resname in ['C', 'T', 'U', 'DC', 'DT','DU']) + if not atom_needed: + chain[residue.id].detach_child(atom.id) + else: + for residue in chain.copy(): + for atom in residue.copy(): + if atom.is_disordered(): + chain[residue.id].detach_child(atom.id) + elif residue.resname == "GLY" and type == "CB" and atom.id == "CA": + continue + elif atom.id != type: chain[residue.id].detach_child(atom.id) - - for residue in chain.copy(): - for atom in 
residue.copy(): - if atom.is_disordered(): - chain[residue.id].detach_child(atom.id) - elif residue.resname == "GLY" and type == "CB" and atom.id == "CA": - continue - elif atom.id != type: - chain[residue.id].detach_child(atom.id) def _remove_hetatm(self, chain): """Tidy up a chain removing all HETATM entries""" diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py index 7c4fb143..5b0bd642 100644 --- a/conkit/plot/modelvalidation.py +++ b/conkit/plot/modelvalidation.py @@ -383,9 +383,9 @@ def map_align(self,map_align_exe=None): else: self.data['MISALIGNED'] = False - def count_contacts(self): + def count_contacts(self,cutoff): - cmap = self.prediction.as_contactmap() + cmap = self.prediction.as_contactmap(distance_cutoff=cutoff) cmap_dict = cmap.as_dict() self.data['CONTACTS'] = self.data['RESNUM'].apply(lambda x: len(cmap_dict[int(x)])) From 89af5b9963789a82644740bcd297e88e4d5b1da6 Mon Sep 17 00:00:00 2001 From: AderikVoorspoels Date: Fri, 1 Aug 2025 11:30:03 +0100 Subject: [PATCH 4/6] changed default behavior to not run svm if provided predicted contacts are derived from a structure --- conkit/io/pdb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py index 57e49d25..ca2965d7 100644 --- a/conkit/io/pdb.py +++ b/conkit/io/pdb.py @@ -116,6 +116,7 @@ def _remove_atom(self, chain, type): if type == 'BASEPAIRING': #handle special request for contacts/distances based on basepairing atoms in NA rather than backbone atoms + #this could be improved to handle hoogsteen pairs for residue in chain.copy(): for atom in residue.copy(): if atom.is_disordered(): From 4f040af84df6eb8759822a0c355b55cfdd0b89c3 Mon Sep 17 00:00:00 2001 From: AderikVoorspoels Date: Fri, 1 Aug 2025 11:30:43 +0100 Subject: [PATCH 5/6] changed default behavior to not run svm if provided predicted contacts are derived from a structure --- conkit/command_line/conkit_validate.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff 
--git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py index db33e67b..b4ea6928 100644 --- a/conkit/command_line/conkit_validate.py +++ b/conkit/command_line/conkit_validate.py @@ -94,7 +94,7 @@ def create_argument_parser(): help="Number of iterations") parser.add_argument("--moltype", dest="moltype", default="Protein", type=str, help="Type of molecule") - parser.add_argument("--run_svm", dest="RUN_SVM", default='yes', type=str, + parser.add_argument("--run_svm", dest="RUN_SVM", default='yes if prediction not pdb or mmcif', type=str, help="Whether to run the support vector machine validation") parser.add_argument("--run_map_align", dest="RUN_MAP_ALIGN", default='yes', type=str, help="Whether to run the contactmap alignment validation") @@ -300,6 +300,12 @@ def main(): validation = conkit.plot.ModelValidationFigure(model, prediction, sequence) + if args.RUN_SVM=='yes if prediction not pdb or mmcif': #don't run the svm if prediction is a structure by default + if args.distformat in ['pdb', 'mmcif']: + args.RUN_SVM='no' + else: + args.RUN_SVM='yes' + if args.RUN_SVM=='yes': logger.info(os.linesep + "Running Support Vector Machine.") From 81624272a3355a982a7d4942f2de9667773fd710 Mon Sep 17 00:00:00 2001 From: AderikVoorspoels Date: Fri, 1 Aug 2025 15:43:06 +0100 Subject: [PATCH 6/6] making sure whole sequence file is accessible in command-line tool, not just top, important change to hopefully make chain selection and multi-chain validation possible --- conkit/command_line/conkit_validate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py index b4ea6928..71b4e63c 100644 --- a/conkit/command_line/conkit_validate.py +++ b/conkit/command_line/conkit_validate.py @@ -275,7 +275,8 @@ def main(): logger.info(os.linesep + "Working directory: %s", os.getcwd()) logger.info("Reading input sequence: %s", args.seqfile) - sequence = 
conkit.io.read(args.seqfile, args.seqformat).top + sequencefile = conkit.io.read(args.seqfile, args.seqformat) + sequence = sequencefile.top if len(sequence) < 5: raise ValueError('Cannot validate model with less than 5 residues') @@ -290,6 +291,7 @@ def main(): else: prediction_file = conkit.io.read(args.distfile, args.distformat) prediction = prediction_file.top + logger.info("Reading input PDB model: %s", args.pdbfile) model = conkit.io.read(args.pdbfile, args.pdbformat, distance_cutoff=cutoff, atom_type=rep_atom).top