From bf693f344b06b7902de3662758424ac1568d2587 Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Tue, 29 Jul 2025 14:44:20 +0100
Subject: [PATCH 1/6] added BASEPAIRING option for atom type used to determine
 contacts

---
 conkit/io/pdb.py               | 14 ++++++++++++++
 conkit/plot/modelvalidation.py |  8 ++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py
index 7799c472..5c5eb42b 100644
--- a/conkit/io/pdb.py
+++ b/conkit/io/pdb.py
@@ -112,6 +112,20 @@ def _chain_contacts(self, chain1, chain2):
 
     def _remove_atom(self, chain, type):
         """Tidy up a chain removing all HETATM entries"""
+
+        if type == 'BASEPAIRING':
+            #handle special request for contacts/distances based on basepairing atoms in NA rather than backbone atoms
+            for residue in chain.copy():
+                for atom in residue.copy():
+                    if atom.is_disordered():
+                        chain[residue.id].detach_child(atom.id)
+                    elif atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG']:
+                        continue
+                    elif atom.id == 'N9' and residue.resname in ['C', 'T', 'U' 'DC', 'DT','DU']:
+                        continue
+                    else:
+                        chain[residue.id].detach_child(atom.id)
+
         for residue in chain.copy():
             for atom in residue.copy():
                 if atom.is_disordered():
diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py
index 1bbc4d0b..3397208a 100644
--- a/conkit/plot/modelvalidation.py
+++ b/conkit/plot/modelvalidation.py
@@ -303,7 +303,7 @@ def _parse_data(self, predicted_dict, *metrics):
         self.data['SCORE'] = 0
         self.data['CONTACTS'] = 0        
         self.data['PLDDT'] = 0
-        self.data['Q_IN_ERROR'] = ''
+        self.data['Q_IN_ERROR'] = ''  
 
 
 
@@ -428,9 +428,13 @@ def Run_gesamt_filter(self, experimentfile, predictionfile, gesamt_exe, moltype=
             chain_experiment = chain.get_id()
 
         for region in flagged_regions:
+            print(f'running gesamt on region {region}')
             Q_region = tools.Gesamt_Q_score(predictionfile,experimentfile,region,gesamt_exe=gesamt_exe, chain_experiment = chain_experiment, chain_prediction = 'A', moltype=moltype)
             self.data.loc[ (self.data['RESNUM'] <= region[1]) & (self.data['RESNUM'] >= region[0]), 'Q_IN_ERROR'] = Q_region
         
+
+        Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict()
+        print(Qs)
         return 0
 
 
@@ -487,7 +491,7 @@ def draw(self,RUN_SVM=True,RUN_MAP_ALIGN=True,RUN_FILTERS=True,n_contacts_per_re
 
             if 'Q_IN_ERROR' in self.data.columns:
                 Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict()
-
+                print(Qs)
                 color_scheme = tools.ColorDefinitions.Q_COLORS
                 thresholds = list(color_scheme.keys())
                 thresholds.sort(reverse=True)

From ccabd26994ae1658dd262c3c5b549242edacdab4 Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Tue, 29 Jul 2025 14:47:19 +0100
Subject: [PATCH 2/6] re-removed print statements that snuck in during rebasing

---
 conkit/plot/modelvalidation.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py
index 3397208a..7c4fb143 100644
--- a/conkit/plot/modelvalidation.py
+++ b/conkit/plot/modelvalidation.py
@@ -428,13 +428,11 @@ def Run_gesamt_filter(self, experimentfile, predictionfile, gesamt_exe, moltype=
             chain_experiment = chain.get_id()
 
         for region in flagged_regions:
-            print(f'running gesamt on region {region}')
+
             Q_region = tools.Gesamt_Q_score(predictionfile,experimentfile,region,gesamt_exe=gesamt_exe, chain_experiment = chain_experiment, chain_prediction = 'A', moltype=moltype)
             self.data.loc[ (self.data['RESNUM'] <= region[1]) & (self.data['RESNUM'] >= region[0]), 'Q_IN_ERROR'] = Q_region
         
 
-        Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict()
-        print(Qs)
         return 0
 
 
@@ -491,7 +489,6 @@ def draw(self,RUN_SVM=True,RUN_MAP_ALIGN=True,RUN_FILTERS=True,n_contacts_per_re
 
             if 'Q_IN_ERROR' in self.data.columns:
                 Qs = self.data.set_index('RESNUM')['Q_IN_ERROR'].to_dict()
-                print(Qs)
                 color_scheme = tools.ColorDefinitions.Q_COLORS
                 thresholds = list(color_scheme.keys())
                 thresholds.sort(reverse=True)

From 99a9102b45492e277be626ff68732204073c72ba Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Tue, 29 Jul 2025 15:52:04 +0100
Subject: [PATCH 3/6] fixed bugs introduced by adding the BASEPAIRING option

---
 conkit/command_line/conkit_validate.py |  2 +-
 conkit/io/pdb.py                       | 26 +++++++++++++-------------
 conkit/plot/modelvalidation.py         |  4 ++--
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py
index 83eb201a..db33e67b 100644
--- a/conkit/command_line/conkit_validate.py
+++ b/conkit/command_line/conkit_validate.py
@@ -320,7 +320,7 @@ def main():
     if args.RUN_FILTERS=='yes':
         logger.info(os.linesep + "Running Filters.")
 
-        validation.count_contacts()
+        validation.count_contacts(cutoff=cutoff)
 
         if (prediction.plddt != None) and (args.PLDDT_IN_DISTFILE == 'yes'): ##turn into check for plddt
 
diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py
index 5c5eb42b..57e49d25 100644
--- a/conkit/io/pdb.py
+++ b/conkit/io/pdb.py
@@ -71,6 +71,7 @@ def _build_plddts(self, chain):
         for residue in chain:
             for atom in residue.get_atoms():
                 plddts[residue.get_id()[1]] = atom.get_bfactor()
+
         return plddts
 
 
@@ -119,21 +120,20 @@ def _remove_atom(self, chain, type):
                 for atom in residue.copy():
                     if atom.is_disordered():
                         chain[residue.id].detach_child(atom.id)
-                    elif atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG']:
-                        continue
-                    elif atom.id == 'N9' and residue.resname in ['C', 'T', 'U' 'DC', 'DT','DU']:
-                        continue
                     else:
+                        atom_needed = (atom.id == 'N1' and residue.resname in ['A', 'G', 'DA', 'DG'])
+                        atom_needed = atom_needed or (atom.id == 'N3' and residue.resname in ['C', 'T', 'U', 'DC', 'DT','DU'])
+                        if not atom_needed:
+                            chain[residue.id].detach_child(atom.id)
+        else:
+            for residue in chain.copy():
+                for atom in residue.copy():
+                    if atom.is_disordered():
+                        chain[residue.id].detach_child(atom.id)
+                    elif residue.resname == "GLY" and type == "CB" and atom.id == "CA":
+                        continue
+                    elif atom.id != type:
                         chain[residue.id].detach_child(atom.id)
-
-        for residue in chain.copy():
-            for atom in residue.copy():
-                if atom.is_disordered():
-                    chain[residue.id].detach_child(atom.id)
-                elif residue.resname == "GLY" and type == "CB" and atom.id == "CA":
-                    continue
-                elif atom.id != type:
-                    chain[residue.id].detach_child(atom.id)
 
     def _remove_hetatm(self, chain):
         """Tidy up a chain removing all HETATM entries"""
diff --git a/conkit/plot/modelvalidation.py b/conkit/plot/modelvalidation.py
index 7c4fb143..5b0bd642 100644
--- a/conkit/plot/modelvalidation.py
+++ b/conkit/plot/modelvalidation.py
@@ -383,9 +383,9 @@ def map_align(self,map_align_exe=None):
         else:
             self.data['MISALIGNED'] = False
 
-    def count_contacts(self):
+    def count_contacts(self,cutoff):
 
-        cmap = self.prediction.as_contactmap()
+        cmap = self.prediction.as_contactmap(distance_cutoff=cutoff)
         cmap_dict = cmap.as_dict()
         self.data['CONTACTS'] = self.data['RESNUM'].apply(lambda x: len(cmap_dict[int(x)]))
 

From 89af5b9963789a82644740bcd297e88e4d5b1da6 Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Fri, 1 Aug 2025 11:30:03 +0100
Subject: [PATCH 4/6] changed default behavior to not run svm if provided
 predicted contacts are derived from a structure

---
 conkit/io/pdb.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conkit/io/pdb.py b/conkit/io/pdb.py
index 57e49d25..ca2965d7 100644
--- a/conkit/io/pdb.py
+++ b/conkit/io/pdb.py
@@ -116,6 +116,7 @@ def _remove_atom(self, chain, type):
 
         if type == 'BASEPAIRING':
             #handle special request for contacts/distances based on basepairing atoms in NA rather than backbone atoms
+            #this could be improved to handle hoogsteen pairs
             for residue in chain.copy():
                 for atom in residue.copy():
                     if atom.is_disordered():

From 4f040af84df6eb8759822a0c355b55cfdd0b89c3 Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Fri, 1 Aug 2025 11:30:43 +0100
Subject: [PATCH 5/6] changed default behavior to not run svm if provided
 predicted contacts are derived from a structure

---
 conkit/command_line/conkit_validate.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py
index db33e67b..b4ea6928 100644
--- a/conkit/command_line/conkit_validate.py
+++ b/conkit/command_line/conkit_validate.py
@@ -94,7 +94,7 @@ def create_argument_parser():
                         help="Number of iterations")
     parser.add_argument("--moltype", dest="moltype", default="Protein", type=str,
                         help="Type of molecule")
-    parser.add_argument("--run_svm", dest="RUN_SVM", default='yes', type=str,
+    parser.add_argument("--run_svm", dest="RUN_SVM", default='yes if prediction not pdb or mmcif', type=str,
                         help="Whether to run the support vector machine validation")
     parser.add_argument("--run_map_align", dest="RUN_MAP_ALIGN", default='yes', type=str,
                         help="Whether to run the contactmap alignment validation")
@@ -300,6 +300,12 @@ def main():
 
     validation = conkit.plot.ModelValidationFigure(model, prediction, sequence)
 
+    if args.RUN_SVM=='yes if prediction not pdb or mmcif': #don't run the svm if prediction is a structure by default
+        if args.distformat in ['pdb', 'mmcif']:
+            args.RUN_SVM='no'
+        else:
+            args.RUN_SVM='yes'
+
     if args.RUN_SVM=='yes':
         logger.info(os.linesep + "Running Support Vector Machine.")
 

From 81624272a3355a982a7d4942f2de9667773fd710 Mon Sep 17 00:00:00 2001
From: AderikVoorspoels <aderik.voorspoels@liverpool.ac.uk>
Date: Fri, 1 Aug 2025 15:43:06 +0100
Subject: [PATCH 6/6] making sure whole sequence file is accessible in
 command-line tool, not just top; important change to hopefully make chain
 selection and multi-chain validation possible

---
 conkit/command_line/conkit_validate.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/conkit/command_line/conkit_validate.py b/conkit/command_line/conkit_validate.py
index b4ea6928..71b4e63c 100644
--- a/conkit/command_line/conkit_validate.py
+++ b/conkit/command_line/conkit_validate.py
@@ -275,7 +275,8 @@ def main():
 
     logger.info(os.linesep + "Working directory:                           %s", os.getcwd())
     logger.info("Reading input sequence:                      %s", args.seqfile)
-    sequence = conkit.io.read(args.seqfile, args.seqformat).top
+    sequencefile = conkit.io.read(args.seqfile, args.seqformat)
+    sequence = sequencefile.top
 
     if len(sequence) < 5:
         raise ValueError('Cannot validate model with less than 5 residues')
@@ -290,6 +291,7 @@ def main():
     else: 
         prediction_file = conkit.io.read(args.distfile, args.distformat)
         prediction = prediction_file.top
+
     logger.info("Reading input PDB model:                     %s", args.pdbfile)
     model = conkit.io.read(args.pdbfile, args.pdbformat, distance_cutoff=cutoff, atom_type=rep_atom).top