cryptool-org · stefanossala · May 16, 2025 · May 16, 2025 · May 20, 2025 · May 20, 2025
diff --git a/README.md b/README.md
@@ -10,6 +10,9 @@ While the project was focused on ACA ciphers at first, a later extension added t
 
 Users that are experienced in machine learning can use the tools provided in this project to train and evaluate ML models using the `train.py` and `eval.py` scripts. For further information see the following sections *Training* and *Evaluation*. 
 
+The initial models were trained using the *Keras* and *scikit-learn* libraries. In 2025 Stefano Sala converted the code for the FFNN and LSTM machine learning architectures from Keras to *PyTorch* as part of his [Bachelor thesis](https://www.cryptool.org/media/publications/theses/BA_Stefano-Sala.pdf). One of the goals of this conversion was a more flexible code architecture for the definition and training of the machine learning models. 
+With these changes, the FFNN and LSTM machine learning architectures can only be trained with PyTorch, whereas the evaluation still supports both Keras and PyTorch model files.
+
 # License
 
 This software and the online version on https://www.cryptool.org/cto/ncid are licensed with the GPLv3 license. Private use of this software is allowed. Software using parts of the code from this repository must not be commercially used and also must be GPLv3 licensed.
@@ -53,7 +56,7 @@ python3 train.py --help
   ```
 
 - ```
-  python3 train.py --architecture=FFNN --dataset_workers=50 --train_dataset_size=64960 --batch_size=512 --max_iter=1000000000 --min_train_len=100 --max_train_len=100 --min_test_len=100 --max_test_len=100 --model_name=t30.h5 > weights/t30.txt 2> weights/err_t30.txt &
+  python3 train.py --architecture=FFNN --dataset_workers=50 --train_dataset_size=64960 --batch_size=512 --max_iter=1000000000 --min_train_len=100 --max_train_len=100 --min_test_len=100 --max_test_len=100 --model_name=t30.pth > weights/t30.txt 2> weights/err_t30.txt &
   ```
 
 
@@ -230,11 +233,15 @@ between the rotor ciphers. This helps with the results since the original models
 
 [Histocrypt 2021: A Massive Machine-Learning Approach For Classical Cipher Type Detection Using Feature Engineering](https://doi.org/10.3384/ecp183)
 
-AusDM 2021: Detection of Classical Cipher Types with Feature-Learning  Approaches
+AusDM 2021: Detection of Classical Cipher Types with Feature-Learning Approaches:
 
 - [Proceedings](https://link.springer.com/book/10.1007/978-981-16-8531-6)
 - [Pre-Print](https://www.cryptool.org/download/ncid/Detect-Classical-Cipher-Types-with-Feature-Learning_AusDM2021_PrePrint.pdf)
 
+PyTorch conversion of FFNN and LSTM machine learning architectures:
+
+[Application of AI for ciphertext identification](https://www.cryptool.org/media/publications/theses/BA_Stefano-Sala.pdf)
+
 ## BibTeX Citation
 
 If you use ncid in a scientific publication, we would appreciate using the following citations:

diff --git a/cipherTypeDetection/config.py b/cipherTypeDetection/config.py
@@ -1,3 +1,5 @@
+from enum import Enum
+
 from cipherImplementations.cipher import INPUT_ALPHABET, UNKNOWN_SYMBOL, UNKNOWN_SYMBOL_NUMBER
 from cipherImplementations.simpleSubstitution import SimpleSubstitution
 from cipherImplementations.hill import Hill
@@ -192,3 +194,9 @@
 # LearningRateSchedulers
 decay = 1e-8
 drop = 0.1
+
+class Backend(Enum):
+    """Differentiate between the Keras and PyTorch backend used to train or load a model."""
+    KERAS = 0
+    PYTORCH = 1
+
diff --git a/cipherTypeDetection/ensembleModel.py b/cipherTypeDetection/ensembleModel.py
@@ -1,4 +1,5 @@
 import tensorflow as tf
+import torch
 import pickle
 import numpy as np
 from tensorflow.keras.optimizers import Adam
@@ -7,6 +8,9 @@
 import cipherTypeDetection.config as config
 from cipherTypeDetection.transformer import MultiHeadSelfAttention, TransformerBlock, TokenAndPositionEmbedding
 from cipherImplementations.cipher import OUTPUT_ALPHABET
+from cipherTypeDetection.config import Backend
+from cipherTypeDetection.models.ffnn import FFNN
+from cipherTypeDetection.models.lstm import LSTM
 from util.utils import get_model_input_length
 
 
@@ -37,20 +41,25 @@
 mcc_nb = 0.5294535259111087
 # Cohen's Kappa is not used as these values are almost the same like MCC.
 
+class ModelMetadata:  # describes one model of an EnsembleModel
+    def __init__(self, path, architecture, backend):
+        self.path = path  # model file to load; EnsembleModel only loads it when this is a str
+        self.architecture = architecture  # architecture name such as "FFNN" or "LSTM"
+        self.backend = backend  # compared against Backend.PYTORCH to pick the PyTorch code path
 
 class EnsembleModel:
-    def __init__(self, models, architectures, strategy, cipher_indices):
+    def __init__(self, model_metadata, strategy, cipher_indices):
         self.statistics_dict = {
             "FFNN": [f1_ffnn, accuracy_ffnn, recall_ffnn, precision_ffnn, mcc_ffnn],
             "Transformer": [f1_transformer, accuracy_transformer, recall_transformer, precision_transformer, mcc_transformer],
             "LSTM": [f1_lstm, accuracy_lstm, recall_lstm, precision_lstm, mcc_lstm],
             "RF": [f1_rf, accuracy_rf, recall_rf, precision_rf, mcc_rf],
             "NB": [f1_nb, accuracy_nb, recall_nb, precision_nb, mcc_nb]
         }
-        self.models = models
-        self.architectures = architectures
+        self.model_metadata = model_metadata
+        self.models = [None] * len(self.model_metadata)
         self.strategy = strategy
-        if isinstance(models[0], str):
+        if isinstance(model_metadata[0].path, str):
             self.load_model()
         for key in self.statistics_dict:
             statistics = self.statistics_dict[key]
@@ -72,22 +81,53 @@ def __init__(self, models, architectures, strategy, cipher_indices):
                 self.total_votes[i] += network_total_votes[i]
 
     def load_model(self):
-        for j in range(len(self.models)):
-            if self.architectures[j] in ("FFNN", "CNN", "LSTM", "Transformer"):
-                if self.architectures[j] == 'Transformer':
-                    model_ = tf.keras.models.load_model(self.models[j], custom_objects={
-                        'TokenAndPositionEmbedding': TokenAndPositionEmbedding, 'MultiHeadSelfAttention': MultiHeadSelfAttention,
-                        'TransformerBlock': TransformerBlock})
-                else:
-                    model_ = tf.keras.models.load_model(self.models[j])
-                optimizer = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1, beta_2=config.beta_2, epsilon=config.epsilon,
-                                 amsgrad=config.amsgrad)
-                model_.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
-                               metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
-                self.models[j] = model_
+        for i, metadata in enumerate(self.model_metadata):  # fills self.models[i] for every metadata entry
+            if metadata.backend == Backend.PYTORCH:  # the backend is checked before the architecture name
+                self.models[i] = self._load_pytorch(metadata.architecture, metadata.path)
+            elif metadata.architecture in ("FFNN", "CNN", "LSTM", "Transformer"):  # loaded through tf.keras
+                self.models[i] = self._load_keras(metadata.architecture, metadata.path)
             else:
-                with open(self.models[j], "rb") as f:
-                    self.models[j] = pickle.load(f)
+                with open(metadata.path, "rb") as f:  # any other architecture is read as a pickle file
+                    self.models[i] = pickle.load(f)  # NOTE(review): unpickling can execute code; load trusted files only
+
+    def _load_pytorch(self, architecture, path):
+        """Rebuild a PyTorch FFNN or LSTM from the checkpoint file at `path`.
+
+        The checkpoint must contain the constructor hyperparameters read below and a
+        'model_state_dict' entry.
+        Returns the model in evaluation mode, with its weights mapped to the CPU.
+        Raises ValueError for any architecture other than "FFNN" or "LSTM".
+        """
+        # map_location keeps the load on the CPU regardless of the device the checkpoint was saved from.
+        checkpoint = torch.load(path, map_location=torch.device("cpu"))
+
+        # Hyperparameters are stored next to the weights and passed to the constructor by name.
+        if architecture == "FFNN":
+            keys = ("input_size", "hidden_size", "output_size", "num_hidden_layers")
+            model = FFNN(**{key: checkpoint[key] for key in keys})
+        elif architecture == "LSTM":
+            keys = ("vocab_size", "embed_dim", "hidden_size", "output_size", "num_layers", "dropout")
+            model = LSTM(**{key: checkpoint[key] for key in keys})
+        else:
+            raise ValueError(f"Unimplemented PyTorch architecture: {architecture}")
+
+        model.load_state_dict(checkpoint['model_state_dict'])
+        # eval() disables training-only behaviour such as dropout.
+        model.eval()
+        return model
+
+    def _load_keras(self, architecture, path):
+        """Load the Keras model saved at `path` and compile it with the configured Adam optimizer."""
+        custom_layers = None  # only the Transformer passes its layer classes to the loader
+        if architecture == 'Transformer':
+            custom_layers = {'TokenAndPositionEmbedding': TokenAndPositionEmbedding, 'MultiHeadSelfAttention': MultiHeadSelfAttention,
+                             'TransformerBlock': TransformerBlock}
+        model = tf.keras.models.load_model(path, custom_objects=custom_layers)
+        adam = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1, beta_2=config.beta_2,
+                    epsilon=config.epsilon, amsgrad=config.amsgrad)
+        top3_accuracy = SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")
+        model.compile(optimizer=adam, loss="sparse_categorical_crossentropy", metrics=["accuracy", top3_accuracy])
+        return model
 
     def evaluate(self, batch, batch_ciphertexts, labels, batch_size, metrics, verbose=0):
         correct_all = 0
@@ -111,9 +151,14 @@ def evaluate(self, batch, batch_ciphertexts, labels, batch_size, metrics, verbos
 
     def predict(self, statistics, ciphertexts, batch_size, verbose=0):
         predictions = []
-        for index, model in enumerate(self.models):
-            architecture = self.architectures[index]
-            if architecture == "FFNN":
+        for index, metadata in enumerate(self.model_metadata):
+            model = self.models[index]
+            architecture = metadata.architecture
+            if metadata.backend == Backend.PYTORCH:
+                # Non-tf inputs are passed through unchanged; previously np_statistics was unbound for them.
+                np_statistics = statistics.numpy() if isinstance(statistics, tf.Tensor) else statistics
+                predictions.append(model.predict(np_statistics, batch_size))
+            elif architecture == "FFNN":
                 predictions.append(model.predict(statistics, batch_size=batch_size, verbose=verbose))
             elif architecture in ("CNN", "LSTM", "Transformer"):
                 input_length = get_model_input_length(model, architecture)
@@ -168,7 +213,7 @@ def predict(self, statistics, ciphertexts, batch_size, verbose=0):
                     scaled[i][j] = scaled[i][j] / len(predictions)
         elif self.strategy == 'weighted':
             for i in range(len(predictions)):
-                statistics = self.statistics_dict[self.architectures[i]]
+                statistics = self.statistics_dict[self.model_metadata[i].architecture]
                 for j in range(len(predictions[i])):
                     for k in range(len(predictions[i][j])):
                         scaled[j][k] += predictions[i][j][k] * statistics[-1][k] / self.total_votes[k]