From 66b23f729bc5750d0b4be08f5a44b4546f614823 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Fri, 16 May 2025 17:07:57 +0200
Subject: [PATCH 01/31] PyTorch Test for FFNN

---
 .../miniBatchEarlyStoppingCallback.py         |  2 +-
 cipherTypeDetection/train.py                  | 40 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/cipherTypeDetection/miniBatchEarlyStoppingCallback.py b/cipherTypeDetection/miniBatchEarlyStoppingCallback.py
index c3e480f..18cdbd2 100644
--- a/cipherTypeDetection/miniBatchEarlyStoppingCallback.py
+++ b/cipherTypeDetection/miniBatchEarlyStoppingCallback.py
@@ -87,7 +87,7 @@ def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=1, mode=
             self.min_delta *= 1
         else:
             self.min_delta *= -1
-        self.best = np.Inf if self.monitor_op is np.less else -np.Inf
+        self.best = np.inf if self.monitor_op is np.less else -np.inf
 
     def on_epoch_end(self, epoch, logs=None):
         current = self.get_monitor_value(logs)
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 7d00dd7..e96a7f5 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -1,6 +1,12 @@
 import multiprocessing
 from pathlib import Path
 
+# ─── PyTorch Imports ───────────────────────────────────────────────────────
+import torch                                # import PyTorch core
+import torch.nn as nn                       # import neural network modules
+import torch.optim as optim                 # import optimizers
+from torch.utils.data import DataLoader, TensorDataset
+
 import argparse
 import sys
 import time
@@ -114,6 +120,7 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
         return model
     
     # Create new model based on architecture
+    """
     if architecture == "FFNN":
         model = tf.keras.Sequential()
         model.add(tf.keras.layers.Input(shape=(input_layer_size,)))
@@ -123,6 +130,39 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
         model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", 
                     metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
         return model
+    """
+    if architecture == "FFNN":
+        # Define a PyTorch Feed-Forward Neural Network
+        class FFNN(nn.Module):
+            def __init__(self, input_size, hidden_size, num_hidden_layers, output_size):
+                super(FFNN, self).__init__()
+                layers = []
+
+                # input layer
+                layers.append(nn.Linear(input_size, hidden_size))
+                layers.append(nn.ReLU())
+
+                # hidden layers
+                for _ in range(num_hidden_layers - 1):
+                    layers.append(nn.Linear(hidden_size, hidden_size))
+                    layers.append(nn.ReLU())
+
+                # output layer (no softmax; I use CrossEntropyLoss that includes log-softmax)
+                layers.append(nn.Linear(hidden_size, output_size))
+                self.network = nn.Sequential(*layers)
+
+            def forward(self, x):
+                # forward pass through the MLP
+                return self.network(x)
+
+        # instantiate the model
+        model = FFNN(
+            input_size=input_layer_size,
+            hidden_size=hidden_layer_size,
+            num_hidden_layers=config.hidden_layers,
+            output_size=output_layer_size
+        )
+        return model
     
     elif architecture == "CNN":
         config.FEATURE_ENGINEERING = False

From 1bdbde7d6846efee06538eacee154a2deb015e2f Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Fri, 16 May 2025 18:15:42 +0200
Subject: [PATCH 02/31] First implementation of PyTorch for FFNN

---
 cipherTypeDetection/train.py | 155 ++++++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 4 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index e96a7f5..fd04f8e 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -6,6 +6,8 @@
 import torch.nn as nn                       # import neural network modules
 import torch.optim as optim                 # import optimizers
 from torch.utils.data import DataLoader, TensorDataset
+import numpy as np
+import tensorflow as tf
 
 import argparse
 import sys
@@ -23,6 +25,7 @@
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import SVC
 from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import train_test_split
 import matplotlib.pyplot as plt
 from datetime import datetime
 
@@ -68,7 +71,11 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
                 extend_model = tf.keras.models.load_model(extend_model, compile=False)
             model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-            model.summary()
+        # Keras models have .summary(), PyTorch ones don’t
+            if hasattr(model, "summary"):
+                model.summary()                          # Keras: print structure
+            else:
+                print(model)                             # PyTorch: print via __repr__
     else:
         print("Only one GPU found.")
         strategy = NullDistributionStrategy()
@@ -76,7 +83,11 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             extend_model = tf.keras.models.load_model(extend_model, compile=False)
         model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-            model.summary()
+        # Keras models have .summary(), PyTorch ones don’t
+            if hasattr(model, "summary"):
+                model.summary()                          # Keras: print structure
+            else:
+                print(model)                             # PyTorch: print via __repr__
 
     print('Model created.\n')
     return model, strategy
@@ -684,6 +695,104 @@ def create_checkpoint_callback():
 
     print('Training model...')
 
+
+    # ─── PyTorch branch for FFNN only ───────────────────────────────────────────
+
+    if args.architecture == "FFNN":
+        # Initialize variables for FFNN
+        start_time = time.time()
+
+        # move model to GPU if available
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+
+        # gather all batches until max_iter
+        stats_list, labels_list = [], []
+        while train_ds.iteration < args.max_iter:
+            for batch in next(train_ds):
+                stats, labs = batch.items()
+                stats_list.append(stats.numpy())
+                labels_list.append(labs.numpy())
+                print(f"Loaded {train_ds.iteration} samples.")  # status log
+        train_ds.stop_outstanding_tasks()
+
+        # concatenate into single arrays
+        stats_arr = np.concatenate(stats_list, axis=0)
+        labels_arr = np.concatenate(labels_list, axis=0)
+
+        # split train / validation
+        train_stats, val_stats, train_labels, val_labels = train_test_split(
+            stats_arr, labels_arr, test_size=0.3, random_state=42
+        )
+
+        # to torch tensors
+        train_tensor = torch.tensor(train_stats, dtype=torch.float32)
+        train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
+        val_tensor = torch.tensor(val_stats, dtype=torch.float32)
+        val_labels_tensor = torch.tensor(val_labels, dtype=torch.long)
+
+        # dataloaders
+        train_loader = DataLoader(
+            TensorDataset(train_tensor, train_labels_tensor),
+            batch_size=args.batch_size, shuffle=True,
+            num_workers=args.dataset_workers
+        )
+        val_loader = DataLoader(
+            TensorDataset(val_tensor, val_labels_tensor),
+            batch_size=args.batch_size, shuffle=False,
+            num_workers=args.dataset_workers
+        )
+
+        # optimizer & loss
+        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
+        criterion = nn.CrossEntropyLoss()
+
+        # training loop
+        for epoch in range(args.epochs):
+            model.train()
+            running_loss = 0.0
+            for X_batch, y_batch in train_loader:
+                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
+                optimizer.zero_grad()                  # zero gradients
+                outputs = model(X_batch)              # forward pass
+                loss = criterion(outputs, y_batch)    # compute loss
+                loss.backward()                       # backward pass
+                optimizer.step()                      # update weights
+                running_loss += loss.item()
+
+            # compute average loss and validation accuracy
+            avg_loss = running_loss / len(train_loader)
+            model.eval()
+            correct, total = 0, 0
+            with torch.no_grad():
+                for X_val, y_val in val_loader:
+                    X_val, y_val = X_val.to(device), y_val.to(device)
+                    preds = model(X_val).argmax(dim=1)
+                    correct += (preds == y_val).sum().item()
+                    total += y_val.size(0)
+            val_acc = correct / total
+            print(f"Epoch {epoch+1}/{args.epochs}, Loss: {avg_loss:.4f}, Val Acc: {val_acc:.4f}")
+
+        # print elapsed time
+        elapsed = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
+        stats_str = (
+            f"Finished PyTorch FFNN in {elapsed.days}d {elapsed.seconds//3600}h "
+            f"{(elapsed.seconds//60)%60}m {elapsed.seconds%60}s "
+            f"with {train_ds.iteration} samples and {args.epochs} epochs.\n"
+        )
+        print(stats_str)
+
+        # create a dummy callback so downstream code can always check .stop_training
+        class DummyCallback:
+            def __init__(self):
+                self.stop_training = False
+
+        dummy_cb = DummyCallback()
+        # return with a dummy callback instead of None
+        return dummy_cb, train_ds.iteration, stats_str
+    # ───────────────────────────────────────────────────────────────────────────────
+
+
     delete_previous_checkpoints()
 
     # Create callbacks for tensorflow models
@@ -863,7 +972,18 @@ def save_model(model, args):
     model_path = os.path.join(args.save_directory, model_name)
 
     if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
-        model.save(model_path)
+        # Keras models have .save(); PyTorch nn.Module ones don't
+        if hasattr(model, "save"):
+            # Keras: save the full model (architecture + weights + optimizer state)
+            model.save(model_path)
+            print(f"Saved Keras model to {model_path}")
+        elif isinstance(model, torch.nn.Module):
+            # PyTorch: save only the state_dict
+            torch.save(model.state_dict(), model_path + ".pt")
+            print(f"Saved PyTorch model state_dict to {model_path}.pt")
+        else:
+            # Fallback for unexpected types
+            raise ValueError(f"Cannot save model of type {type(model)}")
 
     elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN", "SVM-Rotor"):
         with open(model_path, "wb") as f:
@@ -1010,7 +1130,34 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["SVM"].add_predictions(labels, model[3].predict_proba(statistics))
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
             else:
-                prediction = model.predict(statistics, batch_size=args.batch_size, verbose=1)
+                # ─── Branch prediction: Keras vs PyTorch ───────────────────────────────────
+                if hasattr(model, "predict"):
+                    # Keras: can call .predict() directly
+                    prediction = model.predict(statistics,
+                                            batch_size=args.batch_size,
+                                            verbose=1)
+                else:
+                    # select device and move model there
+                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    model.to(device)
+                    model.eval()  # set eval mode (disables dropout, etc.)
+
+                    # convert TensorFlow tensor (or any array-like) to a NumPy array first,
+                    # then to a PyTorch tensor on the right device
+                    if isinstance(statistics, tf.Tensor):
+                        np_inputs = statistics.numpy()
+                    else:
+                        # fallback for plain lists/NumPy arrays
+                        np_inputs = np.array(statistics)
+                    inputs = torch.tensor(np_inputs, dtype=torch.float32).to(device)
+                    
+                    with torch.no_grad():               # no grad for inference
+                        outputs = model(inputs)         # forward pass
+                    # take the index of the max logit as the predicted class
+                    prediction = outputs.argmax(dim=1)  # Tensor of shape (N,)
+                    # move back to CPU and to NumPy array
+                    prediction = prediction.cpu().numpy()
+
                 prediction_metrics[architecture].add_predictions(labels, prediction)
 
             total_len_prediction += len(prediction)

From 666db705fad4b44fa6bb70f0ebaa23f1c60e6c4e Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Tue, 20 May 2025 12:21:01 +0200
Subject: [PATCH 03/31] Add summary of the model for FFNN with torchinfo

---
 cipherTypeDetection/train.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index fd04f8e..02e68b4 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -8,6 +8,7 @@
 from torch.utils.data import DataLoader, TensorDataset
 import numpy as np
 import tensorflow as tf
+from torchinfo import summary
 
 import argparse
 import sys
@@ -83,11 +84,11 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             extend_model = tf.keras.models.load_model(extend_model, compile=False)
         model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-        # Keras models have .summary(), PyTorch ones don’t
+        # Keras models have .summary(), PyTorch ones don’t. Use torchinfo or torchsummary
             if hasattr(model, "summary"):
-                model.summary()                          # Keras: print structure
+                model.summary()
             else:
-                print(model)                             # PyTorch: print via __repr__
+                summary(model, input_size=(1, 724))
 
     print('Model created.\n')
     return model, strategy

From 379e345f48cf6ceced50002a69e3622fb4269e19 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Tue, 20 May 2025 18:41:51 +0200
Subject: [PATCH 04/31] Adjust saved PyTorch model format to .pth

---
 cipherTypeDetection/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 02e68b4..82767a2 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -980,8 +980,8 @@ def save_model(model, args):
             print(f"Saved Keras model to {model_path}")
         elif isinstance(model, torch.nn.Module):
             # PyTorch: save only the state_dict
-            torch.save(model.state_dict(), model_path + ".pt")
-            print(f"Saved PyTorch model state_dict to {model_path}.pt")
+            torch.save(model.state_dict(), model_path + ".pth")
+            print(f"Saved PyTorch model state_dict to {model_path}.pth")
         else:
             # Fallback for unexpected types
             raise ValueError(f"Cannot save model of type {type(model)}")

From 0420598caab78185b7cbd60fd9b66244741941fe Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Wed, 21 May 2025 17:36:45 +0200
Subject: [PATCH 05/31] Start model training after deleting previous checkpoints

---
 cipherTypeDetection/train.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 82767a2..9f74d43 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -74,9 +74,9 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
         # Keras models have .summary(), PyTorch ones don’t
             if hasattr(model, "summary"):
-                model.summary()                          # Keras: print structure
+                model.summary()
             else:
-                print(model)                             # PyTorch: print via __repr__
+                summary(model, input_size=(1, 724))
     else:
         print("Only one GPU found.")
         strategy = NullDistributionStrategy()
@@ -696,6 +696,7 @@ def create_checkpoint_callback():
 
     print('Training model...')
 
+    delete_previous_checkpoints()
 
     # ─── PyTorch branch for FFNN only ───────────────────────────────────────────
 
@@ -794,8 +795,6 @@ def __init__(self):
     # ───────────────────────────────────────────────────────────────────────────────
 
 
-    delete_previous_checkpoints()
-
     # Create callbacks for tensorflow models
     tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='../data/logs', 
                                                           update_freq='epoch')

From b7c7598c2225fbd1dd3db0de024f90185a2c3714 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 22 May 2025 20:33:58 +0200
Subject: [PATCH 06/31] Fix FFNN training, create class for FFNN

---
 cipherTypeDetection/train.py | 316 ++++++++++++++---------------------
 1 file changed, 125 insertions(+), 191 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 9f74d43..55a32eb 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -1,15 +1,6 @@
 import multiprocessing
 from pathlib import Path
 
-# ─── PyTorch Imports ───────────────────────────────────────────────────────
-import torch                                # import PyTorch core
-import torch.nn as nn                       # import neural network modules
-import torch.optim as optim                 # import optimizers
-from torch.utils.data import DataLoader, TensorDataset
-import numpy as np
-import tensorflow as tf
-from torchinfo import summary
-
 import argparse
 import sys
 import time
@@ -19,6 +10,13 @@
 import math
 import pickle
 import functools
+
+# PyTorch
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchinfo import summary
+
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.tree import DecisionTreeClassifier, plot_tree
@@ -26,7 +24,6 @@
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import SVC
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.model_selection import train_test_split
 import matplotlib.pyplot as plt
 from datetime import datetime
 
@@ -51,6 +48,89 @@
 for device in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(device, True)
 
+class TorchFFNN(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
+        super().__init__()
+        layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
+        for _ in range(num_hidden_layers - 1):
+            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
+        layers.append(nn.Linear(hidden_size, output_size))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+    
+def train_torch_ffnn(model, args, train_ds):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
+    criterion = nn.CrossEntropyLoss()
+    model.train()
+    train_iter = 0
+    train_epoch = 0
+    start_time = time.time()
+    should_create_validation_data = True
+
+    for epoch in range(args.epochs):
+        #train_ds.iteration = 0
+        while train_ds.iteration < args.max_iter:
+            training_batches = next(train_ds)
+            for training_batch in training_batches:
+                statistics, labels = training_batch.items()
+                x = torch.tensor(statistics.numpy(), dtype=torch.float32).to(device)
+                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+                optimizer.zero_grad()
+                outputs = model(x)
+                loss = criterion(outputs, y)
+                loss.backward()
+                optimizer.step()
+                train_iter += len(y)
+                if train_iter >= args.max_iter:
+                    break
+            if train_iter >= args.max_iter:
+                break
+        train_epoch += 1
+        print(f"Epoch: {train_epoch}, Iteration: {train_iter}, Loss: {loss.item():.4f}")
+        if train_iter >= args.max_iter:
+            break
+
+    elapsed_training_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
+    training_stats = ("Finished training in %d days %d hours %d minutes %d seconds "
+                      "with %d iterations and %d epochs.\n" 
+                      % (elapsed_training_time.days, 
+                         elapsed_training_time.seconds // 3600, 
+                         (elapsed_training_time.seconds // 60) % 60,
+                         elapsed_training_time.seconds % 60, 
+                         train_iter, 
+                         train_epoch))
+    print(training_stats)
+    # Return dummy early_stopping_callback for compatibility
+    class DummyEarlyStopping:
+        stop_training = False
+    return DummyEarlyStopping(), train_iter, training_stats
+
+def predict_torch_ffnn(model, test_ds, args):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    model.to(device)
+    all_preds = []
+    all_labels = []
+    with torch.no_grad():
+        while test_ds.iteration < args.max_iter:
+            testing_batches = next(test_ds)
+            for testing_batch in testing_batches:
+                statistics, labels = testing_batch.items()
+                x = torch.tensor(statistics.numpy(), dtype=torch.float32).to(device)
+                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+                outputs = model(x)
+                preds = torch.softmax(outputs, dim=1).cpu().numpy()
+                all_preds.append(preds)
+                all_labels.append(labels.numpy())
+    # Concatenate all predictions and labels
+    import numpy as np
+    all_preds = np.concatenate(all_preds, axis=0)
+    all_labels = np.concatenate(all_labels, axis=0)
+    return all_preds, all_labels
 
 def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
@@ -72,7 +152,6 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
                 extend_model = tf.keras.models.load_model(extend_model, compile=False)
             model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-        # Keras models have .summary(), PyTorch ones don’t
             if hasattr(model, "summary"):
                 model.summary()
             else:
@@ -84,7 +163,6 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             extend_model = tf.keras.models.load_model(extend_model, compile=False)
         model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-        # Keras models have .summary(), PyTorch ones don’t. Use torchinfo or torchsummary
             if hasattr(model, "summary"):
                 model.summary()
             else:
@@ -132,47 +210,13 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
         return model
     
     # Create new model based on architecture
-    """
-    if architecture == "FFNN":
-        model = tf.keras.Sequential()
-        model.add(tf.keras.layers.Input(shape=(input_layer_size,)))
-        for _ in range(config.hidden_layers):
-            model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu', use_bias=True))
-        model.add(tf.keras.layers.Dense(output_layer_size, activation='softmax'))
-        model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", 
-                    metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
-        return model
-    """
     if architecture == "FFNN":
-        # Define a PyTorch Feed-Forward Neural Network
-        class FFNN(nn.Module):
-            def __init__(self, input_size, hidden_size, num_hidden_layers, output_size):
-                super(FFNN, self).__init__()
-                layers = []
-
-                # input layer
-                layers.append(nn.Linear(input_size, hidden_size))
-                layers.append(nn.ReLU())
-
-                # hidden layers
-                for _ in range(num_hidden_layers - 1):
-                    layers.append(nn.Linear(hidden_size, hidden_size))
-                    layers.append(nn.ReLU())
-
-                # output layer (no softmax; I use CrossEntropyLoss that includes log-softmax)
-                layers.append(nn.Linear(hidden_size, output_size))
-                self.network = nn.Sequential(*layers)
-
-            def forward(self, x):
-                # forward pass through the MLP
-                return self.network(x)
-
-        # instantiate the model
-        model = FFNN(
+        # Use PyTorch for FFNN
+        model = TorchFFNN(
             input_size=input_layer_size,
             hidden_size=hidden_layer_size,
-            num_hidden_layers=config.hidden_layers,
-            output_size=output_layer_size
+            output_size=output_layer_size,
+            num_hidden_layers=config.hidden_layers
         )
         return model
     
@@ -673,6 +717,8 @@ def train_model(model, strategy, args, train_ds):
     -------
     tuple
     """
+    if args.architecture == "FFNN" and isinstance(model, TorchFFNN):
+        return train_torch_ffnn(model, args, train_ds)
 
     checkpoints_dir = Path('../data/checkpoints')
     def delete_previous_checkpoints():
@@ -698,103 +744,6 @@ def create_checkpoint_callback():
 
     delete_previous_checkpoints()
 
-    # ─── PyTorch branch for FFNN only ───────────────────────────────────────────
-
-    if args.architecture == "FFNN":
-        # Initialize variables for FFNN
-        start_time = time.time()
-
-        # move model to GPU if available
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model.to(device)
-
-        # gather all batches until max_iter
-        stats_list, labels_list = [], []
-        while train_ds.iteration < args.max_iter:
-            for batch in next(train_ds):
-                stats, labs = batch.items()
-                stats_list.append(stats.numpy())
-                labels_list.append(labs.numpy())
-                print(f"Loaded {train_ds.iteration} samples.")  # status log
-        train_ds.stop_outstanding_tasks()
-
-        # concatenate into single arrays
-        stats_arr = np.concatenate(stats_list, axis=0)
-        labels_arr = np.concatenate(labels_list, axis=0)
-
-        # split train / validation
-        train_stats, val_stats, train_labels, val_labels = train_test_split(
-            stats_arr, labels_arr, test_size=0.3, random_state=42
-        )
-
-        # to torch tensors
-        train_tensor = torch.tensor(train_stats, dtype=torch.float32)
-        train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
-        val_tensor = torch.tensor(val_stats, dtype=torch.float32)
-        val_labels_tensor = torch.tensor(val_labels, dtype=torch.long)
-
-        # dataloaders
-        train_loader = DataLoader(
-            TensorDataset(train_tensor, train_labels_tensor),
-            batch_size=args.batch_size, shuffle=True,
-            num_workers=args.dataset_workers
-        )
-        val_loader = DataLoader(
-            TensorDataset(val_tensor, val_labels_tensor),
-            batch_size=args.batch_size, shuffle=False,
-            num_workers=args.dataset_workers
-        )
-
-        # optimizer & loss
-        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
-        criterion = nn.CrossEntropyLoss()
-
-        # training loop
-        for epoch in range(args.epochs):
-            model.train()
-            running_loss = 0.0
-            for X_batch, y_batch in train_loader:
-                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
-                optimizer.zero_grad()                  # zero gradients
-                outputs = model(X_batch)              # forward pass
-                loss = criterion(outputs, y_batch)    # compute loss
-                loss.backward()                       # backward pass
-                optimizer.step()                      # update weights
-                running_loss += loss.item()
-
-            # compute average loss and validation accuracy
-            avg_loss = running_loss / len(train_loader)
-            model.eval()
-            correct, total = 0, 0
-            with torch.no_grad():
-                for X_val, y_val in val_loader:
-                    X_val, y_val = X_val.to(device), y_val.to(device)
-                    preds = model(X_val).argmax(dim=1)
-                    correct += (preds == y_val).sum().item()
-                    total += y_val.size(0)
-            val_acc = correct / total
-            print(f"Epoch {epoch+1}/{args.epochs}, Loss: {avg_loss:.4f}, Val Acc: {val_acc:.4f}")
-
-        # print elapsed time
-        elapsed = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
-        stats_str = (
-            f"Finished PyTorch FFNN in {elapsed.days}d {elapsed.seconds//3600}h "
-            f"{(elapsed.seconds//60)%60}m {elapsed.seconds%60}s "
-            f"with {train_ds.iteration} samples and {args.epochs} epochs.\n"
-        )
-        print(stats_str)
-
-        # create a dummy callback so downstream code can always check .stop_training
-        class DummyCallback:
-            def __init__(self):
-                self.stop_training = False
-
-        dummy_cb = DummyCallback()
-        # return with a dummy callback instead of None
-        return dummy_cb, train_ds.iteration, stats_str
-    # ───────────────────────────────────────────────────────────────────────────────
-
-
     # Create callbacks for tensorflow models
     tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='../data/logs', 
                                                           update_freq='epoch')
@@ -971,19 +920,12 @@ def save_model(model, args):
         model_name = args.model_name
     model_path = os.path.join(args.save_directory, model_name)
 
-    if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
-        # Keras models have .save(); PyTorch nn.Module ones don't
-        if hasattr(model, "save"):
-            # Keras: save the full model (architecture + weights + optimizer state)
-            model.save(model_path)
-            print(f"Saved Keras model to {model_path}")
-        elif isinstance(model, torch.nn.Module):
-            # PyTorch: save only the state_dict
-            torch.save(model.state_dict(), model_path + ".pth")
-            print(f"Saved PyTorch model state_dict to {model_path}.pth")
-        else:
-            # Fallback for unexpected types
-            raise ValueError(f"Cannot save model of type {type(model)}")
+    if architecture == "FFNN":
+        if isinstance(model, TorchFFNN):
+            torch.save(model.state_dict(), model_path.replace('.h5', '.pth'))
+    
+    elif architecture in ("CNN", "LSTM", "Transformer"):
+        model.save(model_path)
 
     elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN", "SVM-Rotor"):
         with open(model_path, "wb") as f:
@@ -1012,9 +954,13 @@ def save_model(model, args):
     if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
         logs_destination = '../data/' + model_name.split('.')[0] + '_tensorboard_logs'
         try:
-            if os.path.exists(logs_destination):
-                shutil.rmtree(logs_destination)
-            shutil.move('../data/logs', logs_destination)
+            if os.path.exists('../data/logs'):
+                if os.path.exists(logs_destination):
+                    shutil.rmtree(logs_destination)
+                shutil.move('../data/logs', logs_destination)
+            else:
+                print("No logs to move from '../data/logs'.")
+            
         except Exception:
             print(f"Could not remove logs of previous run. Move of current logs "
                   f"from '../data/logs' to '{logs_destination}' failed.")
@@ -1061,6 +1007,21 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
     test_iter = 0
     test_epoch = 0
 
+    # PyTorch FFNN prediction
+    if architecture == "FFNN" and isinstance(model, TorchFFNN):
+        preds, labels = predict_torch_ffnn(model, test_ds, args)
+        # You may want to adapt this to your PredictionPerformanceMetrics usage:
+        prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
+        prediction_metrics[architecture].add_predictions(labels, preds)
+        for metrics in prediction_metrics.values():
+            metrics.print_evaluation()
+        elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
+        prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
+            elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
+            (elapsed_prediction_time.seconds // 60) % 60,
+            elapsed_prediction_time.seconds % 60)
+        return prediction_stats
+
     # Determine the number of iterations to use for evaluating the model
     prediction_dataset_factor = 10
     if early_stopping_callback.stop_training:
@@ -1130,34 +1091,7 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["SVM"].add_predictions(labels, model[3].predict_proba(statistics))
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
             else:
-                # ─── Branch prediction: Keras vs PyTorch ───────────────────────────────────
-                if hasattr(model, "predict"):
-                    # Keras: can call .predict() directly
-                    prediction = model.predict(statistics,
-                                            batch_size=args.batch_size,
-                                            verbose=1)
-                else:
-                    # select device and move model there
-                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                    model.to(device)
-                    model.eval()  # set eval mode (disables dropout, etc.)
-
-                    # convert TensorFlow tensor (or any array-like) to a NumPy array first,
-                    # then to a PyTorch tensor on the right device
-                    if isinstance(statistics, tf.Tensor):
-                        np_inputs = statistics.numpy()
-                    else:
-                        # fallback for plain lists/NumPy arrays
-                        np_inputs = np.array(statistics)
-                    inputs = torch.tensor(np_inputs, dtype=torch.float32).to(device)
-                    
-                    with torch.no_grad():               # no grad for inference
-                        outputs = model(inputs)         # forward pass
-                    # take the index of the max logit as the predicted class
-                    prediction = outputs.argmax(dim=1)  # Tensor of shape (N,)
-                    # move back to CPU and to NumPy array
-                    prediction = prediction.cpu().numpy()
-
+                prediction = model.predict(statistics, batch_size=args.batch_size, verbose=1)
                 prediction_metrics[architecture].add_predictions(labels, prediction)
 
             total_len_prediction += len(prediction)

From 81c0c7c18235ee380335afb9e53a5598909a553e Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Tue, 27 May 2025 15:45:40 +0200
Subject: [PATCH 07/31] Fix save_model extensions

---
 cipherTypeDetection/train.py | 41 ++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 55a32eb..0370f9e 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -113,6 +113,7 @@ def predict_torch_ffnn(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
     model.to(device)
+    criterion = nn.CrossEntropyLoss()
     all_preds = []
     all_labels = []
     with torch.no_grad():
@@ -123,6 +124,7 @@ def predict_torch_ffnn(model, test_ds, args):
                 x = torch.tensor(statistics.numpy(), dtype=torch.float32).to(device)
                 y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
                 outputs = model(x)
+                loss = criterion(outputs, y)
                 preds = torch.softmax(outputs, dim=1).cpu().numpy()
                 all_preds.append(preds)
                 all_labels.append(labels.numpy())
@@ -906,51 +908,56 @@ def create_checkpoint_callback():
         
 def save_model(model, args):
     """Writes the model and the commandline arguments to disk."""
-
     print('Saving model...')
     architecture = args.architecture
+
     if not os.path.exists(args.save_directory):
         os.mkdir(args.save_directory)
+
+    # Gestione nome modello
     if args.model_name == 'm.h5':
         i = 1
-        while os.path.exists(os.path.join(args.save_directory, args.model_name.split('.')[0] + str(i) + '.h5')):
+        base_name = args.model_name.split('.')[0]
+        extension = '.pth' if architecture == "FFNN" else '.h5'
+        while os.path.exists(os.path.join(args.save_directory, base_name + str(i) + extension)):
             i += 1
-        model_name = args.model_name.split('.')[0] + str(i) + '.h5'
+        model_name = base_name + str(i) + extension
     else:
         model_name = args.model_name
+        # per FFNN forziamo il cambio estensione
+        if architecture == "FFNN":
+            model_name = model_name.replace('.h5', '.pth')
+
     model_path = os.path.join(args.save_directory, model_name)
 
+    # Salvataggio modello
     if architecture == "FFNN":
         if isinstance(model, TorchFFNN):
-            torch.save(model.state_dict(), model_path.replace('.h5', '.pth'))
-    
+            torch.save(model.state_dict(), model_path)
+
     elif architecture in ("CNN", "LSTM", "Transformer"):
         model.save(model_path)
 
     elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN", "SVM-Rotor"):
         with open(model_path, "wb") as f:
-            # this gets very large
             pickle.dump(model, f)
 
     elif architecture == "[FFNN,NB]":
         model[0].save('../data/models/' + model_path.split('.')[0] + "_ffnn.h5")
         with open('../data/models/' + model_path.split('.')[0] + "_nb.h5", "wb") as f:
-            # this gets very large
             pickle.dump(model[1], f)
 
     elif architecture == "[DT,ET,RF,SVM,kNN]":
-        for index, name in enumerate(["dt","et","rf","svm","knn"]):
-            # TODO: Are these files actually in the h5 format? Probably not!
+        for index, name in enumerate(["dt", "et", "rf", "svm", "knn"]):
             with open('../data/models/' + model_path.split('.')[0] + f"_{name}.h5", "wb") as f:
-                # this gets very large
                 pickle.dump(model[index], f)
 
-    # Write user provided commandline arguments into mode path
+    # Salvataggio parametri
     with open('../data/' + model_path.split('.')[0] + '_parameters.txt', 'w') as f:
         for arg in vars(args):
             f.write("{:23s}= {:s}\n".format(arg, str(getattr(args, arg))))
 
-    # Remove logs of previous run
+    # Gestione logs
     if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
         logs_destination = '../data/' + model_name.split('.')[0] + '_tensorboard_logs'
         try:
@@ -958,15 +965,13 @@ def save_model(model, args):
                 if os.path.exists(logs_destination):
                     shutil.rmtree(logs_destination)
                 shutil.move('../data/logs', logs_destination)
-            else:
-                print("No logs to move from '../data/logs'.")
-            
         except Exception:
-            print(f"Could not remove logs of previous run. Move of current logs "
-                  f"from '../data/logs' to '{logs_destination}' failed.")
-            
+            print(f"Could not move logs from '../data/logs' to '{logs_destination}'.")
+
     print('Model saved.\n')
 
+
+
 def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter):
     """
     Testing the predictions of the model.

From 4a7468577b393b94fd57df60da1a4dea46d2ee5a Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 29 May 2025 11:19:44 +0200
Subject: [PATCH 08/31] Add params for saved ffnn models like input size,
 output, ...

---
 cipherTypeDetection/train.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 0370f9e..0e44e11 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -924,16 +924,21 @@ def save_model(model, args):
         model_name = base_name + str(i) + extension
     else:
         model_name = args.model_name
-        # per FFNN forziamo il cambio estensione
         if architecture == "FFNN":
             model_name = model_name.replace('.h5', '.pth')
 
     model_path = os.path.join(args.save_directory, model_name)
 
-    # Salvataggio modello
     if architecture == "FFNN":
         if isinstance(model, TorchFFNN):
-            torch.save(model.state_dict(), model_path)
+            torch.save({
+                'model_state_dict': model.state_dict(),
+                'input_size': model.input_size,
+                'hidden_size': model.hidden_size,
+                'output_size': model.output_size,
+                'num_hidden_layers': model.num_hidden_layers
+            }, model_path)
+
 
     elif architecture in ("CNN", "LSTM", "Transformer"):
         model.save(model_path)
@@ -1241,4 +1246,4 @@ def main():
     print(prediction_stats)
 
 if __name__ == "__main__":
-    main()    
+    main()    
\ No newline at end of file

From 8eb306809bbc7b7d6880db66d57aca05b47e75e9 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 29 May 2025 18:56:35 +0200
Subject: [PATCH 09/31] Add PyTorch evaluation possibility

---
 cipherTypeDetection/eval.py  | 72 ++++++++++++++++++++++++++++++------
 cipherTypeDetection/train.py | 12 +++++-
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index 9381e96..747eb49 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -9,6 +9,10 @@
 import numpy as np
 from datetime import datetime
 
+import torch
+import torch.nn.functional as F
+import torch.optim as optim
+
 # This environ variable must be set before all tensorflow imports!
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 import tensorflow as tf
@@ -133,8 +137,24 @@ def find_ciphertext_paths_in_dir(folder_path):
             statistics, labels, ciphertexts = batch.items()
 
             if architecture == "FFNN":
-                results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
-            if architecture in ("CNN", "LSTM", "Transformer"):
+                if hasattr(model, "evaluate"):  # Keras model
+                    results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
+                else:  # PyTorch model
+                    x = torch.tensor(statistics.numpy(), dtype=torch.float32)
+                    y = torch.tensor(labels.numpy(), dtype=torch.long)
+                    with torch.no_grad():
+                        outputs = model(x)
+                        loss = F.cross_entropy(outputs, y)
+                        top1 = torch.argmax(outputs, dim=1)
+                        acc = (top1 == y).float().mean()
+
+                        # Calc top-3
+                        top3 = torch.topk(outputs, k=3, dim=1).indices  # shape: (batch_size, 3)
+                        y_expanded = y.unsqueeze(1).expand_as(top3)     # shape: (batch_size, 3)
+                        k3_acc = (top3 == y_expanded).any(dim=1).float().mean()
+                        results.append((loss.item(), acc.item(), k3_acc.item()))
+
+            elif architecture in ("CNN", "LSTM", "Transformer"):
                 results.append(model.evaluate(ciphertexts, labels, batch_size=args.batch_size, verbose=1))
             elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN"):
                 results.append(model.score(statistics, labels))
@@ -399,7 +419,26 @@ def load_model(architecture, args, model_path, cipher_types):
     
     model = None
 
-    if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
+    if architecture == "FFNN" and model_path.endswith(".pth"):
+        from cipherTypeDetection.train import TorchFFNN
+
+        checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
+
+        model = TorchFFNN(
+            input_size=checkpoint['input_size'],
+            hidden_size=checkpoint['hidden_size'],
+            output_size=checkpoint['output_size'],
+            num_hidden_layers=checkpoint['num_hidden_layers']
+        )
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.eval()
+
+        config.FEATURE_ENGINEERING = True
+        config.PAD_INPUT = False
+
+        return model
+
+    elif architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
         if architecture == 'Transformer':
             if not hasattr(config, "maxlen"):
                 raise ValueError("maxlen must be defined in the config when loading a Transformer model!")
@@ -431,12 +470,23 @@ def load_model(architecture, args, model_path, cipher_types):
     else:
         raise ValueError("Unknown architecture: %s" % architecture)
     
-    rotor_only_model_path = args.rotor_only_model
-    with open(rotor_only_model_path, "rb") as f:
-        rotor_only_model = pickle.load(f)
+    # Controlla se ci sono cifrari rotor tra quelli richiesti
+    has_rotor_ciphers = any(c in config.ROTOR_CIPHER_TYPES for c in cipher_types)
+
+    # Se ci sono cifrari rotor, carica anche il modello rotor_only
+    if has_rotor_ciphers:
+        rotor_only_model_path = args.rotor_only_model
+        if not os.path.exists(rotor_only_model_path):
+            raise FileNotFoundError(f"Rotor-only model is required but not found at {rotor_only_model_path}")
+        with open(rotor_only_model_path, "rb") as f:
+            rotor_only_model = pickle.load(f)
+        return RotorDifferentiationEnsemble(architecture, model, rotor_only_model)
+
+    # Se non ci sono cifrari rotor:
+    # - se è un ensemble, restituisci direttamente l'ensemble
+    # - altrimenti restituisci il modello normale
+    return model
 
-    # Embed all models in RotorDifferentiationEnsemble to improve recognition of rotor ciphers
-    return RotorDifferentiationEnsemble(architecture, model, rotor_only_model)
 
 def expand_cipher_groups(cipher_types):
     """Turn cipher group identifiers (ACA, MTC3) into a list of their ciphers"""
@@ -573,8 +623,8 @@ def main():
     for arg in vars(args):
         print("{:23s}= {:s}".format(arg, str(getattr(args, arg))))
     m = os.path.splitext(args.model)
-    if len(os.path.splitext(args.model)) != 2 or os.path.splitext(args.model)[1] != '.h5':
-        print('ERROR: The model name must have the ".h5" extension!', file=sys.stderr)
+    if os.path.splitext(args.model)[1] not in ('.h5', '.pth'):
+        print('ERROR: The model must have extension ".h5" (for Keras) or ".pth" (for PyTorch FFNN).', file=sys.stderr)
         sys.exit(1)
 
     architecture = args.architecture
@@ -613,7 +663,7 @@ def main():
     print("Model Loaded.")
 
     # Model is now always an ensemble
-    architecture = "Ensemble"
+    #architecture = "Ensemble"
 
     # the program was started as in benchmark mode.
     if args.download_dataset is not None:
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 0e44e11..227a98f 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -51,6 +51,13 @@
 class TorchFFNN(nn.Module):
     def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
         super().__init__()
+
+        # saves parameters so that they can be saved and loaded later
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.output_size = output_size
+        self.num_hidden_layers = num_hidden_layers
+
         layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
         for _ in range(num_hidden_layers - 1):
             layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
@@ -59,6 +66,7 @@ def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
 
     def forward(self, x):
         return self.net(x)
+
     
 def train_torch_ffnn(model, args, train_ds):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -1188,8 +1196,8 @@ def main():
     extend_model = args.extend_model
 
     # Validate inputs
-    if len(os.path.splitext(args.model_name)) != 2 or os.path.splitext(args.model_name)[1] != '.h5':
-        print('ERROR: The model name must have the ".h5" extension!', file=sys.stderr)
+    if os.path.splitext(args.model_name)[1] not in ('.h5', '.pth'):
+        print('ERROR: The model must have extension ".h5" (for Keras) or ".pth" (for PyTorch FFNN).', file=sys.stderr)
         sys.exit(1)
 
     if extend_model is not None:

From 8d0c30a83bf9abf617361f299a61afc6ec274283 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Tue, 3 Jun 2025 19:11:54 +0200
Subject: [PATCH 10/31] Fix Adam optimizer, add validation, add early stopping
 for ffnn

---
 cipherTypeDetection/train.py | 97 ++++++++++++++++++++++++++----------
 1 file changed, 72 insertions(+), 25 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 227a98f..f8ca398 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -69,53 +69,100 @@ def forward(self, x):
 
     
 def train_torch_ffnn(model, args, train_ds):
+    import numpy as np
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
-    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
+
+    optimizer = optim.Adam(
+        model.parameters(),
+        lr=config.learning_rate,
+        betas=(config.beta_1, config.beta_2),
+        eps=config.epsilon,
+        amsgrad=config.amsgrad
+    )
     criterion = nn.CrossEntropyLoss()
     model.train()
+
+    best_val_acc = 0
+    patience_counter = 0
+    patience_limit = 5
+
     train_iter = 0
     train_epoch = 0
     start_time = time.time()
-    should_create_validation_data = True
 
     for epoch in range(args.epochs):
-        #train_ds.iteration = 0
         while train_ds.iteration < args.max_iter:
             training_batches = next(train_ds)
             for training_batch in training_batches:
                 statistics, labels = training_batch.items()
-                x = torch.tensor(statistics.numpy(), dtype=torch.float32).to(device)
-                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+                stats_np = statistics.numpy()
+                labels_np = labels.numpy()
+
+                # --- Normalize the input features ---
+                mean = stats_np.mean(axis=0)
+                std = stats_np.std(axis=0) + 1e-8
+                stats_np = (stats_np - mean) / std
+
+                # --- Split into train/val ---
+                x_train, x_val, y_train, y_val = train_test_split(stats_np, labels_np, test_size=0.3)
+
+                x_train = torch.tensor(x_train, dtype=torch.float32).to(device)
+                y_train = torch.tensor(y_train, dtype=torch.long).to(device)
+                x_val = torch.tensor(x_val, dtype=torch.float32).to(device)
+                y_val = torch.tensor(y_val, dtype=torch.long).to(device)
+
+                # --- Training step ---
                 optimizer.zero_grad()
-                outputs = model(x)
-                loss = criterion(outputs, y)
+                outputs = model(x_train)
+                loss = criterion(outputs, y_train)
                 loss.backward()
                 optimizer.step()
-                train_iter += len(y)
+
+                train_iter += len(y_train)
+
+                # --- Validation step ---
+                model.eval()
+                with torch.no_grad():
+                    val_outputs = model(x_val)
+                    val_loss = criterion(val_outputs, y_val)
+                    val_pred = torch.argmax(val_outputs, dim=1)
+                    val_acc = (val_pred == y_val).float().mean().item()
+
+                    # top-3 accuracy
+                    top3 = torch.topk(val_outputs, k=3, dim=1).indices
+                    y_val_exp = y_val.unsqueeze(1).expand_as(top3)
+                    val_k3 = (top3 == y_val_exp).any(dim=1).float().mean().item()
+
+                print(f"Epoch: {epoch+1}, Iteration: {train_iter}, "
+                      f"Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, "
+                      f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
+                model.train()
+
+                # --- Early stopping check ---
+                if val_acc > best_val_acc:
+                    best_val_acc = val_acc
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+                    if patience_counter >= patience_limit:
+                        print("Early stopping triggered.")
+                        elapsed = time.time() - start_time
+                        print(f"Training time: {elapsed:.2f} seconds.")
+                        class DummyEarlyStopping: stop_training = True
+                        return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
+
                 if train_iter >= args.max_iter:
                     break
             if train_iter >= args.max_iter:
                 break
         train_epoch += 1
-        print(f"Epoch: {train_epoch}, Iteration: {train_iter}, Loss: {loss.item():.4f}")
-        if train_iter >= args.max_iter:
-            break
 
-    elapsed_training_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
-    training_stats = ("Finished training in %d days %d hours %d minutes %d seconds "
-                      "with %d iterations and %d epochs.\n" 
-                      % (elapsed_training_time.days, 
-                         elapsed_training_time.seconds // 3600, 
-                         (elapsed_training_time.seconds // 60) % 60,
-                         elapsed_training_time.seconds % 60, 
-                         train_iter, 
-                         train_epoch))
-    print(training_stats)
-    # Return dummy early_stopping_callback for compatibility
-    class DummyEarlyStopping:
-        stop_training = False
-    return DummyEarlyStopping(), train_iter, training_stats
+    elapsed = time.time() - start_time
+    print(f"Finished training in {elapsed:.2f} seconds with {train_iter} iterations.")
+    class DummyEarlyStopping: stop_training = False
+    return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
+
 
 def predict_torch_ffnn(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

From c44a34bf4d81e54b80768cd521e73e7594581430 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 5 Jun 2025 19:17:32 +0200
Subject: [PATCH 11/31] Remove normalization, change training time
 visualization

---
 cipherTypeDetection/train.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index f8ca398..0bf3083 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -16,6 +16,7 @@
 import torch.nn as nn
 import torch.optim as optim
 from torchinfo import summary
+import numpy as np
 
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -67,9 +68,7 @@ def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
     def forward(self, x):
         return self.net(x)
 
-    
 def train_torch_ffnn(model, args, train_ds):
-    import numpy as np
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 
@@ -85,7 +84,7 @@ def train_torch_ffnn(model, args, train_ds):
 
     best_val_acc = 0
     patience_counter = 0
-    patience_limit = 5
+    patience_limit = 250
 
     train_iter = 0
     train_epoch = 0
@@ -99,11 +98,6 @@ def train_torch_ffnn(model, args, train_ds):
                 stats_np = statistics.numpy()
                 labels_np = labels.numpy()
 
-                # --- Normalize the input features ---
-                mean = stats_np.mean(axis=0)
-                std = stats_np.std(axis=0) + 1e-8
-                stats_np = (stats_np - mean) / std
-
                 # --- Split into train/val ---
                 x_train, x_val, y_train, y_val = train_test_split(stats_np, labels_np, test_size=0.3)
 
@@ -148,7 +142,8 @@ def train_torch_ffnn(model, args, train_ds):
                     if patience_counter >= patience_limit:
                         print("Early stopping triggered.")
                         elapsed = time.time() - start_time
-                        print(f"Training time: {elapsed:.2f} seconds.")
+                        t = time.gmtime(elapsed)
+                        print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
                         class DummyEarlyStopping: stop_training = True
                         return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
 
@@ -159,7 +154,8 @@ class DummyEarlyStopping: stop_training = True
         train_epoch += 1
 
     elapsed = time.time() - start_time
-    print(f"Finished training in {elapsed:.2f} seconds with {train_iter} iterations.")
+    t = time.gmtime(elapsed)
+    print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
     class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
@@ -184,7 +180,7 @@ def predict_torch_ffnn(model, test_ds, args):
                 all_preds.append(preds)
                 all_labels.append(labels.numpy())
     # Concatenate all predictions and labels
-    import numpy as np
+
     all_preds = np.concatenate(all_preds, axis=0)
     all_labels = np.concatenate(all_labels, axis=0)
     return all_preds, all_labels

From ce170f4840a69956529927a5c6bb1b8f55e3b52a Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 5 Jun 2025 19:42:31 +0200
Subject: [PATCH 12/31] Split validation set just once per training

---
 cipherTypeDetection/train.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 0bf3083..3b31d6a 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -90,6 +90,10 @@ def train_torch_ffnn(model, args, train_ds):
     train_epoch = 0
     start_time = time.time()
 
+    # Variabales for validation data
+    val_data_created = False
+    x_val = y_val = None
+
     for epoch in range(args.epochs):
         while train_ds.iteration < args.max_iter:
             training_batches = next(train_ds)
@@ -98,13 +102,20 @@ def train_torch_ffnn(model, args, train_ds):
                 stats_np = statistics.numpy()
                 labels_np = labels.numpy()
 
-                # --- Split into train/val ---
-                x_train, x_val, y_train, y_val = train_test_split(stats_np, labels_np, test_size=0.3)
+                if not val_data_created:
+                    # Initial split: 70% train, 30% validation
+                    x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(
+                        stats_np, labels_np, test_size=0.3
+                    )
+                    x_val = torch.tensor(x_val_np, dtype=torch.float32).to(device)
+                    y_val = torch.tensor(y_val_np, dtype=torch.long).to(device)
+                    val_data_created = True
+                else:
+                    x_train_np = stats_np
+                    y_train_np = labels_np
 
-                x_train = torch.tensor(x_train, dtype=torch.float32).to(device)
-                y_train = torch.tensor(y_train, dtype=torch.long).to(device)
-                x_val = torch.tensor(x_val, dtype=torch.float32).to(device)
-                y_val = torch.tensor(y_val, dtype=torch.long).to(device)
+                x_train = torch.tensor(x_train_np, dtype=torch.float32).to(device)
+                y_train = torch.tensor(y_train_np, dtype=torch.long).to(device)
 
                 # --- Training step ---
                 optimizer.zero_grad()
@@ -160,6 +171,7 @@ class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
 
+
 def predict_torch_ffnn(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()

From 5d2e46001345c9ed30c0d149e39c70d982f40318 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Mon, 9 Jun 2025 17:00:26 +0200
Subject: [PATCH 13/31] Add normalization feature to training and evaluation
 processes

---
 cipherTypeDetection/eval.py  | 30 +++++++++++++++++++++++----
 cipherTypeDetection/train.py | 39 ++++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index 747eb49..1b2038d 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -124,12 +124,17 @@ def find_ciphertext_paths_in_dir(folder_path):
     print("Datasets loaded.\n")
 
     print('Evaluating model...')
+
     import time
     start_time = time.time()
     iteration = 0
     epoch = 0
     results = []
     prediction_metrics = PredictionPerformanceMetrics(model_name=architecture)
+
+    saved_mean = None
+    saved_std = None
+
     while dataset.iteration < args.max_iter:
         batches = next(dataset)
         
@@ -140,8 +145,23 @@ def find_ciphertext_paths_in_dir(folder_path):
                 if hasattr(model, "evaluate"):  # Keras model
                     results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
                 else:  # PyTorch model
-                    x = torch.tensor(statistics.numpy(), dtype=torch.float32)
+                    stats_np = statistics.numpy()
+
+                    # Normalization: solo al primo batch
+                    if saved_mean is None or saved_std is None:
+                        mean = stats_np.mean(axis=0)
+                        std = stats_np.std(axis=0) + 1e-8
+                        saved_mean = mean.copy()
+                        saved_std = std.copy()
+                    else:
+                        mean = saved_mean
+                        std = saved_std
+
+                    stats_np = (stats_np - mean) / std
+
+                    x = torch.tensor(stats_np, dtype=torch.float32)
                     y = torch.tensor(labels.numpy(), dtype=torch.long)
+
                     with torch.no_grad():
                         outputs = model(x)
                         loss = F.cross_entropy(outputs, y)
@@ -149,11 +169,13 @@ def find_ciphertext_paths_in_dir(folder_path):
                         acc = (top1 == y).float().mean()
 
                         # Calc top-3
-                        top3 = torch.topk(outputs, k=3, dim=1).indices  # shape: (batch_size, 3)
-                        y_expanded = y.unsqueeze(1).expand_as(top3)     # shape: (batch_size, 3)
+                        top3 = torch.topk(outputs, k=3, dim=1).indices
+                        y_expanded = y.unsqueeze(1).expand_as(top3)
                         k3_acc = (top3 == y_expanded).any(dim=1).float().mean()
+
                         results.append((loss.item(), acc.item(), k3_acc.item()))
 
+
             elif architecture in ("CNN", "LSTM", "Transformer"):
                 results.append(model.evaluate(ciphertexts, labels, batch_size=args.batch_size, verbose=1))
             elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN"):
@@ -423,7 +445,7 @@ def load_model(architecture, args, model_path, cipher_types):
         from cipherTypeDetection.train import TorchFFNN
 
         checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
-
+        
         model = TorchFFNN(
             input_size=checkpoint['input_size'],
             hidden_size=checkpoint['hidden_size'],
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 3b31d6a..7ab8097 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -94,6 +94,9 @@ def train_torch_ffnn(model, args, train_ds):
     val_data_created = False
     x_val = y_val = None
 
+    saved_mean = None
+    saved_std = None
+
     for epoch in range(args.epochs):
         while train_ds.iteration < args.max_iter:
             training_batches = next(train_ds)
@@ -102,6 +105,17 @@ def train_torch_ffnn(model, args, train_ds):
                 stats_np = statistics.numpy()
                 labels_np = labels.numpy()
 
+                # Normalization per batch → calculate mean and std for first batch
+                # and use them for all following batches.
+                if saved_mean is None or saved_std is None:
+                    mean = stats_np.mean(axis=0)
+                    std = stats_np.std(axis=0) + 1e-8
+                    saved_mean = mean.copy()
+                    saved_std = std.copy()
+                else:
+                    mean = saved_mean
+                    std = saved_std
+
                 if not val_data_created:
                     # Initial split: 70% train, 30% validation
                     x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(
@@ -177,26 +191,47 @@ def predict_torch_ffnn(model, test_ds, args):
     model.eval()
     model.to(device)
     criterion = nn.CrossEntropyLoss()
+
     all_preds = []
     all_labels = []
+
+    saved_mean = None
+    saved_std = None
+
     with torch.no_grad():
         while test_ds.iteration < args.max_iter:
             testing_batches = next(test_ds)
             for testing_batch in testing_batches:
                 statistics, labels = testing_batch.items()
-                x = torch.tensor(statistics.numpy(), dtype=torch.float32).to(device)
+                stats_np = statistics.numpy()
+
+                # Normalization per batch → calculate mean and std for first batch
+                # and use them for all following batches.
+                if saved_mean is None or saved_std is None:
+                    mean = stats_np.mean(axis=0)
+                    std = stats_np.std(axis=0) + 1e-8
+                    saved_mean = mean.copy()
+                    saved_std = std.copy()
+                else:
+                    mean = saved_mean
+                    std = saved_std
+
+                x = torch.tensor(stats_np, dtype=torch.float32).to(device)
                 y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+
                 outputs = model(x)
                 loss = criterion(outputs, y)
+
                 preds = torch.softmax(outputs, dim=1).cpu().numpy()
                 all_preds.append(preds)
                 all_labels.append(labels.numpy())
-    # Concatenate all predictions and labels
 
     all_preds = np.concatenate(all_preds, axis=0)
     all_labels = np.concatenate(all_labels, axis=0)
+
     return all_preds, all_labels
 
+
 def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
 

From 0e9cfbdd5e2e0e832e1ed8302adfd60225728f2f Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Fri, 4 Jul 2025 15:49:45 +0200
Subject: [PATCH 14/31] Fix optimizer and remove normalization feature

---
 cipherTypeDetection/eval.py  |  4 +-
 cipherTypeDetection/train.py | 84 ++++++++++++++++++++----------------
 2 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index 1b2038d..f2541a9 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -147,6 +147,7 @@ def find_ciphertext_paths_in_dir(folder_path):
                 else:  # PyTorch model
                     stats_np = statistics.numpy()
 
+                    """
                     # Normalization: solo al primo batch
                     if saved_mean is None or saved_std is None:
                         mean = stats_np.mean(axis=0)
@@ -158,7 +159,8 @@ def find_ciphertext_paths_in_dir(folder_path):
                         std = saved_std
 
                     stats_np = (stats_np - mean) / std
-
+                    """
+                    
                     x = torch.tensor(stats_np, dtype=torch.float32)
                     y = torch.tensor(labels.numpy(), dtype=torch.long)
 
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 7ab8097..771d6b7 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -17,6 +17,7 @@
 import torch.optim as optim
 from torchinfo import summary
 import numpy as np
+from torch.utils.data import TensorDataset, DataLoader
 
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -90,7 +91,6 @@ def train_torch_ffnn(model, args, train_ds):
     train_epoch = 0
     start_time = time.time()
 
-    # Variabales for validation data
     val_data_created = False
     x_val = y_val = None
 
@@ -105,8 +105,7 @@ def train_torch_ffnn(model, args, train_ds):
                 stats_np = statistics.numpy()
                 labels_np = labels.numpy()
 
-                # Normalization per batch → calculate mean and std for first batch
-                # and use them for all following batches.
+                """# Normalizzazione (prima batch)
                 if saved_mean is None or saved_std is None:
                     mean = stats_np.mean(axis=0)
                     std = stats_np.std(axis=0) + 1e-8
@@ -115,9 +114,11 @@ def train_torch_ffnn(model, args, train_ds):
                 else:
                     mean = saved_mean
                     std = saved_std
-
+                """
+                
+                #stats_np = (stats_np - mean) / std
+                
                 if not val_data_created:
-                    # Initial split: 70% train, 30% validation
                     x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(
                         stats_np, labels_np, test_size=0.3
                     )
@@ -128,17 +129,29 @@ def train_torch_ffnn(model, args, train_ds):
                     x_train_np = stats_np
                     y_train_np = labels_np
 
-                x_train = torch.tensor(x_train_np, dtype=torch.float32).to(device)
-                y_train = torch.tensor(y_train_np, dtype=torch.long).to(device)
+                # Use DataLoader for creating minibatch
+                x_train = torch.tensor(x_train_np, dtype=torch.float32)
+                y_train = torch.tensor(y_train_np, dtype=torch.long)
+
+                train_dataset = TensorDataset(x_train, y_train)
+                train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
 
-                # --- Training step ---
-                optimizer.zero_grad()
-                outputs = model(x_train)
-                loss = criterion(outputs, y_train)
-                loss.backward()
-                optimizer.step()
+                batch_losses = []
 
-                train_iter += len(y_train)
+                for x_batch, y_batch in train_loader:
+                    x_batch = x_batch.to(device)
+                    y_batch = y_batch.to(device)
+
+                    optimizer.zero_grad()
+                    outputs = model(x_batch)
+                    loss = criterion(outputs, y_batch)
+                    loss.backward()
+                    optimizer.step()
+
+                    batch_losses.append(loss.item())
+                    train_iter += len(y_batch)
+
+                epoch_loss = sum(batch_losses) / len(batch_losses)
 
                 # --- Validation step ---
                 model.eval()
@@ -148,13 +161,12 @@ def train_torch_ffnn(model, args, train_ds):
                     val_pred = torch.argmax(val_outputs, dim=1)
                     val_acc = (val_pred == y_val).float().mean().item()
 
-                    # top-3 accuracy
                     top3 = torch.topk(val_outputs, k=3, dim=1).indices
                     y_val_exp = y_val.unsqueeze(1).expand_as(top3)
                     val_k3 = (top3 == y_val_exp).any(dim=1).float().mean().item()
 
                 print(f"Epoch: {epoch+1}, Iteration: {train_iter}, "
-                      f"Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, "
+                      f"Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss.item():.4f}, "
                       f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
                 model.train()
 
@@ -185,7 +197,6 @@ class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
 
-
 def predict_torch_ffnn(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
@@ -205,6 +216,7 @@ def predict_torch_ffnn(model, test_ds, args):
                 statistics, labels = testing_batch.items()
                 stats_np = statistics.numpy()
 
+                """"
                 # Normalization per batch → calculate mean and std for first batch
                 # and use them for all following batches.
                 if saved_mean is None or saved_std is None:
@@ -215,7 +227,8 @@ def predict_torch_ffnn(model, test_ds, args):
                 else:
                     mean = saved_mean
                     std = saved_std
-
+                """
+                
                 x = torch.tensor(stats_np, dtype=torch.float32).to(device)
                 y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
 
@@ -817,8 +830,6 @@ def train_model(model, strategy, args, train_ds):
     -------
     tuple
     """
-    if args.architecture == "FFNN" and isinstance(model, TorchFFNN):
-        return train_torch_ffnn(model, args, train_ds)
 
     checkpoints_dir = Path('../data/checkpoints')
     def delete_previous_checkpoints():
@@ -867,6 +878,9 @@ def create_checkpoint_callback():
     classes = list(range(len(config.CIPHER_TYPES)))
     should_create_validation_data = True
 
+    if args.architecture == "FFNN" and isinstance(model, TorchFFNN):
+        return train_torch_ffnn(model, args, train_ds)
+
     # Perform main training loop while the iterations don't exceed the user provided max_iter
     while train_ds.iteration < args.max_iter:
         training_batches = next(train_ds)
@@ -1114,22 +1128,7 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
     cntr = 0
     test_iter = 0
     test_epoch = 0
-
-    # PyTorch FFNN prediction
-    if architecture == "FFNN" and isinstance(model, TorchFFNN):
-        preds, labels = predict_torch_ffnn(model, test_ds, args)
-        # You may want to adapt this to your PredictionPerformanceMetrics usage:
-        prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
-        prediction_metrics[architecture].add_predictions(labels, preds)
-        for metrics in prediction_metrics.values():
-            metrics.print_evaluation()
-        elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
-        prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
-            elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
-            (elapsed_prediction_time.seconds // 60) % 60,
-            elapsed_prediction_time.seconds % 60)
-        return prediction_stats
-
+        
     # Determine the number of iterations to use for evaluating the model
     prediction_dataset_factor = 10
     if early_stopping_callback.stop_training:
@@ -1198,6 +1197,19 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["RF"].add_predictions(labels, model[2].predict_proba(statistics))
                 prediction_metrics["SVM"].add_predictions(labels, model[3].predict_proba(statistics))
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
+            elif architecture == "FFNN" and isinstance(model, TorchFFNN):
+                preds, labels = predict_torch_ffnn(model, test_ds, args)
+                # You may want to adapt this to your PredictionPerformanceMetrics usage:
+                prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
+                prediction_metrics[architecture].add_predictions(labels, preds)
+                for metrics in prediction_metrics.values():
+                    metrics.print_evaluation()
+                elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
+                prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
+                    elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
+                    (elapsed_prediction_time.seconds // 60) % 60,
+                    elapsed_prediction_time.seconds % 60)
+                return prediction_stats
             else:
                 prediction = model.predict(statistics, batch_size=args.batch_size, verbose=1)
                 prediction_metrics[architecture].add_predictions(labels, prediction)

From e736c22ae9f16fbbdeba8f3903acc404ae8e6e5c Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Wed, 9 Jul 2025 14:07:34 +0200
Subject: [PATCH 15/31] Add torch library

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 9e688c7..3815935 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ tensorflow_datasets==4.9.4
 scikit_learn==1.4.0 # do not change this to be able to load old models!
 h5py==3.10.0
 pandas==2.2.0
+torch==2.7.0

From 58372b9744e13dce03813226c55f5af8af44f929 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Wed, 9 Jul 2025 14:50:58 +0200
Subject: [PATCH 16/31] Add torchinfo library

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 3815935..d5d7893 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ scikit_learn==1.4.0 # do not change this to be able to load old models!
 h5py==3.10.0
 pandas==2.2.0
 torch==2.7.0
+torchinfo==0.1.0

From fddaadfe050c45cbe19f227a30261987b412769f Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Sun, 20 Jul 2025 18:18:05 +0200
Subject: [PATCH 17/31] Add LSTM PyTorch implementation

---
 cipherTypeDetection/train.py | 275 +++++++++++++++++++++++++++--------
 1 file changed, 217 insertions(+), 58 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 771d6b7..b48de6a 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -14,6 +14,7 @@
 # PyTorch
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 import torch.optim as optim
 from torchinfo import summary
 import numpy as np
@@ -50,7 +51,7 @@
 for device in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(device, True)
 
-class TorchFFNN(nn.Module):
+class FFNN(nn.Module):
     def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
         super().__init__()
 
@@ -69,6 +70,31 @@ def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
     def forward(self, x):
         return self.net(x)
 
+class LSTM(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.output_size = output_size
+        self.num_layers = num_layers
+
+        self.lstm = nn.LSTM(
+            input_size=input_size,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            batch_first=True
+        )
+        self.fc = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        if x.dim() == 2:
+            x = x.unsqueeze(1)
+        x = x.float()
+        output, (hidden, _) = self.lstm(x)
+        logits = self.fc(hidden[-1])
+        return logits
+
+        
 def train_torch_ffnn(model, args, train_ds):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
@@ -94,9 +120,6 @@ def train_torch_ffnn(model, args, train_ds):
     val_data_created = False
     x_val = y_val = None
 
-    saved_mean = None
-    saved_std = None
-
     for epoch in range(args.epochs):
         while train_ds.iteration < args.max_iter:
             training_batches = next(train_ds)
@@ -104,19 +127,6 @@ def train_torch_ffnn(model, args, train_ds):
                 statistics, labels = training_batch.items()
                 stats_np = statistics.numpy()
                 labels_np = labels.numpy()
-
-                """# Normalizzazione (prima batch)
-                if saved_mean is None or saved_std is None:
-                    mean = stats_np.mean(axis=0)
-                    std = stats_np.std(axis=0) + 1e-8
-                    saved_mean = mean.copy()
-                    saved_std = std.copy()
-                else:
-                    mean = saved_mean
-                    std = saved_std
-                """
-                
-                #stats_np = (stats_np - mean) / std
                 
                 if not val_data_created:
                     x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(
@@ -197,6 +207,115 @@ class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
 
+def train_torch_lstm(model, args, train_ds):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+
+    optimizer = optim.Adam(
+        model.parameters(),
+        lr=config.learning_rate,
+        betas=(config.beta_1, config.beta_2),
+        eps=config.epsilon,
+        amsgrad=config.amsgrad
+    )
+    criterion = nn.CrossEntropyLoss()
+    model.train()
+
+    best_val_acc = 0
+    patience_counter = 0
+    patience_limit = 250
+
+    train_iter = 0
+    train_epoch = 0
+    start_time = time.time()
+
+    val_data_created = False
+    x_val = y_val = None
+
+    for epoch in range(args.epochs):
+        while train_ds.iteration < args.max_iter:
+            training_batches = next(train_ds)
+            for training_batch in training_batches:
+                statistics, labels = training_batch.items()
+                stats_np = statistics.numpy().astype(int)
+                labels_np = labels.numpy()
+
+                if not val_data_created:
+                    x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(stats_np, labels_np, test_size=0.3)
+                    x_val = torch.tensor(x_val_np, dtype=torch.long).to(device)
+                    y_val = torch.tensor(y_val_np, dtype=torch.long).to(device)
+                    val_data_created = True
+                else:
+                    x_train_np = stats_np
+                    y_train_np = labels_np
+
+                x_train = torch.tensor(x_train_np, dtype=torch.long)
+                y_train = torch.tensor(y_train_np, dtype=torch.long)
+
+                train_dataset = TensorDataset(x_train, y_train)
+                train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
+
+                batch_losses = []
+                for x_batch, y_batch in train_loader:
+                    x_batch = x_batch.to(device)
+                    y_batch = y_batch.to(device)
+
+                    optimizer.zero_grad()
+                    outputs = model(x_batch)
+                    loss = criterion(outputs, y_batch)
+                    loss.backward()
+                    optimizer.step()
+
+                    batch_losses.append(loss.item())
+                    train_iter += len(y_batch)
+
+                epoch_loss = sum(batch_losses) / len(batch_losses)
+                
+                # --- Validation step ---
+                model.eval()
+                with torch.no_grad():
+                    val_outputs = model(x_val)
+                    val_loss = criterion(val_outputs, y_val)
+                    val_pred = torch.argmax(val_outputs, dim=1)
+                    val_acc = (val_pred == y_val).float().mean().item()
+
+                    top3 = torch.topk(val_outputs, k=3, dim=1).indices
+                    y_val_exp = y_val.unsqueeze(1).expand_as(top3)
+                    val_k3 = (top3 == y_val_exp).any(dim=1).float().mean().item()
+
+                print(f"Epoch: {epoch+1}, Iteration: {train_iter}, "
+                      f"Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss.item():.4f}, "
+                      f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
+                model.train()
+
+                # --- Early stopping check ---
+                if val_acc > best_val_acc:
+                    best_val_acc = val_acc
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+                    if patience_counter >= patience_limit:
+                        print("Early stopping triggered.")
+                        elapsed = time.time() - start_time
+                        t = time.gmtime(elapsed)
+                        print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
+                        class DummyEarlyStopping: stop_training = True
+                        return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
+
+                if train_iter >= args.max_iter:
+                    break
+            if train_iter >= args.max_iter:
+                break
+        train_epoch += 1
+
+    elapsed = time.time() - start_time
+    t = time.gmtime(elapsed)
+    print(f"Finished {train_epoch} epochs in {t.tm_hour}h {t.tm_min}m {t.tm_sec}s")
+    class DummyEarlyStopping: stop_training = False
+    return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
+
+
+
 def predict_torch_ffnn(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
@@ -206,28 +325,12 @@ def predict_torch_ffnn(model, test_ds, args):
     all_preds = []
     all_labels = []
 
-    saved_mean = None
-    saved_std = None
-
     with torch.no_grad():
         while test_ds.iteration < args.max_iter:
             testing_batches = next(test_ds)
             for testing_batch in testing_batches:
                 statistics, labels = testing_batch.items()
                 stats_np = statistics.numpy()
-
-                """"
-                # Normalization per batch → calculate mean and std for first batch
-                # and use them for all following batches.
-                if saved_mean is None or saved_std is None:
-                    mean = stats_np.mean(axis=0)
-                    std = stats_np.std(axis=0) + 1e-8
-                    saved_mean = mean.copy()
-                    saved_std = std.copy()
-                else:
-                    mean = saved_mean
-                    std = saved_std
-                """
                 
                 x = torch.tensor(stats_np, dtype=torch.float32).to(device)
                 y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
@@ -235,6 +338,49 @@ def predict_torch_ffnn(model, test_ds, args):
                 outputs = model(x)
                 loss = criterion(outputs, y)
 
+                pred_top1 = torch.argmax(outputs, dim=1)
+                acc = (pred_top1 == y).float().mean().item()
+
+                top3 = torch.topk(outputs, k=3, dim=1).indices
+                y_expanded = y.unsqueeze(1).expand_as(top3)
+                k3_acc = (top3 == y_expanded).any(dim=1).float().mean().item()
+
+                print(f"Eval → Loss: {loss.item():.4f}, Accuracy: {acc:.4f}, Top-3 Accuracy: {k3_acc:.4f}")
+
+                preds = torch.softmax(outputs, dim=1).cpu().numpy()
+                all_preds.append(preds)
+                all_labels.append(labels.numpy())
+
+    all_preds = np.concatenate(all_preds, axis=0)
+    all_labels = np.concatenate(all_labels, axis=0)
+
+    return all_preds, all_labels
+
+
+
+
+def predict_torch_lstm(model, test_ds, args):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    model.to(device)
+    criterion = nn.CrossEntropyLoss()
+
+    all_preds = []
+    all_labels = []
+
+    with torch.no_grad():
+        while test_ds.iteration < args.max_iter:
+            testing_batches = next(test_ds)
+            for testing_batch in testing_batches:
+                statistics, labels = testing_batch.items()
+
+                stats_np = statistics.numpy().astype(int)  # input tokenizzati
+                x = torch.tensor(stats_np, dtype=torch.long).to(device)
+                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+
+                outputs = model(x)
+                loss = criterion(outputs, y)
+
                 preds = torch.softmax(outputs, dim=1).cpu().numpy()
                 all_preds.append(preds)
                 all_labels.append(labels.numpy())
@@ -245,6 +391,7 @@ def predict_torch_ffnn(model, test_ds, args):
     return all_preds, all_labels
 
 
+
 def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
 
@@ -325,7 +472,7 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
     # Create new model based on architecture
     if architecture == "FFNN":
         # Use PyTorch for FFNN
-        model = TorchFFNN(
+        model = FFNN(
             input_size=input_layer_size,
             hidden_size=hidden_layer_size,
             output_size=output_layer_size,
@@ -351,18 +498,14 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
         return model
     
     elif architecture == "LSTM":
-        config.FEATURE_ENGINEERING = False
-        config.PAD_INPUT = True
-        model = tf.keras.Sequential()
-        model.add(tf.keras.layers.Embedding(56, 64, input_length=max_train_len))
-        # model_.add(tf.keras.layers.Dropout(0.2))
-        model.add(tf.keras.layers.LSTM(config.lstm_units))
-        # model_.add(tf.keras.layers.Dropout(0.2))
-        model.add(tf.keras.layers.Flatten())
-        model.add(tf.keras.layers.Dense(output_layer_size, activation='softmax'))
-        model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", 
-                    metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
+        model = LSTM(
+            input_size=input_layer_size,
+            hidden_size=config.lstm_units,
+            output_size=output_layer_size,
+            num_layers=1 # could this be hidden_layers?
+        )
         return model
+
     
     elif architecture == "DT":
         return DecisionTreeClassifier(criterion=config.criterion, ccp_alpha=config.ccp_alpha)
@@ -878,8 +1021,12 @@ def create_checkpoint_callback():
     classes = list(range(len(config.CIPHER_TYPES)))
     should_create_validation_data = True
 
-    if args.architecture == "FFNN" and isinstance(model, TorchFFNN):
+    if args.architecture == "FFNN" and isinstance(model, FFNN):
         return train_torch_ffnn(model, args, train_ds)
+    
+    elif args.architecture == "LSTM" and isinstance(model, LSTM):
+        return train_torch_lstm(model, args, train_ds)
+    
 
     # Perform main training loop while the iterations don't exceed the user provided max_iter
     while train_ds.iteration < args.max_iter:
@@ -1041,18 +1188,16 @@ def save_model(model, args):
 
     model_path = os.path.join(args.save_directory, model_name)
 
-    if architecture == "FFNN":
-        if isinstance(model, TorchFFNN):
-            torch.save({
-                'model_state_dict': model.state_dict(),
-                'input_size': model.input_size,
-                'hidden_size': model.hidden_size,
-                'output_size': model.output_size,
-                'num_hidden_layers': model.num_hidden_layers
-            }, model_path)
-
+    if architecture in ("FFNN", "LSTM"):
+        torch.save({
+            'model_state_dict': model.state_dict(),
+            'input_size': model.input_size,
+            'hidden_size': model.hidden_size,
+            'output_size': model.output_size,
+            'num_layers': model.num_layers if hasattr(model, 'num_layers') else model.num_hidden_layers
+        }, model_path)
 
-    elif architecture in ("CNN", "LSTM", "Transformer"):
+    elif architecture in ("CNN", "Transformer"):
         model.save(model_path)
 
     elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN", "SVM-Rotor"):
@@ -1197,7 +1342,8 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["RF"].add_predictions(labels, model[2].predict_proba(statistics))
                 prediction_metrics["SVM"].add_predictions(labels, model[3].predict_proba(statistics))
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
-            elif architecture == "FFNN" and isinstance(model, TorchFFNN):
+                
+            elif architecture == "FFNN" and isinstance(model, FFNN):
                 preds, labels = predict_torch_ffnn(model, test_ds, args)
                 # You may want to adapt this to your PredictionPerformanceMetrics usage:
                 prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
@@ -1210,6 +1356,19 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                     (elapsed_prediction_time.seconds // 60) % 60,
                     elapsed_prediction_time.seconds % 60)
                 return prediction_stats
+            
+            elif architecture == "LSTM" and isinstance(model, LSTM):
+                preds, labels = predict_torch_lstm(model, test_ds, args)
+                prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
+                prediction_metrics[architecture].add_predictions(labels, preds)
+                for metrics in prediction_metrics.values():
+                    metrics.print_evaluation()
+                elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
+                prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
+                    elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
+                    (elapsed_prediction_time.seconds // 60) % 60,
+                    elapsed_prediction_time.seconds % 60)
+                return prediction_stats
             else:
                 prediction = model.predict(statistics, batch_size=args.batch_size, verbose=1)
                 prediction_metrics[architecture].add_predictions(labels, prediction)

From bf97781cebe0a04c239a8d91342ba9379828de0c Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Mon, 21 Jul 2025 13:08:16 +0200
Subject: [PATCH 18/31] Remove early stopping callback for PyTorch LSTM

---
 cipherTypeDetection/train.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index b48de6a..931a74a 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -288,6 +288,7 @@ def train_torch_lstm(model, args, train_ds):
                       f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
                 model.train()
 
+                """
                 # --- Early stopping check ---
                 if val_acc > best_val_acc:
                     best_val_acc = val_acc
@@ -301,7 +302,8 @@ def train_torch_lstm(model, args, train_ds):
                         print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
                         class DummyEarlyStopping: stop_training = True
                         return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
-
+                """
+                
                 if train_iter >= args.max_iter:
                     break
             if train_iter >= args.max_iter:

From fc27fdf31087df8f2706b86a6f2ae507875c7864 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Mon, 21 Jul 2025 14:07:44 +0200
Subject: [PATCH 19/31] Remove early stopping callback for PyTorch FFNN

---
 cipherTypeDetection/train.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 931a74a..1cc6ffc 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -180,6 +180,7 @@ def train_torch_ffnn(model, args, train_ds):
                       f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
                 model.train()
 
+                """
                 # --- Early stopping check ---
                 if val_acc > best_val_acc:
                     best_val_acc = val_acc
@@ -193,7 +194,8 @@ def train_torch_ffnn(model, args, train_ds):
                         print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
                         class DummyEarlyStopping: stop_training = True
                         return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
-
+                """
+                
                 if train_iter >= args.max_iter:
                     break
             if train_iter >= args.max_iter:

From 570508411451cdde8117221f4e473327ce3996fd Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 31 Jul 2025 15:56:10 +0200
Subject: [PATCH 20/31] Add attempt to fix LSTM architecture

---
 cipherTypeDetection/train.py | 68 ++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 1cc6ffc..5434828 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -70,30 +70,44 @@ def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
     def forward(self, x):
         return self.net(x)
 
+
 class LSTM(nn.Module):
-    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
+    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, num_layers=1, dropout=0.0):
         super().__init__()
-        self.input_size = input_size
+
+        # saves parameters so that they can be saved and loaded later
+        self.vocab_size = vocab_size
+        self.embed_dim = embed_dim
         self.hidden_size = hidden_size
         self.output_size = output_size
         self.num_layers = num_layers
+        self.dropout = dropout
 
+        # Layers
+        self.embedding = nn.Embedding(
+            num_embeddings=vocab_size,
+            embedding_dim=embed_dim,
+            padding_idx=0
+        )
         self.lstm = nn.LSTM(
-            input_size=input_size,
+            input_size=embed_dim,
             hidden_size=hidden_size,
             num_layers=num_layers,
-            batch_first=True
+            batch_first=True,
+            dropout=dropout if num_layers > 1 else 0.0
         )
         self.fc = nn.Linear(hidden_size, output_size)
 
     def forward(self, x):
-        if x.dim() == 2:
-            x = x.unsqueeze(1)
-        x = x.float()
-        output, (hidden, _) = self.lstm(x)
-        logits = self.fc(hidden[-1])
+        # x: LongTensor of shape [B, L] or [B, L, 1]
+        if x.dim()==3 and x.size(2)==1:
+            x = x.squeeze(2)                     # remove channel dim → [B, L]
+        emb     = self.embedding(x)              # → [B, L, D]
+        output, (hidden, _) = self.lstm(emb)     # hidden: [num_layers, B, H]
+        logits  = self.fc(hidden[-1])            # take last layer’s hidden state → [B, C]
         return logits
 
+
         
 def train_torch_ffnn(model, args, train_ds):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -419,7 +433,12 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             if hasattr(model, "summary"):
                 model.summary()
             else:
-                summary(model, input_size=(1, 724))
+                # for LSTM use a LongTensor dummy input of shape (1, max_train_len)
+                if architecture == "LSTM":
+                    summary(model, input_size=(1, max_train_len), dtypes=[torch.long])
+                else:
+                    summary(model, input_size=(1, 724))
+
     else:
         print("Only one GPU found.")
         strategy = NullDistributionStrategy()
@@ -430,7 +449,12 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             if hasattr(model, "summary"):
                 model.summary()
             else:
-                summary(model, input_size=(1, 724))
+                # for LSTM use a LongTensor dummy input of shape (1, max_train_len)
+                if architecture == "LSTM":
+                    summary(model, input_size=(1, max_train_len), dtypes=[torch.long])
+                else:
+                    summary(model, input_size=(1, 724))
+
 
     print('Model created.\n')
     return model, strategy
@@ -502,11 +526,15 @@ def create_model(architecture, extend_model, output_layer_size, max_train_len):
         return model
     
     elif architecture == "LSTM":
+        config.FEATURE_ENGINEERING = False
+        config.PAD_INPUT = True
         model = LSTM(
-            input_size=input_layer_size,
+            vocab_size=56,
+            embed_dim=64,
             hidden_size=config.lstm_units,
             output_size=output_layer_size,
-            num_layers=1 # could this be hidden_layers?
+            num_layers=1,
+            dropout=0.0
         )
         return model
 
@@ -1195,10 +1223,13 @@ def save_model(model, args):
     if architecture in ("FFNN", "LSTM"):
         torch.save({
             'model_state_dict': model.state_dict(),
-            'input_size': model.input_size,
+            # FFNN use input_size, but LSTM use vocab/embed/hidden...
+            **({'input_size': model.input_size} if architecture=="FFNN" else {}),
+            **({'vocab_size': model.vocab_size, 'embed_dim': model.embed_dim} if architecture=="LSTM" else {}),
             'hidden_size': model.hidden_size,
             'output_size': model.output_size,
-            'num_layers': model.num_layers if hasattr(model, 'num_layers') else model.num_hidden_layers
+            'num_layers': model.num_layers,
+            **({'dropout': model.dropout} if architecture=="LSTM" else {}),
         }, model_path)
 
     elif architecture in ("CNN", "Transformer"):
@@ -1505,10 +1536,9 @@ def main():
     output_layer_size = max([config.CIPHER_TYPES.index(type) for type in cipher_types]) + 1
 
     # Create a model and allow for distributed training on multi-GPU machines
-    model, strategy = create_model_with_distribution_strategy(architecture, 
-                                                    extend_model, 
-                                                    output_layer_size=output_layer_size, 
-                                                    max_train_len=args.max_train_len)
+    model, strategy = create_model_with_distribution_strategy(
+    architecture, extend_model, output_layer_size=output_layer_size, max_train_len=args.max_train_len)
+
     
     early_stopping_callback, train_iter, training_stats = train_model(model, strategy, 
                                                                       args, train_ds)

From 1396dabcb93bcdaef4d006483985ea4ae41f4ab0 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Fri, 1 Aug 2025 10:50:20 +0200
Subject: [PATCH 21/31] Add comments on LSTM Class for clarity

---
 cipherTypeDetection/train.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 5434828..4b1b87f 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -98,13 +98,31 @@ def __init__(self, vocab_size, embed_dim, hidden_size, output_size, num_layers=1
         )
         self.fc = nn.Linear(hidden_size, output_size)
 
+    # B: Batch size           – number of sequences processed in parallel
+    # L: Sequence length      – number of time steps (tokens) in each sequence
+    # D: Embedding dimension  – size of each token’s embedding vector
+    # H: Hidden size          – number of features in the LSTM hidden state
+    # C: Number of classes    – dimensionality of the output logits
+    
     def forward(self, x):
         # x: LongTensor of shape [B, L] or [B, L, 1]
-        if x.dim()==3 and x.size(2)==1:
-            x = x.squeeze(2)                     # remove channel dim → [B, L]
-        emb     = self.embedding(x)              # → [B, L, D]
-        output, (hidden, _) = self.lstm(emb)     # hidden: [num_layers, B, H]
-        logits  = self.fc(hidden[-1])            # take last layer’s hidden state → [B, C]
+        if x.dim() == 3 and x.size(2) == 1:
+            x = x.squeeze(2)                  # remove channel dimension → [B, L]
+
+        emb = self.embedding(x)              # embeddings → [B, L, D]
+
+        # LSTM returns:
+        # - output: hidden state at each time step → [B, L, H]
+        # - hidden: final hidden state for each layer → [num_layers, B, H]
+        output, (hidden, _) = self.lstm(emb)
+
+        # hidden[-1] selects the final hidden state of the top (last) layer 
+        # at the last time step → [B, H]
+        last_hidden = hidden[-1]
+
+        # apply the fully-connected layer to get logits → [B, C]
+        logits = self.fc(last_hidden)
+
         return logits
 
 

From 13fa14b292ee5f8301f7f0282534d3c1b89d16ae Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 7 Aug 2025 21:57:22 +0200
Subject: [PATCH 22/31] Add shellscript for benchmarks

---
 cipherTypeDetection/benchmark_torch.sh | 23 +++++++++++++++++++
 cipherTypeDetection/train.py           | 31 +++++++++++++++++---------
 2 files changed, 43 insertions(+), 11 deletions(-)
 create mode 100644 cipherTypeDetection/benchmark_torch.sh

diff --git a/cipherTypeDetection/benchmark_torch.sh b/cipherTypeDetection/benchmark_torch.sh
new file mode 100644
index 0000000..3186b28
--- /dev/null
+++ b/cipherTypeDetection/benchmark_torch.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+BASE_DIR=benchmark_logs/torch
+mkdir -p "$BASE_DIR"
+
+COMMON_ARGS="--download_dataset=False --plaintext_input_directory=../data/gutenberg_en --train_dataset_size=244 --batch_size=128 --min_train_len=100 --max_train_len=1000 --min_test_len=100 --max_test_len=1000 --max_iter=1000 --epochs=1 --ciphers=all"
+
+# Function to run the benchmark for a given architecture
+run_benchmark () {
+  ARCH=$1
+  for i in {1..3}; do
+    RUN_DIR="$BASE_DIR/${ARCH}_run_$i"
+    mkdir -p "$RUN_DIR"
+    echo "Launching $ARCH run $i..."
+    python train.py --architecture=$ARCH $COMMON_ARGS --model_name=${ARCH}_run_$i.pth > "$RUN_DIR/train_log.txt" 2>&1
+  done
+}
+
+# FFNN
+run_benchmark FFNN
+
+# LSTM
+run_benchmark LSTM
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 4b1b87f..fbcc039 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -114,6 +114,7 @@ def forward(self, x):
         # LSTM returns:
         # - output: hidden state at each time step → [B, L, H]
         # - hidden: final hidden state for each layer → [num_layers, B, H]
+        # not used as we only need the last hidden state, but can be useful for debugging
         output, (hidden, _) = self.lstm(emb)
 
         # hidden[-1] selects the final hidden state of the top (last) layer 
@@ -1239,16 +1240,23 @@ def save_model(model, args):
     model_path = os.path.join(args.save_directory, model_name)
 
     if architecture in ("FFNN", "LSTM"):
-        torch.save({
+        state_dict = {
             'model_state_dict': model.state_dict(),
-            # FFNN use input_size, but LSTM use vocab/embed/hidden...
-            **({'input_size': model.input_size} if architecture=="FFNN" else {}),
-            **({'vocab_size': model.vocab_size, 'embed_dim': model.embed_dim} if architecture=="LSTM" else {}),
             'hidden_size': model.hidden_size,
             'output_size': model.output_size,
-            'num_layers': model.num_layers,
-            **({'dropout': model.dropout} if architecture=="LSTM" else {}),
-        }, model_path)
+        }
+
+        if architecture == "FFNN":
+            state_dict['input_size'] = model.input_size
+            state_dict['num_hidden_layers'] = model.num_hidden_layers
+        elif architecture == "LSTM":
+            state_dict['vocab_size'] = model.vocab_size
+            state_dict['embed_dim'] = model.embed_dim
+            state_dict['num_layers'] = model.num_layers
+            state_dict['dropout'] = model.dropout
+
+        torch.save(state_dict, model_path)
+
 
     elif architecture in ("CNN", "Transformer"):
         model.save(model_path)
@@ -1266,13 +1274,14 @@ def save_model(model, args):
         for index, name in enumerate(["dt", "et", "rf", "svm", "knn"]):
             with open('../data/models/' + model_path.split('.')[0] + f"_{name}.h5", "wb") as f:
                 pickle.dump(model[index], f)
-
-    # Salvataggio parametri
+    
+    """
+    # Saving parameters
     with open('../data/' + model_path.split('.')[0] + '_parameters.txt', 'w') as f:
         for arg in vars(args):
             f.write("{:23s}= {:s}\n".format(arg, str(getattr(args, arg))))
 
-    # Gestione logs
+    # Managing logs
     if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
         logs_destination = '../data/' + model_name.split('.')[0] + '_tensorboard_logs'
         try:
@@ -1282,7 +1291,7 @@ def save_model(model, args):
                 shutil.move('../data/logs', logs_destination)
         except Exception:
             print(f"Could not move logs from '../data/logs' to '{logs_destination}'.")
-
+    """
     print('Model saved.\n')
 
 

From 92790b9eac9c266e3c3b51a4ffa03824372b5416 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Thu, 7 Aug 2025 22:49:01 +0200
Subject: [PATCH 23/31] Fix benchmark

---
 cipherTypeDetection/benchmark_torch.sh | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/cipherTypeDetection/benchmark_torch.sh b/cipherTypeDetection/benchmark_torch.sh
index 3186b28..d64a210 100644
--- a/cipherTypeDetection/benchmark_torch.sh
+++ b/cipherTypeDetection/benchmark_torch.sh
@@ -1,23 +1,39 @@
 #!/bin/bash
 
-BASE_DIR=benchmark_logs/torch
+BASE_DIR=benchmark_logs
 mkdir -p "$BASE_DIR"
 
-COMMON_ARGS="--download_dataset=False --plaintext_input_directory=../data/gutenberg_en --train_dataset_size=244 --batch_size=128 --min_train_len=100 --max_train_len=1000 --min_test_len=100 --max_test_len=1000 --max_iter=1000 --epochs=1 --ciphers=all"
+COMMON_ARGS="--download_dataset=False \
+    --plaintext_input_directory=../data/gutenberg_en \
+    --rotor_input_directory=../data/rotor_ciphertexts \
+    --train_dataset_size=244 \
+    --batch_size=128 \
+    --max_iter=1000 \
+    --min_train_len=100 \
+    --max_train_len=1000 \
+    --min_test_len=100 \
+    --max_test_len=1000 \
+    --ciphers=all"
 
-# Function to run the benchmark for a given architecture
 run_benchmark () {
   ARCH=$1
   for i in {1..3}; do
     RUN_DIR="$BASE_DIR/${ARCH}_run_$i"
     mkdir -p "$RUN_DIR"
     echo "Launching $ARCH run $i..."
-    python train.py --architecture=$ARCH $COMMON_ARGS --model_name=${ARCH}_run_$i.pth > "$RUN_DIR/train_log.txt" 2>&1
+    python train.py \
+      --architecture=$ARCH \
+      $COMMON_ARGS \
+      --save_directory="$RUN_DIR" \
+      --model_name="${ARCH}_run_$i.pth" \
+      > "$RUN_DIR/${ARCH}_var_10000000_run_${i}_${DATE}.txt" \
+      2> "$RUN_DIR/err_${ARCH}_var_10000000_run_${i}_${DATE}.txt"
   done
 }
 
 # FFNN
 run_benchmark FFNN
 
+
 # LSTM
 run_benchmark LSTM

From 6608a6e127507c24f60b0aa200a4591e5e41e9a5 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Fri, 8 Aug 2025 15:35:47 +0200
Subject: [PATCH 24/31] Add DATE variable and update torch benchmark parameters

---
 cipherTypeDetection/benchmark_torch.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/cipherTypeDetection/benchmark_torch.sh b/cipherTypeDetection/benchmark_torch.sh
index d64a210..3200a23 100644
--- a/cipherTypeDetection/benchmark_torch.sh
+++ b/cipherTypeDetection/benchmark_torch.sh
@@ -1,18 +1,22 @@
 #!/bin/bash
 
-BASE_DIR=benchmark_logs
+BASE_DIR=output
 mkdir -p "$BASE_DIR"
 
+DATE=$(date "+%Y%m%d")
+
 COMMON_ARGS="--download_dataset=False \
     --plaintext_input_directory=../data/gutenberg_en \
     --rotor_input_directory=../data/rotor_ciphertexts \
-    --train_dataset_size=244 \
-    --batch_size=128 \
-    --max_iter=1000 \
+    --train_dataset_size=976 \
+    --dataset_workers=16 \
+    --batch_size=64 \
+    --max_iter=10000000 \
     --min_train_len=100 \
     --max_train_len=1000 \
     --min_test_len=100 \
     --max_test_len=1000 \
+    --epochs=1 \
     --ciphers=all"
 
 run_benchmark () {

From d1b8cad7abec772b856144c0d815e3cb970a79df Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Mon, 6 Oct 2025 09:33:54 +0200
Subject: [PATCH 25/31] Add benchmark Keras script

---
 cipherTypeDetection/benchmark_keras.sh | 39 ++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 cipherTypeDetection/benchmark_keras.sh

diff --git a/cipherTypeDetection/benchmark_keras.sh b/cipherTypeDetection/benchmark_keras.sh
new file mode 100644
index 0000000..ca4d8c4
--- /dev/null
+++ b/cipherTypeDetection/benchmark_keras.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+BASE_DIR=benchmark_logs
+mkdir -p "$BASE_DIR"
+
+COMMON_ARGS="--download_dataset=False \
+    --plaintext_input_directory=../data/gutenberg_en \
+    --rotor_input_directory=../data/rotor_ciphertexts \
+    --train_dataset_size=244 \
+    --batch_size=128 \
+    --max_iter=1000 \
+    --min_train_len=100 \
+    --max_train_len=1000 \
+    --min_test_len=100 \
+    --max_test_len=1000 \
+    --epochs=1 \
+    --ciphers=all"
+
+run_benchmark () {
+  ARCH=$1
+  for i in {1..3}; do
+    RUN_DIR="$BASE_DIR/${ARCH}_run_$i"
+    mkdir -p "$RUN_DIR"
+    echo "Launching $ARCH run $i..."
+    python train.py \
+      --architecture=$ARCH \
+      $COMMON_ARGS \
+      --save_directory="$RUN_DIR" \
+      --model_name="${ARCH}_run_$i.h5" \
+      > "$RUN_DIR/${ARCH}_var_10000000_run_${i}_${DATE}.txt" \
+      2> "$RUN_DIR/err_${ARCH}_var_10000000_run_${i}_${DATE}.txt"
+  done
+}
+
+# FFNN
+run_benchmark FFNN
+
+# LSTM
+run_benchmark LSTM

From 45073a1103fcae4c2ce423c4690ffc8143f260d2 Mon Sep 17 00:00:00 2001
From: Stefano Sala <stefano.sala022@gmail.com>
Date: Wed, 26 Nov 2025 11:31:52 +0100
Subject: [PATCH 26/31] combine train and prediction functions in a single one

---
 cipherTypeDetection/benchmark_keras.sh |  39 ------
 cipherTypeDetection/benchmark_torch.sh |  43 ------
 cipherTypeDetection/eval.py            |  25 +---
 cipherTypeDetection/train.py           | 180 +++----------------------
 4 files changed, 21 insertions(+), 266 deletions(-)
 delete mode 100644 cipherTypeDetection/benchmark_keras.sh
 delete mode 100644 cipherTypeDetection/benchmark_torch.sh

diff --git a/cipherTypeDetection/benchmark_keras.sh b/cipherTypeDetection/benchmark_keras.sh
deleted file mode 100644
index ca4d8c4..0000000
--- a/cipherTypeDetection/benchmark_keras.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-BASE_DIR=benchmark_logs
-mkdir -p "$BASE_DIR"
-
-COMMON_ARGS="--download_dataset=False \
-    --plaintext_input_directory=../data/gutenberg_en \
-    --rotor_input_directory=../data/rotor_ciphertexts \
-    --train_dataset_size=244 \
-    --batch_size=128 \
-    --max_iter=1000 \
-    --min_train_len=100 \
-    --max_train_len=1000 \
-    --min_test_len=100 \
-    --max_test_len=1000 \
-    --epochs=1 \
-    --ciphers=all"
-
-run_benchmark () {
-  ARCH=$1
-  for i in {1..3}; do
-    RUN_DIR="$BASE_DIR/${ARCH}_run_$i"
-    mkdir -p "$RUN_DIR"
-    echo "Launching $ARCH run $i..."
-    python train.py \
-      --architecture=$ARCH \
-      $COMMON_ARGS \
-      --save_directory="$RUN_DIR" \
-      --model_name="${ARCH}_run_$i.h5" \
-      > "$RUN_DIR/${ARCH}_var_10000000_run_${i}_${DATE}.txt" \
-      2> "$RUN_DIR/err_${ARCH}_var_10000000_run_${i}_${DATE}.txt"
-  done
-}
-
-# FFNN
-run_benchmark FFNN
-
-# LSTM
-run_benchmark LSTM
diff --git a/cipherTypeDetection/benchmark_torch.sh b/cipherTypeDetection/benchmark_torch.sh
deleted file mode 100644
index 3200a23..0000000
--- a/cipherTypeDetection/benchmark_torch.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-BASE_DIR=output
-mkdir -p "$BASE_DIR"
-
-DATE=$(date "+%Y%m%d")
-
-COMMON_ARGS="--download_dataset=False \
-    --plaintext_input_directory=../data/gutenberg_en \
-    --rotor_input_directory=../data/rotor_ciphertexts \
-    --train_dataset_size=976 \
-    --dataset_workers=16 \
-    --batch_size=64 \
-    --max_iter=10000000 \
-    --min_train_len=100 \
-    --max_train_len=1000 \
-    --min_test_len=100 \
-    --max_test_len=1000 \
-    --epochs=1 \
-    --ciphers=all"
-
-run_benchmark () {
-  ARCH=$1
-  for i in {1..3}; do
-    RUN_DIR="$BASE_DIR/${ARCH}_run_$i"
-    mkdir -p "$RUN_DIR"
-    echo "Launching $ARCH run $i..."
-    python train.py \
-      --architecture=$ARCH \
-      $COMMON_ARGS \
-      --save_directory="$RUN_DIR" \
-      --model_name="${ARCH}_run_$i.pth" \
-      > "$RUN_DIR/${ARCH}_var_10000000_run_${i}_${DATE}.txt" \
-      2> "$RUN_DIR/err_${ARCH}_var_10000000_run_${i}_${DATE}.txt"
-  done
-}
-
-# FFNN
-run_benchmark FFNN
-
-
-# LSTM
-run_benchmark LSTM
diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index f2541a9..aacb61a 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -10,8 +10,7 @@
 from datetime import datetime
 
 import torch
-import torch.nn.functional as F
-import torch.optim as optim
+from cipherTypeDetection.train import FFNN
 
 # This environ variable must be set before all tensorflow imports!
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -132,9 +131,6 @@ def find_ciphertext_paths_in_dir(folder_path):
     results = []
     prediction_metrics = PredictionPerformanceMetrics(model_name=architecture)
 
-    saved_mean = None
-    saved_std = None
-
     while dataset.iteration < args.max_iter:
         batches = next(dataset)
         
@@ -146,20 +142,6 @@ def find_ciphertext_paths_in_dir(folder_path):
                     results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
                 else:  # PyTorch model
                     stats_np = statistics.numpy()
-
-                    """
-                    # Normalization: solo al primo batch
-                    if saved_mean is None or saved_std is None:
-                        mean = stats_np.mean(axis=0)
-                        std = stats_np.std(axis=0) + 1e-8
-                        saved_mean = mean.copy()
-                        saved_std = std.copy()
-                    else:
-                        mean = saved_mean
-                        std = saved_std
-
-                    stats_np = (stats_np - mean) / std
-                    """
                     
                     x = torch.tensor(stats_np, dtype=torch.float32)
                     y = torch.tensor(labels.numpy(), dtype=torch.long)
@@ -444,11 +426,10 @@ def load_model(architecture, args, model_path, cipher_types):
     model = None
 
     if architecture == "FFNN" and model_path.endswith(".pth"):
-        from cipherTypeDetection.train import TorchFFNN
 
         checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
         
-        model = TorchFFNN(
+        model = FFNN(
             input_size=checkpoint['input_size'],
             hidden_size=checkpoint['hidden_size'],
             output_size=checkpoint['output_size'],
@@ -687,7 +668,7 @@ def main():
     print("Model Loaded.")
 
     # Model is now always an ensemble
-    #architecture = "Ensemble"
+    architecture = "Ensemble"
 
     # the program was started as in benchmark mode.
     if args.download_dataset is not None:
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index fbcc039..6fdea78 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -14,7 +14,6 @@
 # PyTorch
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 import torch.optim as optim
 from torchinfo import summary
 import numpy as np
@@ -126,9 +125,8 @@ def forward(self, x):
 
         return logits
 
-
-        
-def train_torch_ffnn(model, args, train_ds):
+      
+def train_torch(model, args, train_ds):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 
@@ -158,7 +156,10 @@ def train_torch_ffnn(model, args, train_ds):
             training_batches = next(train_ds)
             for training_batch in training_batches:
                 statistics, labels = training_batch.items()
-                stats_np = statistics.numpy()
+                if args.architecture == "LSTM":
+                    stats_np = statistics.numpy().astype(int)
+                else:
+                    stats_np = statistics.numpy()
                 labels_np = labels.numpy()
                 
                 if not val_data_created:
@@ -213,7 +214,6 @@ def train_torch_ffnn(model, args, train_ds):
                       f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
                 model.train()
 
-                """
                 # --- Early stopping check ---
                 if val_acc > best_val_acc:
                     best_val_acc = val_acc
@@ -227,7 +227,6 @@ def train_torch_ffnn(model, args, train_ds):
                         print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
                         class DummyEarlyStopping: stop_training = True
                         return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
-                """
                 
                 if train_iter >= args.max_iter:
                     break
@@ -242,118 +241,7 @@ class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
 
-def train_torch_lstm(model, args, train_ds):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-
-    optimizer = optim.Adam(
-        model.parameters(),
-        lr=config.learning_rate,
-        betas=(config.beta_1, config.beta_2),
-        eps=config.epsilon,
-        amsgrad=config.amsgrad
-    )
-    criterion = nn.CrossEntropyLoss()
-    model.train()
-
-    best_val_acc = 0
-    patience_counter = 0
-    patience_limit = 250
-
-    train_iter = 0
-    train_epoch = 0
-    start_time = time.time()
-
-    val_data_created = False
-    x_val = y_val = None
-
-    for epoch in range(args.epochs):
-        while train_ds.iteration < args.max_iter:
-            training_batches = next(train_ds)
-            for training_batch in training_batches:
-                statistics, labels = training_batch.items()
-                stats_np = statistics.numpy().astype(int)
-                labels_np = labels.numpy()
-
-                if not val_data_created:
-                    x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(stats_np, labels_np, test_size=0.3)
-                    x_val = torch.tensor(x_val_np, dtype=torch.long).to(device)
-                    y_val = torch.tensor(y_val_np, dtype=torch.long).to(device)
-                    val_data_created = True
-                else:
-                    x_train_np = stats_np
-                    y_train_np = labels_np
-
-                x_train = torch.tensor(x_train_np, dtype=torch.long)
-                y_train = torch.tensor(y_train_np, dtype=torch.long)
-
-                train_dataset = TensorDataset(x_train, y_train)
-                train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
-
-                batch_losses = []
-                for x_batch, y_batch in train_loader:
-                    x_batch = x_batch.to(device)
-                    y_batch = y_batch.to(device)
-
-                    optimizer.zero_grad()
-                    outputs = model(x_batch)
-                    loss = criterion(outputs, y_batch)
-                    loss.backward()
-                    optimizer.step()
-
-                    batch_losses.append(loss.item())
-                    train_iter += len(y_batch)
-
-                epoch_loss = sum(batch_losses) / len(batch_losses)
-                
-                # --- Validation step ---
-                model.eval()
-                with torch.no_grad():
-                    val_outputs = model(x_val)
-                    val_loss = criterion(val_outputs, y_val)
-                    val_pred = torch.argmax(val_outputs, dim=1)
-                    val_acc = (val_pred == y_val).float().mean().item()
-
-                    top3 = torch.topk(val_outputs, k=3, dim=1).indices
-                    y_val_exp = y_val.unsqueeze(1).expand_as(top3)
-                    val_k3 = (top3 == y_val_exp).any(dim=1).float().mean().item()
-
-                print(f"Epoch: {epoch+1}, Iteration: {train_iter}, "
-                      f"Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss.item():.4f}, "
-                      f"Val Acc: {val_acc:.4f}, Val Top-3 Acc: {val_k3:.4f}")
-                model.train()
-
-                """
-                # --- Early stopping check ---
-                if val_acc > best_val_acc:
-                    best_val_acc = val_acc
-                    patience_counter = 0
-                else:
-                    patience_counter += 1
-                    if patience_counter >= patience_limit:
-                        print("Early stopping triggered.")
-                        elapsed = time.time() - start_time
-                        t = time.gmtime(elapsed)
-                        print(f"Finished training in {t.tm_yday - 1} days {t.tm_hour} hours {t.tm_min} minutes {t.tm_sec} seconds with {train_iter} iterations.")
-                        class DummyEarlyStopping: stop_training = True
-                        return DummyEarlyStopping(), train_iter, f"Early stopped at epoch {epoch+1}"
-                """
-                
-                if train_iter >= args.max_iter:
-                    break
-            if train_iter >= args.max_iter:
-                break
-        train_epoch += 1
-
-    elapsed = time.time() - start_time
-    t = time.gmtime(elapsed)
-    print(f"Finished {train_epoch} epochs in {t.tm_hour}h {t.tm_min}m {t.tm_sec}s")
-    class DummyEarlyStopping: stop_training = False
-    return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
-
-
-
-def predict_torch_ffnn(model, test_ds, args):
+def predict_torch(model, test_ds, args):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
     model.to(device)
@@ -367,8 +255,11 @@ def predict_torch_ffnn(model, test_ds, args):
             testing_batches = next(test_ds)
             for testing_batch in testing_batches:
                 statistics, labels = testing_batch.items()
-                stats_np = statistics.numpy()
-                
+                if args.architecture == "LSTM":
+                    stats_np = statistics.numpy().astype(int)
+                else:
+                    stats_np = statistics.numpy()
+
                 x = torch.tensor(stats_np, dtype=torch.float32).to(device)
                 y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
 
@@ -394,41 +285,6 @@ def predict_torch_ffnn(model, test_ds, args):
     return all_preds, all_labels
 
 
-
-
-def predict_torch_lstm(model, test_ds, args):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.eval()
-    model.to(device)
-    criterion = nn.CrossEntropyLoss()
-
-    all_preds = []
-    all_labels = []
-
-    with torch.no_grad():
-        while test_ds.iteration < args.max_iter:
-            testing_batches = next(test_ds)
-            for testing_batch in testing_batches:
-                statistics, labels = testing_batch.items()
-
-                stats_np = statistics.numpy().astype(int)  # input tokenizzati
-                x = torch.tensor(stats_np, dtype=torch.long).to(device)
-                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
-
-                outputs = model(x)
-                loss = criterion(outputs, y)
-
-                preds = torch.softmax(outputs, dim=1).cpu().numpy()
-                all_preds.append(preds)
-                all_labels.append(labels.numpy())
-
-    all_preds = np.concatenate(all_preds, axis=0)
-    all_labels = np.concatenate(all_labels, axis=0)
-
-    return all_preds, all_labels
-
-
-
 def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
 
@@ -667,7 +523,7 @@ def parse_arguments():
                              'When interrupting, the current model is \n'
                              'saved as interrupted_...')
     parser.add_argument('--model_name', default='m.h5', type=str,
-                        help='Name of the output model file. The file must \nhave the .h5 extension.')
+                        help='Name of the output model file. The file must \nhave the .h5 or .pth extension.')
     parser.add_argument('--ciphers', default='all', type=str,
                         help='A comma seperated list of the ciphers to be created.\n'
                              'Be careful to not use spaces or use \' to define the string.\n'
@@ -1073,10 +929,10 @@ def create_checkpoint_callback():
     should_create_validation_data = True
 
     if args.architecture == "FFNN" and isinstance(model, FFNN):
-        return train_torch_ffnn(model, args, train_ds)
+        return train_torch(model, args, train_ds)
     
     elif args.architecture == "LSTM" and isinstance(model, LSTM):
-        return train_torch_lstm(model, args, train_ds)
+        return train_torch(model, args, train_ds)
     
 
     # Perform main training loop while the iterations don't exceed the user provided max_iter
@@ -1406,7 +1262,7 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
                 
             elif architecture == "FFNN" and isinstance(model, FFNN):
-                preds, labels = predict_torch_ffnn(model, test_ds, args)
+                preds, labels = predict_torch(model, test_ds, args)
                 # You may want to adapt this to your PredictionPerformanceMetrics usage:
                 prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
                 prediction_metrics[architecture].add_predictions(labels, preds)
@@ -1420,7 +1276,7 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 return prediction_stats
             
             elif architecture == "LSTM" and isinstance(model, LSTM):
-                preds, labels = predict_torch_lstm(model, test_ds, args)
+                preds, labels = predict_torch(model, test_ds, args)
                 prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
                 prediction_metrics[architecture].add_predictions(labels, preds)
                 for metrics in prediction_metrics.values():
@@ -1520,7 +1376,7 @@ def main():
 
     # Validate inputs
     if os.path.splitext(args.model_name)[1] not in ('.h5', '.pth'):
-        print('ERROR: The model must have extension ".h5" (for Keras) or ".pth" (for PyTorch FFNN).', file=sys.stderr)
+        print('ERROR: The model must have extension ".h5" (for Keras) or ".pth" (for PyTorch).', file=sys.stderr)
         sys.exit(1)
 
     if extend_model is not None:

From 003c986af91a5769b4dcbe9d720bd0cb58f5a27b Mon Sep 17 00:00:00 2001
From: MaikBastian <2962185+MaikBastian@users.noreply.github.com>
Date: Fri, 12 Dec 2025 13:58:22 +0100
Subject: [PATCH 27/31] Move FFNN and LSTM definitions into their own file

---
 cipherTypeDetection/eval.py        |  3 +-
 cipherTypeDetection/models/ffnn.py | 24 +++++++++
 cipherTypeDetection/models/lstm.py | 60 ++++++++++++++++++++++
 cipherTypeDetection/train.py       | 80 ++----------------------------
 4 files changed, 90 insertions(+), 77 deletions(-)
 create mode 100644 cipherTypeDetection/models/ffnn.py
 create mode 100644 cipherTypeDetection/models/lstm.py

diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index aacb61a..b391880 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -10,7 +10,6 @@
 from datetime import datetime
 
 import torch
-from cipherTypeDetection.train import FFNN
 
 # This environ variable must be set before all tensorflow imports!
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -29,6 +28,8 @@
 from cipherTypeDetection.transformer import MultiHeadSelfAttention, TransformerBlock, TokenAndPositionEmbedding
 from util.utils import get_model_input_length
 from cipherImplementations.cipher import OUTPUT_ALPHABET, UNKNOWN_SYMBOL_NUMBER
+from cipherTypeDetection.models.ffnn import FFNN
+from cipherTypeDetection.models.lstm import LSTM
 tf.debugging.set_log_device_placement(enabled=False)
 # always flush after print as some architectures like RF need very long time before printing anything.
 print = functools.partial(print, flush=True)
diff --git a/cipherTypeDetection/models/ffnn.py b/cipherTypeDetection/models/ffnn.py
new file mode 100644
index 0000000..9dac6c2
--- /dev/null
+++ b/cipherTypeDetection/models/ffnn.py
@@ -0,0 +1,24 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class FFNN(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
+        super().__init__()
+
+        # keep the constructor arguments as attributes so that they can be saved and loaded later
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.output_size = output_size
+        self.num_hidden_layers = num_hidden_layers
+
+        layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
+        for _ in range(num_hidden_layers - 1):
+            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
+        layers.append(nn.Linear(hidden_size, output_size))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
diff --git a/cipherTypeDetection/models/lstm.py b/cipherTypeDetection/models/lstm.py
new file mode 100644
index 0000000..9e67187
--- /dev/null
+++ b/cipherTypeDetection/models/lstm.py
@@ -0,0 +1,60 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class LSTM(nn.Module):
+    def __init__(
+        self, vocab_size, embed_dim, hidden_size, output_size, num_layers=1, dropout=0.0
+    ):
+        super().__init__()
+
+        # keep the constructor arguments as attributes so that they can be saved and loaded later
+        self.vocab_size = vocab_size
+        self.embed_dim = embed_dim
+        self.hidden_size = hidden_size
+        self.output_size = output_size
+        self.num_layers = num_layers
+        self.dropout = dropout
+
+        # Layers
+        self.embedding = nn.Embedding(
+            num_embeddings=vocab_size, embedding_dim=embed_dim, padding_idx=0
+        )
+        self.lstm = nn.LSTM(
+            input_size=embed_dim,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            batch_first=True,
+            dropout=dropout if num_layers > 1 else 0.0,
+        )
+        self.fc = nn.Linear(hidden_size, output_size)
+
+    # B: Batch size           – number of sequences processed in parallel
+    # L: Sequence length      – number of time steps (tokens) in each sequence
+    # D: Embedding dimension  – size of each token’s embedding vector
+    # H: Hidden size          – number of features in the LSTM hidden state
+    # C: Number of classes    – dimensionality of the output logits
+
+    def forward(self, x):
+        # x: integer tensor (IntTensor or LongTensor) of token indices, shape [B, L] or [B, L, 1]
+        if x.dim() == 3 and x.size(2) == 1:
+            x = x.squeeze(2)  # remove channel dimension → [B, L]
+
+        emb = self.embedding(x)  # embeddings → [B, L, D]
+
+        # LSTM returns:
+        # - output: hidden state at each time step → [B, L, H]
+        # - hidden: final hidden state for each layer → [num_layers, B, H]
+        # `output` is not used, as we only need the last hidden state, but it can be useful for debugging
+        output, (hidden, _) = self.lstm(emb)
+
+        # hidden[-1] selects the final hidden state of the top (last) layer
+        # at the last time step → [B, H]
+        last_hidden = hidden[-1]
+
+        # apply the fully-connected layer to get logits → [B, C]
+        logits = self.fc(last_hidden)
+
+        return logits
+
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 6fdea78..83830d0 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -44,87 +44,15 @@
 from cipherTypeDetection.miniBatchEarlyStoppingCallback import MiniBatchEarlyStopping
 from cipherTypeDetection.transformer import TransformerBlock, TokenAndPositionEmbedding
 from cipherTypeDetection.learningRateSchedulers import TimeBasedDecayLearningRateScheduler, CustomStepDecayLearningRateScheduler
+from cipherTypeDetection.models.ffnn import FFNN
+from cipherTypeDetection.models.lstm import LSTM
+from cipherTypeDetection.config import Backend
+
 tf.debugging.set_log_device_placement(enabled=False)
 # always flush after print as some architectures like RF need very long time before printing anything.
 print = functools.partial(print, flush=True)
 for device in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(device, True)
-
-class FFNN(nn.Module):
-    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
-        super().__init__()
-
-        # saves parameters so that they can be saved and loaded later
-        self.input_size = input_size
-        self.hidden_size = hidden_size
-        self.output_size = output_size
-        self.num_hidden_layers = num_hidden_layers
-
-        layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
-        for _ in range(num_hidden_layers - 1):
-            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
-        layers.append(nn.Linear(hidden_size, output_size))
-        self.net = nn.Sequential(*layers)
-
-    def forward(self, x):
-        return self.net(x)
-
-
-class LSTM(nn.Module):
-    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, num_layers=1, dropout=0.0):
-        super().__init__()
-
-        # saves parameters so that they can be saved and loaded later
-        self.vocab_size = vocab_size
-        self.embed_dim = embed_dim
-        self.hidden_size = hidden_size
-        self.output_size = output_size
-        self.num_layers = num_layers
-        self.dropout = dropout
-
-        # Layers
-        self.embedding = nn.Embedding(
-            num_embeddings=vocab_size,
-            embedding_dim=embed_dim,
-            padding_idx=0
-        )
-        self.lstm = nn.LSTM(
-            input_size=embed_dim,
-            hidden_size=hidden_size,
-            num_layers=num_layers,
-            batch_first=True,
-            dropout=dropout if num_layers > 1 else 0.0
-        )
-        self.fc = nn.Linear(hidden_size, output_size)
-
-    # B: Batch size           – number of sequences processed in parallel
-    # L: Sequence length      – number of time steps (tokens) in each sequence
-    # D: Embedding dimension  – size of each token’s embedding vector
-    # H: Hidden size          – number of features in the LSTM hidden state
-    # C: Number of classes    – dimensionality of the output logits
-    
-    def forward(self, x):
-        # x: LongTensor of shape [B, L] or [B, L, 1]
-        if x.dim() == 3 and x.size(2) == 1:
-            x = x.squeeze(2)                  # remove channel dimension → [B, L]
-
-        emb = self.embedding(x)              # embeddings → [B, L, D]
-
-        # LSTM returns:
-        # - output: hidden state at each time step → [B, L, H]
-        # - hidden: final hidden state for each layer → [num_layers, B, H]
-        # not used as we only need the last hidden state, but can be useful for debugging
-        output, (hidden, _) = self.lstm(emb)
-
-        # hidden[-1] selects the final hidden state of the top (last) layer 
-        # at the last time step → [B, H]
-        last_hidden = hidden[-1]
-
-        # apply the fully-connected layer to get logits → [B, C]
-        logits = self.fc(last_hidden)
-
-        return logits
-
       
 def train_torch(model, args, train_ds):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

From f1ee9f2d6c356fec4a3cbf4049769d67f20e47d5 Mon Sep 17 00:00:00 2001
From: MaikBastian <2962185+MaikBastian@users.noreply.github.com>
Date: Fri, 12 Dec 2025 14:04:19 +0100
Subject: [PATCH 28/31] Integrate `train_torch` and `predict_torch` functions
 more cleanly

---
 cipherTypeDetection/config.py |   8 +++
 cipherTypeDetection/train.py  | 123 ++++++++++++++--------------------
 2 files changed, 58 insertions(+), 73 deletions(-)

diff --git a/cipherTypeDetection/config.py b/cipherTypeDetection/config.py
index 5a3ea4e..70b77ab 100755
--- a/cipherTypeDetection/config.py
+++ b/cipherTypeDetection/config.py
@@ -1,3 +1,5 @@
+from enum import Enum
+
 from cipherImplementations.cipher import INPUT_ALPHABET, UNKNOWN_SYMBOL, UNKNOWN_SYMBOL_NUMBER
 from cipherImplementations.simpleSubstitution import SimpleSubstitution
 from cipherImplementations.hill import Hill
@@ -192,3 +194,9 @@
 # LearningRateSchedulers
 decay = 1e-8
 drop = 0.1
+
+class Backend(Enum):
+    """Differentiate between the Keras and PyTorch backend for model training."""
+    KERAS = 0
+    PYTORCH = 1
+
diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index 83830d0..cf046e2 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -54,7 +54,7 @@
 for device in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(device, True)
       
-def train_torch(model, args, train_ds):
+def train_torch(model, args, train_ds, feature_engineering):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
 
@@ -84,25 +84,28 @@ def train_torch(model, args, train_ds):
             training_batches = next(train_ds)
             for training_batch in training_batches:
                 statistics, labels = training_batch.items()
-                if args.architecture == "LSTM":
-                    stats_np = statistics.numpy().astype(int)
-                else:
-                    stats_np = statistics.numpy()
-                labels_np = labels.numpy()
+                statistics = statistics.numpy()
+                labels = labels.numpy()
+                if not feature_engineering:
+                    statistics = statistics.astype(int)
                 
                 if not val_data_created:
                     x_train_np, x_val_np, y_train_np, y_val_np = train_test_split(
-                        stats_np, labels_np, test_size=0.3
+                        statistics, labels, test_size=0.3
                     )
                     x_val = torch.tensor(x_val_np, dtype=torch.float32).to(device)
+                    if not feature_engineering:
+                        x_val = x_val.int()
                     y_val = torch.tensor(y_val_np, dtype=torch.long).to(device)
                     val_data_created = True
                 else:
-                    x_train_np = stats_np
-                    y_train_np = labels_np
+                    x_train_np = statistics
+                    y_train_np = labels
 
                 # Use DataLoader for creating minibatch
                 x_train = torch.tensor(x_train_np, dtype=torch.float32)
+                if not feature_engineering:
+                    x_train = x_train.int()
                 y_train = torch.tensor(y_train_np, dtype=torch.long)
 
                 train_dataset = TensorDataset(x_train, y_train)
@@ -169,48 +172,35 @@ class DummyEarlyStopping: stop_training = False
     return DummyEarlyStopping(), train_iter, f"Trained for {train_epoch} epochs"
 
 
-def predict_torch(model, test_ds, args):
+def predict_torch(model, args, statistics, labels, feature_engineering):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
     model.to(device)
     criterion = nn.CrossEntropyLoss()
 
-    all_preds = []
-    all_labels = []
-
     with torch.no_grad():
-        while test_ds.iteration < args.max_iter:
-            testing_batches = next(test_ds)
-            for testing_batch in testing_batches:
-                statistics, labels = testing_batch.items()
-                if args.architecture == "LSTM":
-                    stats_np = statistics.numpy().astype(int)
-                else:
-                    stats_np = statistics.numpy()
-
-                x = torch.tensor(stats_np, dtype=torch.float32).to(device)
-                y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
+        statistics = statistics.numpy()
+        x = torch.tensor(statistics, dtype=torch.float32).to(device)
+        if not feature_engineering:
+            x = x.int()
 
-                outputs = model(x)
-                loss = criterion(outputs, y)
+        y = torch.tensor(labels.numpy(), dtype=torch.long).to(device)
 
-                pred_top1 = torch.argmax(outputs, dim=1)
-                acc = (pred_top1 == y).float().mean().item()
+        outputs = model(x)
+        loss = criterion(outputs, y)
 
-                top3 = torch.topk(outputs, k=3, dim=1).indices
-                y_expanded = y.unsqueeze(1).expand_as(top3)
-                k3_acc = (top3 == y_expanded).any(dim=1).float().mean().item()
+        pred_top1 = torch.argmax(outputs, dim=1)
+        acc = (pred_top1 == y).float().mean().item()
 
-                print(f"Eval → Loss: {loss.item():.4f}, Accuracy: {acc:.4f}, Top-3 Accuracy: {k3_acc:.4f}")
+        top3 = torch.topk(outputs, k=3, dim=1).indices
+        y_expanded = y.unsqueeze(1).expand_as(top3)
+        k3_acc = (top3 == y_expanded).any(dim=1).float().mean().item()
 
-                preds = torch.softmax(outputs, dim=1).cpu().numpy()
-                all_preds.append(preds)
-                all_labels.append(labels.numpy())
+        print(f"Eval → Loss: {loss.item():.4f}, Accuracy: {acc:.4f}, Top-3 Accuracy: {k3_acc:.4f}")
 
-    all_preds = np.concatenate(all_preds, axis=0)
-    all_labels = np.concatenate(all_labels, axis=0)
+        preds = torch.softmax(outputs, dim=1).cpu().numpy()
 
-    return all_preds, all_labels
+        return preds, labels.numpy()
 
 
 def str2bool(v):
@@ -856,13 +846,6 @@ def create_checkpoint_callback():
     classes = list(range(len(config.CIPHER_TYPES)))
     should_create_validation_data = True
 
-    if args.architecture == "FFNN" and isinstance(model, FFNN):
-        return train_torch(model, args, train_ds)
-    
-    elif args.architecture == "LSTM" and isinstance(model, LSTM):
-        return train_torch(model, args, train_ds)
-    
-
     # Perform main training loop while the iterations don't exceed the user provided max_iter
     while train_ds.iteration < args.max_iter:
         training_batches = next(train_ds)
@@ -1188,33 +1171,12 @@ def predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
                 prediction_metrics["RF"].add_predictions(labels, model[2].predict_proba(statistics))
                 prediction_metrics["SVM"].add_predictions(labels, model[3].predict_proba(statistics))
                 prediction_metrics["kNN"].add_predictions(labels, model[4].predict_proba(statistics))
-                
-            elif architecture == "FFNN" and isinstance(model, FFNN):
-                preds, labels = predict_torch(model, test_ds, args)
-                # You may want to adapt this to your PredictionPerformanceMetrics usage:
-                prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
-                prediction_metrics[architecture].add_predictions(labels, preds)
-                for metrics in prediction_metrics.values():
-                    metrics.print_evaluation()
-                elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
-                prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
-                    elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
-                    (elapsed_prediction_time.seconds // 60) % 60,
-                    elapsed_prediction_time.seconds % 60)
-                return prediction_stats
-            
-            elif architecture == "LSTM" and isinstance(model, LSTM):
-                preds, labels = predict_torch(model, test_ds, args)
-                prediction_metrics = {architecture: PredictionPerformanceMetrics(model_name=architecture)}
-                prediction_metrics[architecture].add_predictions(labels, preds)
-                for metrics in prediction_metrics.values():
-                    metrics.print_evaluation()
-                elapsed_prediction_time = datetime.fromtimestamp(time.time()) - datetime.fromtimestamp(start_time)
-                prediction_stats = 'Prediction time: %d days %d hours %d minutes %d seconds.' % (
-                    elapsed_prediction_time.days, elapsed_prediction_time.seconds // 3600, 
-                    (elapsed_prediction_time.seconds // 60) % 60,
-                    elapsed_prediction_time.seconds % 60)
-                return prediction_stats
+            elif architecture == "FFNN":
+                prediction, labels = predict_torch(model, args, statistics, labels, feature_engineering=True)
+                prediction_metrics[architecture].add_predictions(labels, prediction)
+            elif architecture == "LSTM":
+                prediction, labels = predict_torch(model, args, statistics, labels, feature_engineering=False)
+                prediction_metrics[architecture].add_predictions(labels, prediction)
             else:
                 prediction = model.predict(statistics, batch_size=args.batch_size, verbose=1)
                 prediction_metrics[architecture].add_predictions(labels, prediction)
@@ -1302,10 +1264,19 @@ def main():
     architecture = args.architecture
     extend_model = args.extend_model
 
+    backend = Backend.KERAS
+    if architecture == "FFNN" or architecture == "LSTM":
+        backend = Backend.PYTORCH
+
     # Validate inputs
     if os.path.splitext(args.model_name)[1] not in ('.h5', '.pth'):
         print('ERROR: The model must have extension ".h5" (for Keras) or ".pth" (for PyTorch).', file=sys.stderr)
         sys.exit(1)
+    
+    if backend == Backend.PYTORCH and os.path.splitext(args.model_name)[1] != ".pth":
+        print("ERROR: PyTorch models must have .pth file extension.")
+        sys.exit(1)
+
 
     if extend_model is not None:
         if architecture not in ('FFNN', 'CNN', 'LSTM'):
@@ -1351,9 +1322,15 @@ def main():
     architecture, extend_model, output_layer_size=output_layer_size, max_train_len=args.max_train_len)
 
     
-    early_stopping_callback, train_iter, training_stats = train_model(model, strategy, 
+    if backend == Backend.KERAS:
+        early_stopping_callback, train_iter, training_stats = train_model(model, strategy, 
                                                                       args, train_ds)
     save_model(model, args)
+    elif backend == Backend.PYTORCH:
+        early_stopping_callback, train_iter, training_stats = train_torch(model, args, train_ds, config.FEATURE_ENGINEERING)
+    else:
+        raise ValueError(f"Unkown backend: {backend}")
+    
     prediction_stats = predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
     
     print(training_stats)

From 8649cde4327241d5d165287bb23989c4206ded27 Mon Sep 17 00:00:00 2001
From: MaikBastian <2962185+MaikBastian@users.noreply.github.com>
Date: Fri, 12 Dec 2025 14:08:22 +0100
Subject: [PATCH 29/31] Clean model summary and saving code

---
 cipherTypeDetection/train.py | 72 +++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/cipherTypeDetection/train.py b/cipherTypeDetection/train.py
index cf046e2..f117d22 100755
--- a/cipherTypeDetection/train.py
+++ b/cipherTypeDetection/train.py
@@ -207,7 +207,19 @@ def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
 
 
-def create_model_with_distribution_strategy(architecture, extend_model, output_layer_size, max_train_len):
+def print_model_summary(architecture, model, backend, max_train_len):
+    if backend == Backend.KERAS:
+        model.summary()
+    elif backend == Backend.PYTORCH:
+        # for LSTM use a LongTensor dummy input of shape (1, max_train_len)
+        if architecture == "LSTM":
+            summary(model, input_size=(1, max_train_len), dtypes=[torch.long])
+        else:
+            summary(model, input_size=(1, 724))
+    else:
+        raise ValueError(f"Unknown backend {backend}")
+
+def create_model_with_distribution_strategy(architecture, backend, extend_model, output_layer_size, max_train_len):
     """Creates models depending on the GPU count and on extend_model"""
     print('Creating model...')
 
@@ -223,14 +235,7 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
                 extend_model = tf.keras.models.load_model(extend_model, compile=False)
             model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-            if hasattr(model, "summary"):
-                model.summary()
-            else:
-                # for LSTM use a LongTensor dummy input of shape (1, max_train_len)
-                if architecture == "LSTM":
-                    summary(model, input_size=(1, max_train_len), dtypes=[torch.long])
-                else:
-                    summary(model, input_size=(1, 724))
+            print_model_summary(architecture, model, backend, max_train_len)
 
     else:
         print("Only one GPU found.")
@@ -239,14 +244,7 @@ def create_model_with_distribution_strategy(architecture, extend_model, output_l
             extend_model = tf.keras.models.load_model(extend_model, compile=False)
         model = create_model(architecture, extend_model, output_layer_size, max_train_len)
         if architecture in ("FFNN", "CNN", "LSTM", "Transformer") and extend_model is None:
-            if hasattr(model, "summary"):
-                model.summary()
-            else:
-                # for LSTM use a LongTensor dummy input of shape (1, max_train_len)
-                if architecture == "LSTM":
-                    summary(model, input_size=(1, max_train_len), dtypes=[torch.long])
-                else:
-                    summary(model, input_size=(1, 724))
+            print_model_summary(architecture, model, backend, max_train_len)
 
 
     print('Model created.\n')
@@ -440,8 +438,10 @@ def parse_arguments():
                         help='Directory for saving generated models. \n'
                              'When interrupting, the current model is \n'
                              'saved as interrupted_...')
-    parser.add_argument('--model_name', default='m.h5', type=str,
-                        help='Name of the output model file. The file must \nhave the .h5 or .pth extension.')
+    parser.add_argument('--model_name', type=str, required=True,
+                        help='Name of the output model file. The file must \n'
+                             'have the .h5 or .pth extension for Keras models or \n'
+                             'PyTorch models, respectively.')
     parser.add_argument('--ciphers', default='all', type=str,
                         help='A comma seperated list of the ciphers to be created.\n'
                              'Be careful to not use spaces or use \' to define the string.\n'
@@ -983,7 +983,7 @@ def create_checkpoint_callback():
     print(training_stats)
     return early_stopping_callback, train_iter, training_stats
         
-def save_model(model, args):
+def save_model(model, args, backend):
     """Writes the model and the commandline arguments to disk."""
     print('Saving model...')
     architecture = args.architecture
@@ -991,18 +991,9 @@ def save_model(model, args):
     if not os.path.exists(args.save_directory):
         os.mkdir(args.save_directory)
 
-    # Gestione nome modello
-    if args.model_name == 'm.h5':
-        i = 1
-        base_name = args.model_name.split('.')[0]
-        extension = '.pth' if architecture == "FFNN" else '.h5'
-        while os.path.exists(os.path.join(args.save_directory, base_name + str(i) + extension)):
-            i += 1
-        model_name = base_name + str(i) + extension
-    else:
-        model_name = args.model_name
-        if architecture == "FFNN":
-            model_name = model_name.replace('.h5', '.pth')
+    model_name = args.model_name
+    if backend == Backend.PYTORCH and not model_name.endswith(".pth"):
+        model_name = model_name + '.pth'
 
     model_path = os.path.join(args.save_directory, model_name)
 
@@ -1030,26 +1021,29 @@ def save_model(model, args):
 
     elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN", "SVM-Rotor"):
         with open(model_path, "wb") as f:
+            # this gets very large
             pickle.dump(model, f)
 
     elif architecture == "[FFNN,NB]":
         model[0].save('../data/models/' + model_path.split('.')[0] + "_ffnn.h5")
         with open('../data/models/' + model_path.split('.')[0] + "_nb.h5", "wb") as f:
+            # this gets very large
             pickle.dump(model[1], f)
 
     elif architecture == "[DT,ET,RF,SVM,kNN]":
         for index, name in enumerate(["dt", "et", "rf", "svm", "knn"]):
+            # TODO: Are these files actually in the h5 format? Probably not!
             with open('../data/models/' + model_path.split('.')[0] + f"_{name}.h5", "wb") as f:
+                # this gets very large
                 pickle.dump(model[index], f)
     
-    """
-    # Saving parameters
+    # Write user provided commandline arguments into model path
     with open('../data/' + model_path.split('.')[0] + '_parameters.txt', 'w') as f:
         for arg in vars(args):
             f.write("{:23s}= {:s}\n".format(arg, str(getattr(args, arg))))
 
     # Managing logs
-    if architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
+    if architecture in ("CNN", "Transformer"):
         logs_destination = '../data/' + model_name.split('.')[0] + '_tensorboard_logs'
         try:
             if os.path.exists('../data/logs'):
@@ -1058,7 +1052,7 @@ def save_model(model, args):
                 shutil.move('../data/logs', logs_destination)
         except Exception:
             print(f"Could not move logs from '../data/logs' to '{logs_destination}'.")
-    """
+
     print('Model saved.\n')
 
 
@@ -1279,7 +1273,7 @@ def main():
 
 
     if extend_model is not None:
-        if architecture not in ('FFNN', 'CNN', 'LSTM'):
+        if architecture not in ('CNN',):
             print('ERROR: Models with the architecture %s can not be extended!' % architecture,
                   file=sys.stderr)
             sys.exit(1)
@@ -1319,18 +1313,18 @@ def main():
 
     # Create a model and allow for distributed training on multi-GPU machines
     model, strategy = create_model_with_distribution_strategy(
-    architecture, extend_model, output_layer_size=output_layer_size, max_train_len=args.max_train_len)
+    architecture, backend, extend_model, output_layer_size=output_layer_size, max_train_len=args.max_train_len)
 
     
     if backend == Backend.KERAS:
         early_stopping_callback, train_iter, training_stats = train_model(model, strategy, 
                                                                       args, train_ds)
-    save_model(model, args)
     elif backend == Backend.PYTORCH:
         early_stopping_callback, train_iter, training_stats = train_torch(model, args, train_ds, config.FEATURE_ENGINEERING)
     else:
         raise ValueError(f"Unkown backend: {backend}")
     
+    save_model(model, args, backend)
     prediction_stats = predict_test_data(test_ds, model, args, early_stopping_callback, train_iter)
     
     print(training_stats)

From fcb7246f303fd79adab00606150341d66459a2bc Mon Sep 17 00:00:00 2001
From: MaikBastian <2962185+MaikBastian@users.noreply.github.com>
Date: Fri, 12 Dec 2025 14:10:17 +0100
Subject: [PATCH 30/31] Enable evaluation of the PyTorch LSTM and ensembles
 with PyTorch models

---
 cipherTypeDetection/ensembleModel.py          |  91 +++++++++----
 cipherTypeDetection/eval.py                   | 124 +++++++++++-------
 cipherTypeDetection/models/ffnn.py            |  12 ++
 cipherTypeDetection/models/lstm.py            |  12 ++
 .../rotorDifferentiationEnsemble.py           |  14 +-
 5 files changed, 179 insertions(+), 74 deletions(-)

diff --git a/cipherTypeDetection/ensembleModel.py b/cipherTypeDetection/ensembleModel.py
index 990f5fb..5ef1ef4 100644
--- a/cipherTypeDetection/ensembleModel.py
+++ b/cipherTypeDetection/ensembleModel.py
@@ -1,4 +1,5 @@
 import tensorflow as tf
+import torch
 import pickle
 import numpy as np
 from tensorflow.keras.optimizers import Adam
@@ -7,6 +8,9 @@
 import cipherTypeDetection.config as config
 from cipherTypeDetection.transformer import MultiHeadSelfAttention, TransformerBlock, TokenAndPositionEmbedding
 from cipherImplementations.cipher import OUTPUT_ALPHABET
+from cipherTypeDetection.config import Backend
+from cipherTypeDetection.models.ffnn import FFNN
+from cipherTypeDetection.models.lstm import LSTM
 from util.utils import get_model_input_length
 
 
@@ -37,9 +41,14 @@
 mcc_nb = 0.5294535259111087
 # Cohen's Kappa is not used as these values are almost the same like MCC.
 
+class ModelMetadata:
+    def __init__(self, path, architecture, backend):
+        self.path = path
+        self.architecture = architecture
+        self.backend = backend
 
 class EnsembleModel:
-    def __init__(self, models, architectures, strategy, cipher_indices):
+    def __init__(self, model_metadata, strategy, cipher_indices):
         self.statistics_dict = {
             "FFNN": [f1_ffnn, accuracy_ffnn, recall_ffnn, precision_ffnn, mcc_ffnn],
             "Transformer": [f1_transformer, accuracy_transformer, recall_transformer, precision_transformer, mcc_transformer],
@@ -47,10 +56,10 @@ def __init__(self, models, architectures, strategy, cipher_indices):
             "RF": [f1_rf, accuracy_rf, recall_rf, precision_rf, mcc_rf],
             "NB": [f1_nb, accuracy_nb, recall_nb, precision_nb, mcc_nb]
         }
-        self.models = models
-        self.architectures = architectures
+        self.model_metadata = model_metadata
+        self.models = [None] * len(self.model_metadata)
         self.strategy = strategy
-        if isinstance(models[0], str):
+        if isinstance(model_metadata[0].path, str):
             self.load_model()
         for key in self.statistics_dict:
             statistics = self.statistics_dict[key]
@@ -72,22 +81,53 @@ def __init__(self, models, architectures, strategy, cipher_indices):
                 self.total_votes[i] += network_total_votes[i]
 
     def load_model(self):
-        for j in range(len(self.models)):
-            if self.architectures[j] in ("FFNN", "CNN", "LSTM", "Transformer"):
-                if self.architectures[j] == 'Transformer':
-                    model_ = tf.keras.models.load_model(self.models[j], custom_objects={
-                        'TokenAndPositionEmbedding': TokenAndPositionEmbedding, 'MultiHeadSelfAttention': MultiHeadSelfAttention,
-                        'TransformerBlock': TransformerBlock})
-                else:
-                    model_ = tf.keras.models.load_model(self.models[j])
-                optimizer = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1, beta_2=config.beta_2, epsilon=config.epsilon,
-                                 amsgrad=config.amsgrad)
-                model_.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
-                               metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
-                self.models[j] = model_
+        for i, metadata in enumerate(self.model_metadata):
+            if metadata.backend == Backend.PYTORCH:
+                self.models[i] = self._load_pytorch(metadata.architecture, metadata.path)
+            elif metadata.architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
+                self.models[i] = self._load_keras(metadata.architecture, metadata.path)
             else:
-                with open(self.models[j], "rb") as f:
-                    self.models[j] = pickle.load(f)
+                with open(metadata.path, "rb") as f:
+                    self.models[i] = pickle.load(f)
+    
+    def _load_pytorch(self, architecture, path):
+        checkpoint = torch.load(path, map_location=torch.device("cpu"))
+        
+        if architecture == "FFNN":
+            model = FFNN(
+                input_size=checkpoint['input_size'],
+                hidden_size=checkpoint['hidden_size'],
+                output_size=checkpoint['output_size'],
+                num_hidden_layers=checkpoint['num_hidden_layers']
+            )
+        elif architecture == "LSTM":
+            model = LSTM(
+                vocab_size=checkpoint['vocab_size'],
+                embed_dim=checkpoint['embed_dim'],
+                hidden_size=checkpoint['hidden_size'],
+                output_size=checkpoint['output_size'],
+                num_layers=checkpoint['num_layers'],
+                dropout=checkpoint['dropout']
+            )
+        else:
+            raise ValueError(f"Unimplemented PyTorch architecture: {architecture}")
+        
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.eval()
+        return model
+    
+    def _load_keras(self, architecture, path):
+        if architecture == 'Transformer':
+            model = tf.keras.models.load_model(path, custom_objects={
+                'TokenAndPositionEmbedding': TokenAndPositionEmbedding, 'MultiHeadSelfAttention': MultiHeadSelfAttention,
+                'TransformerBlock': TransformerBlock})
+        else:
+            model = tf.keras.models.load_model(path)
+        optimizer = Adam(learning_rate=config.learning_rate, beta_1=config.beta_1, beta_2=config.beta_2, epsilon=config.epsilon,
+                            amsgrad=config.amsgrad)
+        model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
+                        metrics=["accuracy", SparseTopKCategoricalAccuracy(k=3, name="k3_accuracy")])
+        return model
 
     def evaluate(self, batch, batch_ciphertexts, labels, batch_size, metrics, verbose=0):
         correct_all = 0
@@ -111,9 +151,14 @@ def evaluate(self, batch, batch_ciphertexts, labels, batch_size, metrics, verbos
 
     def predict(self, statistics, ciphertexts, batch_size, verbose=0):
         predictions = []
-        for index, model in enumerate(self.models):
-            architecture = self.architectures[index]
-            if architecture == "FFNN":
+        for index, metadata in enumerate(self.model_metadata):
+            model = self.models[index]
+            architecture = metadata.architecture
+            if metadata.backend == Backend.PYTORCH:
+                np_statistics = statistics.numpy() if isinstance(statistics, tf.Tensor) else statistics
+                # PyTorch predict() returns a torch.Tensor; convert so every backend yields NumPy arrays.
+                predictions.append(model.predict(np_statistics, batch_size).numpy())
+            elif architecture == "FFNN":
                 predictions.append(model.predict(statistics, batch_size=batch_size, verbose=verbose))
             elif architecture in ("CNN", "LSTM", "Transformer"):
                 input_length = get_model_input_length(model, architecture)
@@ -168,7 +213,7 @@ def predict(self, statistics, ciphertexts, batch_size, verbose=0):
                     scaled[i][j] = scaled[i][j] / len(predictions)
         elif self.strategy == 'weighted':
             for i in range(len(predictions)):
-                statistics = self.statistics_dict[self.architectures[i]]
+                statistics = self.statistics_dict[self.model_metadata[i].architecture]
                 for j in range(len(predictions[i])):
                     for k in range(len(predictions[i][j])):
                         scaled[j][k] += predictions[i][j][k] * statistics[-1][k] / self.total_votes[k]
diff --git a/cipherTypeDetection/eval.py b/cipherTypeDetection/eval.py
index b391880..7b09228 100755
--- a/cipherTypeDetection/eval.py
+++ b/cipherTypeDetection/eval.py
@@ -7,9 +7,12 @@
 import pickle
 import functools
 import numpy as np
+import time
 from datetime import datetime
+from enum import Enum
 
 import torch
+import torch.nn.functional as F
 
 # This environ variable must be set before all tensorflow imports!
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -24,10 +27,11 @@
 from cipherTypeDetection.cipherStatisticsDataset import CipherStatisticsDataset, PlaintextPathsDatasetParameters, RotorCiphertextsDatasetParameters, calculate_statistics, pad_sequences
 from cipherTypeDetection.predictionPerformanceMetrics import PredictionPerformanceMetrics
 from cipherTypeDetection.rotorDifferentiationEnsemble import RotorDifferentiationEnsemble
-from cipherTypeDetection.ensembleModel import EnsembleModel
+from cipherTypeDetection.ensembleModel import EnsembleModel, ModelMetadata
 from cipherTypeDetection.transformer import MultiHeadSelfAttention, TransformerBlock, TokenAndPositionEmbedding
 from util.utils import get_model_input_length
 from cipherImplementations.cipher import OUTPUT_ALPHABET, UNKNOWN_SYMBOL_NUMBER
+from cipherTypeDetection.config import Backend
 from cipherTypeDetection.models.ffnn import FFNN
 from cipherTypeDetection.models.lstm import LSTM
 tf.debugging.set_log_device_placement(enabled=False)
@@ -40,8 +44,23 @@
 def str2bool(v):
     return v.lower() in ("yes", "true", "t", "1")
 
+def evaluate_torch(model, inputs, labels, batch_size):
+    with torch.no_grad():
+        outputs = model.predict(inputs, batch_size)
+        y = torch.tensor(labels.numpy(), dtype=torch.long)
 
-def benchmark(args, model, architecture):
+        loss = F.nll_loss(torch.log(outputs.clamp_min(1e-12)), y)  # predict() returns softmax probabilities, not logits
+        top1 = torch.argmax(outputs, dim=1)
+        acc = (top1 == y).float().mean()
+
+        # Calc top-3
+        top3 = torch.topk(outputs, k=3, dim=1).indices
+        y_expanded = y.unsqueeze(1).expand_as(top3)
+        k3_acc = (top3 == y_expanded).any(dim=1).float().mean()
+
+        return loss.item(), acc.item(), k3_acc.item()
+
+def benchmark(args, model, architecture, backend):
     cipher_types = args.ciphers
     args.plaintext_folder = os.path.abspath(args.plaintext_folder)
     if args.dataset_size * args.dataset_workers > args.max_iter:
@@ -125,7 +144,6 @@ def find_ciphertext_paths_in_dir(folder_path):
 
     print('Evaluating model...')
 
-    import time
     start_time = time.time()
     iteration = 0
     epoch = 0
@@ -138,30 +156,13 @@ def find_ciphertext_paths_in_dir(folder_path):
         for index, batch in enumerate(batches):
             statistics, labels, ciphertexts = batch.items()
 
-            if architecture == "FFNN":
-                if hasattr(model, "evaluate"):  # Keras model
-                    results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
-                else:  # PyTorch model
-                    stats_np = statistics.numpy()
-                    
-                    x = torch.tensor(stats_np, dtype=torch.float32)
-                    y = torch.tensor(labels.numpy(), dtype=torch.long)
-
-                    with torch.no_grad():
-                        outputs = model(x)
-                        loss = F.cross_entropy(outputs, y)
-                        top1 = torch.argmax(outputs, dim=1)
-                        acc = (top1 == y).float().mean()
-
-                        # Calc top-3
-                        top3 = torch.topk(outputs, k=3, dim=1).indices
-                        y_expanded = y.unsqueeze(1).expand_as(top3)
-                        k3_acc = (top3 == y_expanded).any(dim=1).float().mean()
-
-                        results.append((loss.item(), acc.item(), k3_acc.item()))
-
-
-            elif architecture in ("CNN", "LSTM", "Transformer"):
+            if architecture == "FFNN" and backend == Backend.KERAS:
+                results.append(model.evaluate(statistics, labels, batch_size=args.batch_size, verbose=1))
+            elif architecture == "FFNN" and backend == Backend.PYTORCH:
+                results.append(evaluate_torch(model, statistics, labels, batch_size=args.batch_size))
+            elif architecture == "LSTM" and backend == Backend.PYTORCH:
+                results.append(evaluate_torch(model, ciphertexts, labels, batch_size=args.batch_size))
+            elif architecture in ("CNN", "Transformer"):
                 results.append(model.evaluate(ciphertexts, labels, batch_size=args.batch_size, verbose=1))
             elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN"):
                 results.append(model.score(statistics, labels))
@@ -221,7 +222,7 @@ def evaluate(args, model, architecture):
         if iterations > args.max_iter:
             break
         path = os.path.join(args.data_folder, name)
-        if os.path.isfile(path):
+        if os.path.isfile(path) and path.endswith(".txt"):
             if iterations > args.max_iter:
                 break
             batch = []
@@ -330,7 +331,7 @@ def evaluate(args, model, architecture):
         print("\n\nAverage evaluation results from %d iterations: avg_test_acc=%f" % (iterations, avg_test_acc))
 
 
-def predict_single_line(args, model, architecture):
+def predict_single_line(args, model, architecture, backend):
     cipher_id_result = ''
     ciphertexts = []
     result = []
@@ -365,8 +366,12 @@ def predict_single_line(args, model, architecture):
             print("\n")
             continue
         results = None
-        if architecture == "FFNN":
+        if architecture == "FFNN" and backend == Backend.KERAS:
             result = model.predict(tf.convert_to_tensor([statistics]), args.batch_size, verbose=0)
+        elif architecture == "FFNN" and backend == Backend.PYTORCH:
+            result = model.predict([statistics], args.batch_size)
+        elif architecture == "LSTM" and backend == Backend.PYTORCH:
+            result = model.predict([ciphertext], args.batch_size)
         elif architecture in ("CNN", "LSTM", "Transformer"):
             input_length = get_model_input_length(model, architecture)
             if len(ciphertext) < input_length:
@@ -427,7 +432,6 @@ def load_model(architecture, args, model_path, cipher_types):
     model = None
 
     if architecture == "FFNN" and model_path.endswith(".pth"):
-
         checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
         
         model = FFNN(
@@ -441,9 +445,21 @@ def load_model(architecture, args, model_path, cipher_types):
 
         config.FEATURE_ENGINEERING = True
         config.PAD_INPUT = False
+    elif architecture == "LSTM" and model_path.endswith(".pth"):
+        checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
+        model = LSTM(
+            vocab_size=checkpoint['vocab_size'],
+            embed_dim=checkpoint['embed_dim'],
+            hidden_size=checkpoint['hidden_size'],
+            output_size=checkpoint['output_size'],
+            num_layers=checkpoint['num_layers'],
+            dropout=checkpoint['dropout']
+        )
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.eval()
 
-        return model
-
+        config.FEATURE_ENGINEERING = True
+        config.PAD_INPUT = False
     elif architecture in ("FFNN", "CNN", "LSTM", "Transformer"):
         if architecture == 'Transformer':
             if not hasattr(config, "maxlen"):
@@ -472,26 +488,30 @@ def load_model(architecture, args, model_path, cipher_types):
         cipher_indices = []
         for cipher_type in cipher_types:
             cipher_indices.append(config.CIPHER_TYPES.index(cipher_type))
-        model = EnsembleModel(model_list, architecture_list, strategy, cipher_indices)
+        model_metadata = []
+        for i, model in enumerate(model_list):
+            metadata = ModelMetadata(model, architecture_list[i], Backend.PYTORCH if model.endswith(".pth") else Backend.KERAS)
+            model_metadata.append(metadata)
+        model = EnsembleModel(model_metadata, strategy, cipher_indices)
     else:
         raise ValueError("Unknown architecture: %s" % architecture)
     
-    # Controlla se ci sono cifrari rotor tra quelli richiesti
+    # Check if there are rotor ciphers among those requested
     has_rotor_ciphers = any(c in config.ROTOR_CIPHER_TYPES for c in cipher_types)
 
-    # Se ci sono cifrari rotor, carica anche il modello rotor_only
+    # If there are rotor ciphers, also load the rotor_only model.
     if has_rotor_ciphers:
         rotor_only_model_path = args.rotor_only_model
         if not os.path.exists(rotor_only_model_path):
             raise FileNotFoundError(f"Rotor-only model is required but not found at {rotor_only_model_path}")
         with open(rotor_only_model_path, "rb") as f:
             rotor_only_model = pickle.load(f)
-        return RotorDifferentiationEnsemble(architecture, model, rotor_only_model)
+        return RotorDifferentiationEnsemble(architecture, model, rotor_only_model), "Ensemble"
 
-    # Se non ci sono cifrari rotor:
-    # - se è un ensemble, restituisci direttamente l'ensemble
-    # - altrimenti restituisci il modello normale
-    return model
+    # If there are no rotor ciphers:
+    # - if it’s an ensemble, return the ensemble directly
+    # - otherwise return the normal model
+    return model, architecture
 
 
 def expand_cipher_groups(cipher_types):
@@ -647,12 +667,15 @@ def main():
         for i in range(len(args.models)):
             model = args.models[i]
             arch = args.architectures[i]
-            if not os.path.exists(os.path.abspath(model)):
-                raise ValueError("Model in %s does not exist." % os.path.abspath(model))
+            abs_path = os.path.abspath(model)
+            if not os.path.exists(abs_path):
+                raise ValueError("Model in %s does not exist." % abs_path)
             if arch not in ('FFNN', 'CNN', 'LSTM', 'DT', 'NB', 'RF', 'ET', 'Transformer', 'SVM', 'kNN'):
                 raise ValueError("Unallowed architecture %s" % arch)
-            if arch in ('FFNN', 'CNN', 'LSTM', 'Transformer') and not os.path.abspath(model).endswith('.h5'):
-                raise ValueError("Model names of the types %s must have the .h5 extension." % ['FFNN', 'CNN', 'LSTM', 'Transformer'])
+            if arch in ('CNN', 'Transformer') and not abs_path.endswith('.h5'):
+                raise ValueError("Model names of the types %s must have the .h5 extension." % ['CNN', 'Transformer'])
+            if arch in ('FFNN', 'LSTM') and not (abs_path.endswith('.h5') or abs_path.endswith('.pth')):
+                raise ValueError("Model names of the types %s must have the .h5 or .pth extension." % ['FFNN', 'LSTM'])
     elif args.models is not None or args.architectures is not None:
         raise ValueError("It is only allowed to use the --models and --architectures with the Ensemble architecture.")
 
@@ -665,18 +688,19 @@ def main():
     #         model = load_model()
     # else:
     #     model = load_model()
-    model = load_model(architecture, args, model_path, cipher_types)
+    model, architecture = load_model(architecture, args, model_path, cipher_types)
     print("Model Loaded.")
 
-    # Model is now always an ensemble
-    architecture = "Ensemble"
+    backend = Backend.KERAS
+    if architecture != "Ensemble" and model_path.endswith(".pth"):
+        backend = Backend.PYTORCH
 
     # the program was started as in benchmark mode.
     if args.download_dataset is not None:
-        benchmark(args, model, architecture)
+        benchmark(args, model, architecture, backend)
     # the program was started in single_line mode.
     elif args.ciphertext is not None or args.file is not None:
-        predict_single_line(args, model, architecture)
+        predict_single_line(args, model, architecture, backend)
     # the program was started in prediction mode.
     else:
         evaluate(args, model, architecture)
diff --git a/cipherTypeDetection/models/ffnn.py b/cipherTypeDetection/models/ffnn.py
index 9dac6c2..89412c6 100644
--- a/cipherTypeDetection/models/ffnn.py
+++ b/cipherTypeDetection/models/ffnn.py
@@ -22,3 +22,15 @@ def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
     def forward(self, x):
         return self.net(x)
 
+    @torch.no_grad()
+    def predict(self, input, batch_size):
+        x = torch.tensor(input, dtype=torch.float32)
+
+        outputs = []
+        for i in range(0, len(x), batch_size):
+            batch = x[i : i + batch_size]
+            out = self(batch)
+            outputs.append(out)
+        outputs = torch.cat(outputs, dim=0)
+
+        return F.softmax(outputs, dim=1)
diff --git a/cipherTypeDetection/models/lstm.py b/cipherTypeDetection/models/lstm.py
index 9e67187..b4633ad 100644
--- a/cipherTypeDetection/models/lstm.py
+++ b/cipherTypeDetection/models/lstm.py
@@ -58,3 +58,15 @@ def forward(self, x):
 
         return logits
 
+    @torch.no_grad()
+    def predict(self, input, batch_size):
+        x = torch.tensor(input, dtype=torch.int)
+
+        outputs = []
+        for i in range(0, len(x), batch_size):
+            batch = x[i : i + batch_size]
+            out = self(batch)
+            outputs.append(out)
+        outputs = torch.cat(outputs, dim=0)
+
+        return F.softmax(outputs, dim=1)
diff --git a/cipherTypeDetection/rotorDifferentiationEnsemble.py b/cipherTypeDetection/rotorDifferentiationEnsemble.py
index 8f33088..3ece34c 100644
--- a/cipherTypeDetection/rotorDifferentiationEnsemble.py
+++ b/cipherTypeDetection/rotorDifferentiationEnsemble.py
@@ -1,10 +1,12 @@
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+from torch.nn import Module
 from cipherImplementations.cipher import OUTPUT_ALPHABET
 import cipherTypeDetection.config as config
 from cipherTypeDetection.featureCalculations import calculate_rotor_statistics
 from util.utils import get_model_input_length
+from cipherTypeDetection.config import Backend
 
 class RotorDifferentiationEnsemble:
     """
@@ -37,6 +39,11 @@ def __init__(self, general_model_architecture, general_model, rotor_only_model):
         """
         self._general_architecture = general_model_architecture
         self._general_model = general_model
+        self._general_model_backend = (
+            Backend.PYTORCH 
+            if isinstance(self._general_model, Module) 
+            else Backend.KERAS
+        )
         self._rotor_only_model = rotor_only_model
 
     def predict(self, statistics, ciphertexts, batch_size, verbose=0):
@@ -69,7 +76,12 @@ def predict(self, statistics, ciphertexts, batch_size, verbose=0):
         
         # Perform full prediction for all ciphers
         architecture = self._general_architecture
-        if architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN"):
+        backend = self._general_model_backend
+        if backend == Backend.PYTORCH:
+            if isinstance(statistics, tf.Tensor):
+                statistics = statistics.numpy()
+            predictions = self._general_model.predict(statistics, batch_size).numpy()
+        elif architecture in ("DT", "NB", "RF", "ET", "SVM", "kNN"):
             predictions = self._general_model.predict_proba(statistics)
         elif architecture == "Ensemble":
             predictions = self._general_model.predict(statistics, 

From 864702705b1b54a4a3f12828cbe8dd7473a2eff8 Mon Sep 17 00:00:00 2001
From: MaikBastian <2962185+MaikBastian@users.noreply.github.com>
Date: Fri, 12 Dec 2025 14:11:14 +0100
Subject: [PATCH 31/31] Update README with details about PyTorch conversion of
 FFNN and LSTM

---
 README.md | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a6f9d59..881ec45 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,9 @@ While the project was focused on ACA ciphers at first, a later extension added t
 
 Users that are experienced in machine learning can use the tools provided in this project to train and evaluate ML models using the `train.py` and `eval.py` scripts. For further information see the following sections *Training* and *Evaluation*. 
 
+The initial models were trained using the *Keras* and *scikit-learn* libraries. In 2025, Stefano Sala converted the code for the FFNN and LSTM machine learning architectures from Keras to *PyTorch* as part of his [bachelor's thesis](https://www.cryptool.org/media/publications/theses/BA_Stefano-Sala.pdf). One of the goals of this conversion was a more flexible code architecture for defining and training the machine learning models.
+With these changes, the FFNN and LSTM machine learning architectures can only be trained with PyTorch, whereas evaluation still supports both Keras and PyTorch model files.
+
 # License
 
 This software and the online version on https://www.cryptool.org/cto/ncid are licensed with the GPLv3 license. Private use of this software is allowed. Software using parts of the code from this repository must not be commercially used and also must be GPLv3 licensed.
@@ -53,7 +56,7 @@ python3 train.py --help
   ```
 
 - ```
-  python3 train.py --architecture=FFNN --dataset_workers=50 --train_dataset_size=64960 --batch_size=512 --max_iter=1000000000 --min_train_len=100 --max_train_len=100 --min_test_len=100 --max_test_len=100 --model_name=t30.h5 > weights/t30.txt 2> weights/err_t30.txt &
+  python3 train.py --architecture=FFNN --dataset_workers=50 --train_dataset_size=64960 --batch_size=512 --max_iter=1000000000 --min_train_len=100 --max_train_len=100 --min_test_len=100 --max_test_len=100 --model_name=t30.pth > weights/t30.txt 2> weights/err_t30.txt &
   ```
 
 
@@ -230,11 +233,15 @@ between the rotor ciphers. This helps with the results since the original models
 
 [Histocrypt 2021: A Massive Machine-Learning Approach For Classical Cipher Type Detection Using Feature Engineering](https://doi.org/10.3384/ecp183)
 
-AusDM 2021: Detection of Classical Cipher Types with Feature-Learning  Approaches
+AusDM 2021: Detection of Classical Cipher Types with Feature-Learning Approaches:
 
 - [Proceedings](https://link.springer.com/book/10.1007/978-981-16-8531-6)
 - [Pre-Print](https://www.cryptool.org/download/ncid/Detect-Classical-Cipher-Types-with-Feature-Learning_AusDM2021_PrePrint.pdf)
 
+PyTorch conversion of FFNN and LSTM machine learning architectures:
+
+[Application of AI for ciphertext identification](https://www.cryptool.org/media/publications/theses/BA_Stefano-Sala.pdf)
+
 ## BibTeX Citation
 
 If you use ncid in a scientific publication, we would appreciate using the following citations: