From f0aaeccf97a28c49d51da5c422a9f6bc43b23ca5 Mon Sep 17 00:00:00 2001 From: test Date: Sun, 24 Aug 2025 16:23:43 -0700 Subject: [PATCH] If merged, this commit does the following: * Remove the subpath and ref arguments from GitArchivePackager in pretrain_llama31.py * Install the toml package * Adjust --gres=gpu:8 to use the user-specified number of devices Signed-off-by: Sean Smith --- large_language_model_pretraining/nemo/Dockerfile | 1 + large_language_model_pretraining/nemo/pretrain_llama31.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/large_language_model_pretraining/nemo/Dockerfile b/large_language_model_pretraining/nemo/Dockerfile index a609ea89e..ee30dfd2d 100644 --- a/large_language_model_pretraining/nemo/Dockerfile +++ b/large_language_model_pretraining/nemo/Dockerfile @@ -18,6 +18,7 @@ FROM ${NEMO_BASE_IMAGE} AS nemo-base-image RUN pip uninstall transformers -y RUN pip install transformers==4.47.1 blobfile==3.0.0 RUN pip install prettytable==3.12.0 +RUN pip install toml==0.10.2 RUN pip install git+https://github.com/mlcommons/logging.git@4.1.0-rc3 # setup workspace diff --git a/large_language_model_pretraining/nemo/pretrain_llama31.py b/large_language_model_pretraining/nemo/pretrain_llama31.py index 2a58dd6cd..6ae450b3b 100644 --- a/large_language_model_pretraining/nemo/pretrain_llama31.py +++ b/large_language_model_pretraining/nemo/pretrain_llama31.py @@ -75,8 +75,8 @@ def slurm_executor( gpus_per_node=devices, mem="0", exclusive=True, - gres="gpu:8", - packager=run.GitArchivePackager(subpath="large_language_model_pretraining/nemo", ref="HEAD"), + gres=f"gpu:{devices}", + packager=run.GitArchivePackager(), dependencies=dependencies, )