From 5f6c9413999a120e1dd26c01bfe2d37f3faddabd Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 2 Jan 2026 20:17:30 +0100
Subject: [PATCH 1/2] fix(llama.cpp/mmproj): fix loading mmproj in nested
 sub-dirs different from model path (#7832)

fix(mmproj): fix loading mmproj in nested sub-dirs

Signed-off-by: Ettore Di Giacinto
---
 backend/cpp/llama-cpp/grpc-server.cpp |  4 +---
 core/backend/options.go               | 13 +++++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 3f33c74bf271..1009d36fd7df 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -358,9 +358,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
     params.model.path = request->modelfile();
 
     if (!request->mmproj().empty()) {
-        // get the directory of modelfile
-        std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
-        params.mmproj.path = model_dir + "/"+ request->mmproj();
+        params.mmproj.path = request->mmproj();
     }
     // params.model_alias ??
     params.model_alias = request->modelfile();
diff --git a/core/backend/options.go b/core/backend/options.go
index b585a22b3cd2..f3d5a4ccd402 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -36,7 +36,7 @@ func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...mo
 
 	c.Threads = &threads
 
-	grpcOpts := grpcModelOpts(c)
+	grpcOpts := grpcModelOpts(c, so.SystemState.Model.ModelsPath)
 	defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
 
 	if so.ParallelBackendRequests {
@@ -72,7 +72,7 @@ func getSeed(c config.ModelConfig) int32 {
 	return seed
 }
-func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
+func grpcModelOpts(c config.ModelConfig, modelPath string) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
 	}
@@ -131,7 +131,7 @@
 		})
 	}
 
-	return &pb.ModelOptions{
+	opts := &pb.ModelOptions{
 		CUDA:            c.CUDA || c.Diffusers.CUDA,
 		SchedulerType:   c.Diffusers.SchedulerType,
 		GrammarTriggers: triggers,
@@ -170,7 +170,6 @@
 		LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
 		LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
 		LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
-		MMProj:              c.MMProj,
 		FlashAttention:      flashAttention,
 		CacheTypeKey:        c.CacheTypeK,
 		CacheTypeValue:      c.CacheTypeV,
@@ -198,6 +197,12 @@
 		// RWKV
 		Tokenizer: c.Tokenizer,
 	}
+
+	if c.MMProj != "" {
+		opts.MMProj = filepath.Join(modelPath, c.MMProj)
+	}
+
+	return opts
 }
 
 func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {

From 641606ae93177c0874fffcaf5113c74936a841a7 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 2 Jan 2026 22:26:37 +0100
Subject: [PATCH 2/2] chore: :arrow_up: Update ggml-org/llama.cpp to
 `706e3f93a60109a40f1224eaf4af0d59caa7c3ae` (#7836)

:arrow_up: Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 backend/cpp/llama-cpp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 8b145e4519f1..5f285ab217c8 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=ced765be44ce173c374f295b3c6f4175f8fd109b
+LLAMA_VERSION?=706e3f93a60109a40f1224eaf4af0d59caa7c3ae
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
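
Note: the substance of the first patch is that mmproj path resolution moves out of the C++ server and into grpcModelOpts on the Go side, where the mmproj file is resolved against the models root instead of against the directory of the model file. Below is a minimal, self-contained Go sketch of the before/after behavior; the directory and file names are illustrative, not taken from the repository.

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	modelsPath := "/models" // models root passed into grpcModelOpts (illustrative value)

	// The model sits in a nested sub-directory; the mmproj file lives in a different one.
	modelFile := filepath.Join(modelsPath, "llava", "model.gguf")
	mmproj := "projectors/mmproj.gguf" // configured relative to the models root

	// Old behavior (the removed C++ code): prefix mmproj with the model file's
	// own directory, which breaks whenever the two live in different sub-dirs.
	oldPath := filepath.Join(filepath.Dir(modelFile), mmproj)
	fmt.Println(oldPath) // /models/llava/projectors/mmproj.gguf (wrong)

	// New behavior: Go joins mmproj against the models root and sends the full
	// path over gRPC, so the llama.cpp server now uses it verbatim.
	newPath := filepath.Join(modelsPath, mmproj)
	fmt.Println(newPath) // /models/projectors/mmproj.gguf (correct)
}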