diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 8b145e4519f1..5f285ab217c8 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=ced765be44ce173c374f295b3c6f4175f8fd109b
+LLAMA_VERSION?=706e3f93a60109a40f1224eaf4af0d59caa7c3ae
 
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 3f33c74bf271..1009d36fd7df 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -358,9 +358,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
     params.model.path = request->modelfile();
 
     if (!request->mmproj().empty()) {
-        // get the directory of modelfile
-        std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
-        params.mmproj.path = model_dir + "/"+ request->mmproj();
+        params.mmproj.path = request->mmproj();
     }
     // params.model_alias ??
     params.model_alias = request->modelfile();
diff --git a/core/backend/options.go b/core/backend/options.go
index b585a22b3cd2..f3d5a4ccd402 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -36,7 +36,7 @@ func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...mo
 
 	c.Threads = &threads
 
-	grpcOpts := grpcModelOpts(c)
+	grpcOpts := grpcModelOpts(c, so.SystemState.Model.ModelsPath)
 	defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
 
 	if so.ParallelBackendRequests {
@@ -72,7 +72,7 @@ func getSeed(c config.ModelConfig) int32 {
 	return seed
 }
 
-func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
+func grpcModelOpts(c config.ModelConfig, modelPath string) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
@@ -131,7 +131,7 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
 		})
 	}
 
-	return &pb.ModelOptions{
+	opts := &pb.ModelOptions{
 		CUDA:            c.CUDA || c.Diffusers.CUDA,
 		SchedulerType:   c.Diffusers.SchedulerType,
 		GrammarTriggers: triggers,
@@ -170,7 +170,6 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
 		LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
 		LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
 		LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
-		MMProj:              c.MMProj,
 		FlashAttention:      flashAttention,
 		CacheTypeKey:        c.CacheTypeK,
 		CacheTypeValue:      c.CacheTypeV,
@@ -198,6 +197,12 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
 		// RWKV
 		Tokenizer: c.Tokenizer,
 	}
+
+	if c.MMProj != "" {
+		opts.MMProj = filepath.Join(modelPath, c.MMProj)
+	}
+
+	return opts
 }
 
 func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {
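
For context, a minimal sketch of the path resolution this diff moves out of the C++ `params_parse` and into the Go loader: the configured mmproj filename is joined against the models directory before it is sent to the backend, so `request->mmproj()` can be used verbatim on the C++ side. `resolveMMProj` is a hypothetical stand-in for the logic now inlined in `grpcModelOpts`, not a LocalAI API:

```go
package main

import (
	"fmt"
	"path/filepath"
)

// resolveMMProj joins the configured multimodal projector filename with the
// models directory, mirroring the new guard in grpcModelOpts: an empty
// mmproj leaves the option unset rather than producing a bogus path.
func resolveMMProj(modelsPath, mmproj string) string {
	if mmproj == "" {
		return ""
	}
	return filepath.Join(modelsPath, mmproj)
}

func main() {
	fmt.Println(resolveMMProj("/models", "mmproj-model-f16.gguf"))
	// /models/mmproj-model-f16.gguf
	fmt.Println(resolveMMProj("/models", ""))
	// (empty: no projector configured, MMProj stays unset)
}
```

Previously the C++ server derived the projector's directory from the model file's path, which assumed the two always lived side by side; resolving the full path on the Go side removes that guess from the backend.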