Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/cpp/llama-cpp/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

LLAMA_VERSION?=ced765be44ce173c374f295b3c6f4175f8fd109b
LLAMA_VERSION?=706e3f93a60109a40f1224eaf4af0d59caa7c3ae
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

CMAKE_ARGS?=
Expand Down
4 changes: 1 addition & 3 deletions backend/cpp/llama-cpp/grpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt

params.model.path = request->modelfile();
if (!request->mmproj().empty()) {
// get the directory of modelfile
std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
params.mmproj.path = model_dir + "/"+ request->mmproj();
params.mmproj.path = request->mmproj();
}
// params.model_alias ??
params.model_alias = request->modelfile();
Expand Down
13 changes: 9 additions & 4 deletions core/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...mo

c.Threads = &threads

grpcOpts := grpcModelOpts(c)
grpcOpts := grpcModelOpts(c, so.SystemState.Model.ModelsPath)
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))

if so.ParallelBackendRequests {
Expand Down Expand Up @@ -72,7 +72,7 @@ func getSeed(c config.ModelConfig) int32 {
return seed
}

func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
func grpcModelOpts(c config.ModelConfig, modelPath string) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
Expand Down Expand Up @@ -131,7 +131,7 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
})
}

return &pb.ModelOptions{
opts := &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
GrammarTriggers: triggers,
Expand Down Expand Up @@ -170,7 +170,6 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
MMProj: c.MMProj,
FlashAttention: flashAttention,
CacheTypeKey: c.CacheTypeK,
CacheTypeValue: c.CacheTypeV,
Expand Down Expand Up @@ -198,6 +197,12 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
// RWKV
Tokenizer: c.Tokenizer,
}

if c.MMProj != "" {
opts.MMProj = filepath.Join(modelPath, c.MMProj)
}

return opts
}

func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {
Expand Down
Loading