2c76a4cb41
GitOrigin-RevId: c757e9bd77b16ca2e03c89bf8bc9ecb28e0c06ad
15 lines
544 B
Diff
15 lines
544 B
Diff
diff --git a/llm/llama.go b/llm/llama.go
|
|
index 0b460e9..b79e04a 100644
|
|
--- a/llm/llama.go
|
|
+++ b/llm/llama.go
|
|
@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
|
|
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
|
|
}
|
|
|
|
- if opts.NumGQA > 0 {
|
|
- params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
|
|
- }
|
|
-
|
|
if len(adapters) > 0 {
|
|
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
|
|
params = append(params, "--lora", adapters[0])
|