mudler
diff --git a/‎.github/gallery-agent/agent.go‎
Lines changed: 1 addition & 1 deletion b/‎.github/gallery-agent/agent.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/gallery-agent/testing.go‎
Lines changed: 20 additions & 20 deletions b/‎.github/gallery-agent/testing.go‎
Lines changed: 20 additions & 20 deletions
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/test.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎Dockerfile‎
Lines changed: 1 addition & 2 deletions b/‎Dockerfile‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/backend.proto‎
Lines changed: 2 additions & 0 deletions b/‎backend/backend.proto‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backend/cpp/llama-cpp/grpc-server.cpp‎
Lines changed: 57 additions & 1 deletion b/‎backend/cpp/llama-cpp/grpc-server.cpp‎
Lines changed: 57 additions & 1 deletion
diff --git a/‎backend/go/acestep-cpp/acestepcpp_test.go‎
Lines changed: 3 additions & 3 deletions b/‎backend/go/acestep-cpp/acestepcpp_test.go‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backend/go/acestep-cpp/goacestepcpp.go‎
Lines changed: 7 additions & 7 deletions b/‎backend/go/acestep-cpp/goacestepcpp.go‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎backend/go/llm/llama/llama.go‎
Lines changed: 0 additions & 1 deletion b/‎backend/go/llm/llama/llama.go‎
Lines changed: 0 additions & 1 deletion
@@ -406,7 +406,7 @@ func getHuggingFaceAvatarURL(author string) string {
 	}
 
 	// Parse the response to get avatar URL
-	var userInfo map[string]interface{}
+	var userInfo map[string]any
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return ""
 
@@ -3,7 +3,7 @@ package main
 import (
 	"context"
 	"fmt"
-	"math/rand"
+	"math/rand/v2"
 	"strings"
 	"time"
 )
@@ -13,11 +13,11 @@ func runSyntheticMode() error {
 	generator := NewSyntheticDataGenerator()
 
 	// Generate a random number of synthetic models (1-3)
-	numModels := generator.rand.Intn(3) + 1
+	numModels := generator.rand.IntN(3) + 1
 	fmt.Printf("Generating %d synthetic models for testing...\n", numModels)
 
 	var models []ProcessedModel
-	for i := 0; i < numModels; i++ {
+	for i := range numModels {
 		model := generator.GenerateProcessedModel()
 		models = append(models, model)
 		fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
@@ -42,14 +42,14 @@ type SyntheticDataGenerator struct {
 // NewSyntheticDataGenerator returns a generator whose random source is seeded from time.Now().UnixNano().
 func NewSyntheticDataGenerator() *SyntheticDataGenerator {
 	return &SyntheticDataGenerator{
-		rand: rand.New(rand.NewSource(time.Now().UnixNano())),
+		rand: rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), 0)), // PCG takes two seed words: (UnixNano, 0)
 	}
 }
 
 // GenerateProcessedModelFile creates a synthetic ProcessedModelFile
 func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile {
 	fileTypes := []string{"model", "readme", "other"}
-	fileType := fileTypes[g.rand.Intn(len(fileTypes))]
+	fileType := fileTypes[g.rand.IntN(len(fileTypes))]
 
 	var path string
 	var isReadme bool
@@ -68,7 +68,7 @@ func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile
 
 	return ProcessedModelFile{
 		Path:     path,
-		Size:     int64(g.rand.Intn(1000000000) + 1000000), // 1MB to 1GB
+		Size:     int64(g.rand.IntN(1000000000) + 1000000), // 1MB to 1GB
 		SHA256:   g.randomSHA256(),
 		IsReadme: isReadme,
 		FileType: fileType,
@@ -80,19 +80,19 @@ func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel {
 	authors := []string{"microsoft", "meta", "google", "openai", "anthropic", "mistralai", "huggingface"}
 	modelNames := []string{"llama", "gpt", "claude", "mistral", "gemma", "phi", "qwen", "codellama"}
 
-	author := authors[g.rand.Intn(len(authors))]
-	modelName := modelNames[g.rand.Intn(len(modelNames))]
+	author := authors[g.rand.IntN(len(authors))]
+	modelName := modelNames[g.rand.IntN(len(modelNames))]
 	modelID := fmt.Sprintf("%s/%s-%s", author, modelName, g.randomString(6))
 
 	// Generate files
-	numFiles := g.rand.Intn(5) + 2 // 2-6 files
+	numFiles := g.rand.IntN(5) + 2 // 2-6 files
 	files := make([]ProcessedModelFile, numFiles)
 
 	// Ensure at least one model file and one readme
 	hasModelFile := false
 	hasReadme := false
 
-	for i := 0; i < numFiles; i++ {
+	for i := range numFiles {
 		files[i] = g.GenerateProcessedModelFile()
 		if files[i].FileType == "model" {
 			hasModelFile = true
@@ -140,27 +140,27 @@ func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel {
 
 	// Generate sample metadata
 	licenses := []string{"apache-2.0", "mit", "llama2", "gpl-3.0", "bsd", ""}
-	license := licenses[g.rand.Intn(len(licenses))]
+	license := licenses[g.rand.IntN(len(licenses))]
 
 	sampleTags := []string{"llm", "gguf", "gpu", "cpu", "text-to-text", "chat", "instruction-tuned"}
-	numTags := g.rand.Intn(4) + 3 // 3-6 tags
+	numTags := g.rand.IntN(4) + 3 // 3-6 tags
 	tags := make([]string, numTags)
-	for i := 0; i < numTags; i++ {
-		tags[i] = sampleTags[g.rand.Intn(len(sampleTags))]
+	for i := range numTags {
+		tags[i] = sampleTags[g.rand.IntN(len(sampleTags))]
 	}
 	// Remove duplicates
 	tags = g.removeDuplicates(tags)
 
 	// Optionally include icon (50% chance)
 	icon := ""
-	if g.rand.Intn(2) == 0 {
+	if g.rand.IntN(2) == 0 {
 		icon = fmt.Sprintf("https://cdn-avatars.huggingface.co/v1/production/uploads/%s.png", g.randomString(24))
 	}
 
 	return ProcessedModel{
 		ModelID:                 modelID,
 		Author:                  author,
-		Downloads:               g.rand.Intn(1000000) + 1000,
+		Downloads:               g.rand.IntN(1000000) + 1000,
 		LastModified:            g.randomDate(),
 		Files:                   files,
 		PreferredModelFile:      preferredModelFile,
@@ -180,7 +180,7 @@ func (g *SyntheticDataGenerator) randomString(length int) string {
 	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
 	b := make([]byte, length)
 	for i := range b {
-		b[i] = charset[g.rand.Intn(len(charset))]
+		b[i] = charset[g.rand.IntN(len(charset))]
 	}
 	return string(b)
 }
@@ -189,14 +189,14 @@ func (g *SyntheticDataGenerator) randomSHA256() string {
 	const charset = "0123456789abcdef"
 	b := make([]byte, 64)
 	for i := range b {
-		b[i] = charset[g.rand.Intn(len(charset))]
+		b[i] = charset[g.rand.IntN(len(charset))]
 	}
 	return string(b)
 }
 
 // randomDate returns a timestamp string for the current time of day shifted 0-364 days into the past.
 func (g *SyntheticDataGenerator) randomDate() string {
 	now := time.Now()
-	daysAgo := g.rand.Intn(365) // Random date within last year
+	daysAgo := g.rand.IntN(365) // Random offset in days: 0 (today) through 364
 	pastDate := now.AddDate(0, 0, -daysAgo)
 	return pastDate.Format("2006-01-02T15:04:05.000Z") // NOTE(review): now is local time but the layout ends in a literal "Z" - confirm whether UTC was intended
 }
@@ -220,5 +220,5 @@ func (g *SyntheticDataGenerator) generateReadmeContent(modelName, author string)
 		fmt.Sprintf("# %s Language Model\n\nDeveloped by %s, this model represents state-of-the-art performance in natural language understanding and generation.\n\n## Key Features\n\n- Multilingual support\n- Context-aware responses\n- Efficient memory usage\n- Fast inference speed\n\n## Applications\n\n- Chatbots and virtual assistants\n- Content generation\n- Code completion\n- Educational tools", strings.Title(modelName), author),
 	}
 
-	return templates[g.rand.Intn(len(templates))]
+	return templates[g.rand.IntN(len(templates))]
 }
@@ -21,7 +21,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        go-version: ['1.25.x']
+        go-version: ['1.26.x']
     steps:
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@main
@@ -179,7 +179,7 @@ jobs:
     runs-on: macos-latest
     strategy:
       matrix:
-        go-version: ['1.25.x']
+        go-version: ['1.26.x']
     steps:
       - name: Clone
         uses: actions/checkout@v6
 
@@ -176,7 +176,7 @@ ENV PATH=/opt/rocm/bin:${PATH}
 # The requirements-core target is common to all images.  It should not be placed in requirements-core unless every single build will use it.
 FROM requirements-drivers AS build-requirements
 
-ARG GO_VERSION=1.25.4
+ARG GO_VERSION=1.26.0
 ARG CMAKE_VERSION=3.31.10
 ARG CMAKE_FROM_SOURCE=false
 ARG TARGETARCH
@@ -319,7 +319,6 @@ COPY ./.git ./.git
 # Some of the Go backends use libs from the main src, we could further optimize the caching by building the CPP backends before here
 COPY ./pkg/grpc ./pkg/grpc
 COPY ./pkg/utils ./pkg/utils
-COPY ./pkg/langchain ./pkg/langchain
 
 RUN ls -l ./
 RUN make protogen-go
 
@@ -154,6 +154,7 @@ For older news and full release notes, see [GitHub Releases](https://github.com/
 - [Object Detection](https://localai.io/features/object-detection/)
 - [Reranker API](https://localai.io/features/reranker/)
 - [P2P Inferencing](https://localai.io/features/distribute/)
+- [Distributed Mode](https://localai.io/features/distributed-mode/) — Horizontal scaling with PostgreSQL + NATS
 - [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/)
 - [Built-in Agents](https://localai.io/features/agents/) — Autonomous AI agents with tool use, RAG, skills, SSE streaming, and [Agent Hub](https://agenthub.localai.io)
 - [Backend Gallery](https://localai.io/backends/) — Install/remove backends on the fly via OCI images
 
@@ -51,6 +51,7 @@ service Backend {
   rpc StartQuantization(QuantizationRequest) returns (QuantizationJobResult) {}
   rpc QuantizationProgress(QuantizationProgressRequest) returns (stream QuantizationProgressUpdate) {}
   rpc StopQuantization(QuantizationStopRequest) returns (Result) {}
+
 }
 
 // Define the empty request
@@ -676,3 +677,4 @@ message QuantizationProgressUpdate {
 message QuantizationStopRequest {
   string job_id = 1;
 }
+
@@ -22,8 +22,10 @@
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
 #include <grpcpp/grpcpp.h>
 #include <grpcpp/health_check_service_interface.h>
+#include <grpcpp/security/server_credentials.h>
 #include <regex>
 #include <atomic>
+#include <cstdlib>
 #include <mutex>
 #include <signal.h>
 #include <thread>
@@ -37,6 +39,47 @@ using grpc::Server;
 using grpc::ServerBuilder;
 using grpc::ServerContext;
 using grpc::Status;
+
+// gRPC bearer-token check implemented as an AuthMetadataProcessor (distributed mode).
+// The expected token is passed to the constructor; the class itself does not read the environment.
+// Calls lacking an exactly matching "authorization: Bearer <token>" entry get UNAUTHENTICATED.
+class TokenAuthMetadataProcessor : public grpc::AuthMetadataProcessor {
+public:
+    explicit TokenAuthMetadataProcessor(const std::string& token) : token_(token) {}  // token: secret that incoming metadata must match
+
+    bool IsBlocking() const override { return false; }  // non-blocking: per the gRPC API docs, Process then runs on the thread handling the call
+
+    grpc::Status Process(const InputMetadata& auth_metadata,
+                         grpc::AuthContext* /*context*/,
+                         OutputMetadata* /*consumed_auth_metadata*/,
+                         OutputMetadata* /*response_metadata*/) override {
+        auto it = auth_metadata.find("authorization");
+        if (it != auth_metadata.end()) {
+            std::string expected = "Bearer " + token_;
+            std::string got(it->second.data(), it->second.size());  // copy the metadata value into an owned string
+            // Lengths are compared first (short-circuit, not constant-time); equal-length values are then compared byte-by-byte in constant time
+            if (expected.size() == got.size() && ct_memcmp(expected.data(), got.data(), expected.size()) == 0) {
+                return grpc::Status::OK;
+            }
+        }
+        return grpc::Status(grpc::StatusCode::UNAUTHENTICATED, "invalid token");  // same status for a missing header and a wrong token
+    }
+
+private:
+    std::string token_;  // expected token, without the "Bearer " prefix
+
+    // Minimal constant-time comparison (avoids OpenSSL dependency). Returns 0 iff the n bytes at a and b are equal.
+    static int ct_memcmp(const void* a, const void* b, size_t n) {
+        const unsigned char* pa = static_cast<const unsigned char*>(a);
+        const unsigned char* pb = static_cast<const unsigned char*>(b);
+        unsigned char result = 0;
+        for (size_t i = 0; i < n; i++) {
+            result |= pa[i] ^ pb[i];  // accumulate differences; no early exit, so all n bytes are always visited
+        }
+        return result;
+    }
+};
+
 // END LocalAI
 
 
@@ -2760,11 +2803,24 @@ int main(int argc, char** argv) {
     BackendServiceImpl service(ctx_server);
 
     ServerBuilder builder;
-    builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
+    // Add bearer token auth via AuthMetadataProcessor if LOCALAI_GRPC_AUTH_TOKEN is set
+    const char* auth_token = std::getenv("LOCALAI_GRPC_AUTH_TOKEN");
+    std::shared_ptr<grpc::ServerCredentials> creds;
+    if (auth_token != nullptr && auth_token[0] != '\0') {
+        creds = grpc::InsecureServerCredentials();
+        creds->SetAuthMetadataProcessor(
+            std::make_shared<TokenAuthMetadataProcessor>(auth_token));
+        std::cout << "gRPC auth enabled via LOCALAI_GRPC_AUTH_TOKEN" << std::endl;
+    } else {
+        creds = grpc::InsecureServerCredentials();
+    }
+
+    builder.AddListeningPort(server_address, creds);
     builder.RegisterService(&service);
     builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
     builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB
     builder.SetMaxReceiveMessageSize(50 * 1024 * 1024); // 50MB
+
     std::unique_ptr<Server> server(builder.BuildAndStart());
    // run the HTTP server in a thread - see comment below
     std::thread t([&]()
 
@@ -106,10 +106,10 @@ func TestLoadModel(t *testing.T) {
 	defer conn.Close()
 
 	client := pb.NewBackendClient(conn)
-	
+
 	// Get base directory from main model file for relative paths
 	mainModelPath := filepath.Join(modelDir, "acestep-5Hz-lm-0.6B-Q8_0.gguf")
-	
+
 	resp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
 		ModelFile: mainModelPath,
 		ModelPath: modelDir,
@@ -134,7 +134,7 @@ func TestSoundGeneration(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer os.RemoveAll(tmpDir)
+	t.Cleanup(func() { os.RemoveAll(tmpDir) })
 
 	outputFile := filepath.Join(tmpDir, "output.wav")
 
 
@@ -11,7 +11,7 @@ import (
 )
 
 var (
-	CppLoadModel    func(lmModelPath, textEncoderPath, ditModelPath, vaeModelPath string) int
+	CppLoadModel     func(lmModelPath, textEncoderPath, ditModelPath, vaeModelPath string) int
 	CppGenerateMusic func(caption, lyrics string, bpm int, keyscale, timesignature string, duration, temperature float32, instrumental bool, seed int, dst string, threads int) int
 )
 
@@ -29,18 +29,18 @@ func (a *AceStepCpp) Load(opts *pb.ModelOptions) error {
 	var textEncoderModel, ditModel, vaeModel string
 
 	for _, oo := range opts.Options {
-		parts := strings.SplitN(oo, ":", 2)
-		if len(parts) != 2 {
+		key, value, found := strings.Cut(oo, ":")
+		if !found {
 			fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
 			continue
 		}
-		switch parts[0] {
+		switch key {
 		case "text_encoder_model":
-			textEncoderModel = parts[1]
+			textEncoderModel = value
 		case "dit_model":
-			ditModel = parts[1]
+			ditModel = value
 		case "vae_model":
-			vaeModel = parts[1]
+			vaeModel = value
 		default:
 			fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
 		}
 
@@ -18,7 +18,6 @@ type LLM struct {
 	draftModel *llama.LLama
 }
 
-
 // Free releases GPU resources and frees the llama model
 // This should be called when the model is being unloaded to properly release VRAM
 func (llm *LLM) Free() error {
Original file line number	Diff line number	Diff line change
`@@ -406,7 +406,7 @@ func getHuggingFaceAvatarURL(author string) string {`
`406`	`406`	`}`
`407`	`407`
`408`	`408`	`// Parse the response to get avatar URL`
`409`		`- var userInfo map[string]interface{}`
	`409`	`+ var userInfo map[string]any`
`410`	`410`	`body, err := io.ReadAll(resp.Body)`
`411`	`411`	`if err != nil {`
`412`	`412`	`return ""`
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,7 @@ service Backend {`
`51`	`51`	`rpc StartQuantization(QuantizationRequest) returns (QuantizationJobResult) {}`
`52`	`52`	`rpc QuantizationProgress(QuantizationProgressRequest) returns (stream QuantizationProgressUpdate) {}`
`53`	`53`	`rpc StopQuantization(QuantizationStopRequest) returns (Result) {}`
	`54`	`+`
`54`	`55`	`}`
`55`	`56`
`56`	`57`	`// Define the empty request`
`@@ -676,3 +677,4 @@ message QuantizationProgressUpdate {`
`676`	`677`	`message QuantizationStopRequest {`
`677`	`678`	`string job_id = 1;`
`678`	`679`	`}`
	`680`	`+`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,6 @@ type LLM struct {`
`18`	`18`	`draftModel *llama.LLama`
`19`	`19`	`}`
`20`	`20`
`21`		`-`
`22`	`21`	`// Free releases GPU resources and frees the llama model`
`23`	`22`	`// This should be called when the model is being unloaded to properly release VRAM`
`24`	`23`	`func (llm *LLM) Free() error {`