|
20 | 20 | llmSemaphore = semaphore.NewWeighted(5) |
21 | 21 | llmTimeout = 60 * time.Second |
22 | 22 |
|
23 | | - // Rate limiter for Fanar API |
24 | | - fanarRateMu sync.Mutex |
25 | | - fanarLastMinute []time.Time |
26 | | - fanarMaxPerMin = 35 |
27 | | - |
28 | 23 | // Anthropic cache stats |
29 | 24 | cacheStatsMu sync.Mutex |
30 | 25 | cacheHits int |
@@ -62,169 +57,20 @@ func generate(prompt *Prompt) (string, error) { |
62 | 57 |
|
63 | 58 | messages = append(messages, map[string]string{"role": "user", "content": prompt.Question}) |
64 | 59 |
|
65 | | - // Check for forced provider |
66 | | - if prompt.Provider == ProviderAnthropic { |
67 | | - if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { |
68 | | - model := prompt.Model |
69 | | - if model == "" { |
70 | | - model = os.Getenv("ANTHROPIC_MODEL") |
71 | | - } |
72 | | - if model == "" { |
73 | | - model = "claude-sonnet-4-20250514" |
74 | | - } |
75 | | - return generateAnthropic(key, model, systemPromptText, messages) |
76 | | - } |
77 | | - return "", fmt.Errorf("anthropic provider requested but ANTHROPIC_API_KEY not set") |
78 | | - } |
79 | | - |
80 | | - if prompt.Provider == ProviderFanar { |
81 | | - if key := os.Getenv("FANAR_API_KEY"); key != "" { |
82 | | - url := os.Getenv("FANAR_API_URL") |
83 | | - if url == "" { |
84 | | - url = "https://api.fanar.qa" |
85 | | - } |
86 | | - return generateFanar(url, key, messages, prompt.Priority) |
87 | | - } |
88 | | - return "", fmt.Errorf("fanar provider requested but FANAR_API_KEY not set") |
| 60 | + key := os.Getenv("ANTHROPIC_API_KEY") |
| 61 | + if key == "" { |
| 62 | + return "", fmt.Errorf("ANTHROPIC_API_KEY not set") |
89 | 63 | } |
90 | 64 |
|
91 | | - if prompt.Provider == ProviderOllama { |
92 | | - model := os.Getenv("MODEL_NAME") |
93 | | - if model == "" { |
94 | | - model = "llama3.2" |
95 | | - } |
96 | | - url := os.Getenv("MODEL_API_URL") |
97 | | - if url == "" { |
98 | | - url = "http://localhost:11434" |
99 | | - } |
100 | | - return generateOllama(url, model, messages) |
101 | | - } |
102 | | - |
103 | | - // Default provider priority: Anthropic > Fanar > Ollama |
104 | | - // (Anthropic first for quality, Fanar as fallback for Arabic/cultural content) |
105 | | - if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { |
106 | | - model := os.Getenv("ANTHROPIC_MODEL") |
107 | | - if model == "" { |
108 | | - model = "claude-sonnet-4-20250514" |
109 | | - } |
110 | | - return generateAnthropic(key, model, systemPromptText, messages) |
111 | | - } |
112 | | - |
113 | | - if key := os.Getenv("FANAR_API_KEY"); key != "" { |
114 | | - url := os.Getenv("FANAR_API_URL") |
115 | | - if url == "" { |
116 | | - url = "https://api.fanar.qa" |
117 | | - } |
118 | | - return generateFanar(url, key, messages, prompt.Priority) |
119 | | - } |
120 | | - |
121 | | - // Default to Ollama |
122 | | - model := os.Getenv("MODEL_NAME") |
| 65 | + model := prompt.Model |
123 | 66 | if model == "" { |
124 | | - model = "llama3.2" |
125 | | - } |
126 | | - url := os.Getenv("MODEL_API_URL") |
127 | | - if url == "" { |
128 | | - url = "http://localhost:11434" |
129 | | - } |
130 | | - return generateOllama(url, model, messages) |
131 | | -} |
132 | | - |
133 | | -func generateOllama(apiURL, model string, messages []map[string]string) (string, error) { |
134 | | - app.Log("ai", "[LLM] Using Ollama at %s with model %s", apiURL, model) |
135 | | - |
136 | | - req := map[string]interface{}{ |
137 | | - "model": model, |
138 | | - "messages": messages, |
139 | | - "stream": false, |
| 67 | + model = os.Getenv("ANTHROPIC_MODEL") |
140 | 68 | } |
141 | | - |
142 | | - body, _ := json.Marshal(req) |
143 | | - httpReq, _ := http.NewRequest("POST", apiURL+"/api/chat", bytes.NewReader(body)) |
144 | | - httpReq.Header.Set("Content-Type", "application/json") |
145 | | - |
146 | | - client := &http.Client{Timeout: llmTimeout} |
147 | | - resp, err := client.Do(httpReq) |
148 | | - if err != nil { |
149 | | - return "", fmt.Errorf("failed to connect to Ollama: %v", err) |
150 | | - } |
151 | | - defer resp.Body.Close() |
152 | | - |
153 | | - respBody, _ := io.ReadAll(resp.Body) |
154 | | - |
155 | | - var result struct { |
156 | | - Message struct { |
157 | | - Content string `json:"content"` |
158 | | - } `json:"message"` |
159 | | - Error string `json:"error"` |
160 | | - } |
161 | | - json.Unmarshal(respBody, &result) |
162 | | - |
163 | | - if result.Error != "" { |
164 | | - return "", fmt.Errorf("ollama error: %s", result.Error) |
165 | | - } |
166 | | - return result.Message.Content, nil |
167 | | -} |
168 | | - |
169 | | -func generateFanar(apiURL, apiKey string, messages []map[string]string, priority int) (string, error) { |
170 | | - if !checkFanarRateLimit(priority) { |
171 | | - maxWait := 3 |
172 | | - if priority == PriorityHigh { |
173 | | - maxWait = 15 |
174 | | - } else if priority == PriorityMedium { |
175 | | - maxWait = 8 |
176 | | - } |
177 | | - |
178 | | - app.Log("ai", "[LLM] Fanar rate limit reached (priority %d), waiting...", priority) |
179 | | - for i := 0; i < maxWait; i++ { |
180 | | - time.Sleep(time.Second) |
181 | | - if checkFanarRateLimit(priority) { |
182 | | - break |
183 | | - } |
184 | | - if i == maxWait-1 { |
185 | | - return "", fmt.Errorf("fanar rate limit exceeded") |
186 | | - } |
187 | | - } |
188 | | - } |
189 | | - |
190 | | - app.Log("ai", "[LLM] Using Fanar at %s", apiURL) |
191 | | - |
192 | | - req := map[string]interface{}{ |
193 | | - "model": "Fanar", |
194 | | - "messages": messages, |
195 | | - } |
196 | | - body, _ := json.Marshal(req) |
197 | | - |
198 | | - httpReq, _ := http.NewRequest("POST", apiURL+"/v1/chat/completions", bytes.NewReader(body)) |
199 | | - httpReq.Header.Set("Content-Type", "application/json") |
200 | | - httpReq.Header.Set("Authorization", "Bearer "+apiKey) |
201 | | - |
202 | | - client := &http.Client{Timeout: llmTimeout} |
203 | | - resp, err := client.Do(httpReq) |
204 | | - if err != nil { |
205 | | - return "", fmt.Errorf("fanar API request failed: %v", err) |
206 | | - } |
207 | | - defer resp.Body.Close() |
208 | | - |
209 | | - respBody, _ := io.ReadAll(resp.Body) |
210 | | - |
211 | | - var result struct { |
212 | | - Choices []struct { |
213 | | - Message struct { |
214 | | - Content string `json:"content"` |
215 | | - } `json:"message"` |
216 | | - } `json:"choices"` |
217 | | - Error interface{} `json:"error"` |
| 69 | + if model == "" { |
| 70 | + model = "claude-sonnet-4-20250514" |
218 | 71 | } |
219 | | - json.Unmarshal(respBody, &result) |
220 | 72 |
|
221 | | - if result.Error != nil { |
222 | | - return "", fmt.Errorf("%v", result.Error) |
223 | | - } |
224 | | - if len(result.Choices) > 0 { |
225 | | - return result.Choices[0].Message.Content, nil |
226 | | - } |
227 | | - return "", nil |
| 73 | + return generateAnthropic(key, model, systemPromptText, messages) |
228 | 74 | } |
229 | 75 |
|
230 | 76 | func generateAnthropic(apiKey, model, systemPrompt string, messages []map[string]string) (string, error) { |
@@ -321,55 +167,6 @@ func generateAnthropic(apiKey, model, systemPrompt string, messages []map[string |
321 | 167 | return content, nil |
322 | 168 | } |
323 | 169 |
|
324 | | -func checkFanarRateLimit(priority int) bool { |
325 | | - fanarRateMu.Lock() |
326 | | - defer fanarRateMu.Unlock() |
327 | | - |
328 | | - now := time.Now() |
329 | | - cutoff := now.Add(-time.Minute) |
330 | | - |
331 | | - var recent []time.Time |
332 | | - for _, t := range fanarLastMinute { |
333 | | - if t.After(cutoff) { |
334 | | - recent = append(recent, t) |
335 | | - } |
336 | | - } |
337 | | - fanarLastMinute = recent |
338 | | - |
339 | | - var maxForPriority int |
340 | | - switch priority { |
341 | | - case PriorityHigh: |
342 | | - maxForPriority = fanarMaxPerMin |
343 | | - case PriorityMedium: |
344 | | - maxForPriority = 25 |
345 | | - default: |
346 | | - maxForPriority = 15 |
347 | | - } |
348 | | - |
349 | | - if len(fanarLastMinute) >= maxForPriority { |
350 | | - return false |
351 | | - } |
352 | | - |
353 | | - fanarLastMinute = append(fanarLastMinute, now) |
354 | | - return true |
355 | | -} |
356 | | - |
357 | | -// GetFanarRateStatus returns current rate limit status |
358 | | -func GetFanarRateStatus() (used, max int) { |
359 | | - fanarRateMu.Lock() |
360 | | - defer fanarRateMu.Unlock() |
361 | | - |
362 | | - now := time.Now() |
363 | | - cutoff := now.Add(-time.Minute) |
364 | | - count := 0 |
365 | | - for _, t := range fanarLastMinute { |
366 | | - if t.After(cutoff) { |
367 | | - count++ |
368 | | - } |
369 | | - } |
370 | | - return count, fanarMaxPerMin |
371 | | -} |
372 | | - |
373 | 170 | // GetCacheStats returns Anthropic prompt cache statistics |
374 | 171 | func GetCacheStats() (hits, misses, readTokens, creationTokens int) { |
375 | 172 | cacheStatsMu.Lock() |
|
0 commit comments