From ec540d2a0dfe28619db80f4d5d83bae37f029393 Mon Sep 17 00:00:00 2001 From: Rolando Santamaria Maso Date: Wed, 10 Jun 2026 09:13:03 +0200 Subject: [PATCH 1/4] feat(web_search): add SearXNG-backed web_search tool + compose sidecar Gives the agent local-first web search with no cloud API or keys, matching the transcribe/vision zero-setup pattern. Tool (cmd/odek/web_search_tool.go): - Native Go tool querying a self-hosted SearXNG JSON API; returns ranked results (title/url/snippet/engine) + direct answers, capped by max_results. - Output wrapped as untrusted content (SERP snippets can carry injection). - Gated as network_egress (prompt in restricted, allow in godmode), consistent with browser/http_batch. The backend URL is fixed config, not agent-supplied, so the tool has no SSRF surface (only a query string is accepted). - Registered only when web_search.base_url is set, so plain installs without a SearXNG instance don't see a dead tool. Config (internal/config): - WebSearchConfig{BaseURL, Categories, Language, MaxResults, Timeout} threaded end-to-end (FileConfig, ResolvedConfig, resolveWebSearch, overlayFile). Wiring (cmd/odek): - builtinTools' growing positional config params (Transcription, Vision) are bundled into a toolConfig struct to stop per-tool signature churn; all ~10 call sites updated. web_search is threaded into run/serve/repl/telegram/ schedule/subagent/mcp. Docker: - New `searxng` compose sidecar (pinned image), co-starting with every profile, internal-only (no host port), with depends_on wired on each odek service. - docker/searxng/settings.yml enables the JSON API and disables the anti-bot limiter, so no Redis/Valkey is needed. SEARXNG_SECRET added to .env.example. - Both bundled configs set web_search.base_url=http://searxng:8080. 
Tests: hermetic httptest SearXNG mock covering happy path, max_results override vs config cap, untrusted wrapping, JSON-disabled 403, unreachable backend, policy denial, empty query, schema; resolveWebSearch defaults/merge. Full suite green under -race. Docs: README, SECURITY, CHEATSHEET, CONFIG, TELEGRAM, docker/README, DOCKER_COMPOSE_USER_GUIDE. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 2 +- cmd/odek/injection_hardening_test.go | 3 +- cmd/odek/main.go | 27 +++- cmd/odek/main_test.go | 2 +- cmd/odek/mcp.go | 2 +- cmd/odek/repl.go | 2 +- cmd/odek/schedule.go | 2 +- cmd/odek/serve.go | 2 +- cmd/odek/subagent.go | 2 +- cmd/odek/subagent_contract_test.go | 7 +- cmd/odek/telegram.go | 2 +- cmd/odek/web_search_tool.go | 227 +++++++++++++++++++++++++++ cmd/odek/web_search_tool_test.go | 187 ++++++++++++++++++++++ docker/.env.example | 6 + docker/README.md | 18 +++ docker/config.godmode.json | 4 + docker/config.restricted.json | 4 + docker/docker-compose.yml | 24 +++ docker/searxng/settings.yml | 43 +++++ docs/CHEATSHEET.md | 22 +++ docs/CONFIG.md | 2 +- docs/DOCKER_COMPOSE_USER_GUIDE.md | 12 ++ docs/SECURITY.md | 1 + docs/TELEGRAM.md | 2 +- internal/config/loader.go | 51 ++++++ internal/config/websearch_test.go | 54 +++++++ 26 files changed, 689 insertions(+), 21 deletions(-) create mode 100644 cmd/odek/web_search_tool.go create mode 100644 cmd/odek/web_search_tool_test.go create mode 100644 docker/searxng/settings.yml create mode 100644 internal/config/websearch_test.go diff --git a/README.md b/README.md index b93a577..15b5af5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ odek is not a framework. It's a **runtime** — the smallest possible surface ar Every session can run in an isolated Docker container: no network, no host mounts beyond the working directory, zero capabilities, destroyed on exit. `odek serve` enables the sandbox **by default**; `odek run` keeps it opt-in but warns when running unsandboxed. 
`--ctx` files are auto-injected into the container at `/workspace/`. Full security model in [docs/SANDBOXING.md](docs/SANDBOXING.md). ### 🛡️ Prompt-Injection-Aware -External content the agent ingests (`browser`, `read_file`, `shell`, `search_files`, `multi_grep`, `transcribe`, `vision`, `session_search`, MCP tools) is wrapped in per-call nonce'd `<untrusted_content>` boundaries so the model can distinguish data from instructions. Redirect hops are re-classified (`browser`/`http_batch`), MCP tool descriptions are scanned for injection at registration, and the MCP error channel is wrapped too. The danger classifier resists 8 known shell-evasion tricks (`$()`, backticks, `$IFS`, `command`/`exec`, `\rm`, basenamed absolute paths). Approvers engage friction mode after 3 same-class approvals in 60 s. Memory episodes from tainted sessions are stored but never auto-replayed. Skill auto-save tracks provenance and pins untrusted suggestions for explicit `odek skill promote`. `odek audit ` surfaces every ingest + per-turn divergence heuristic. Full threat model in [docs/SECURITY.md](docs/SECURITY.md). +External content the agent ingests (`browser`, `read_file`, `shell`, `search_files`, `multi_grep`, `transcribe`, `vision`, `web_search`, `session_search`, MCP tools) is wrapped in per-call nonce'd `<untrusted_content>` boundaries so the model can distinguish data from instructions. Redirect hops are re-classified (`browser`/`http_batch`), MCP tool descriptions are scanned for injection at registration, and the MCP error channel is wrapped too. The danger classifier resists 8 known shell-evasion tricks (`$()`, backticks, `$IFS`, `command`/`exec`, `\rm`, basenamed absolute paths). Approvers engage friction mode after 3 same-class approvals in 60 s. Memory episodes from tainted sessions are stored but never auto-replayed. Skill auto-save tracks provenance and pins untrusted suggestions for explicit `odek skill promote`. `odek audit ` surfaces every ingest + per-turn divergence heuristic. 
Full threat model in [docs/SECURITY.md](docs/SECURITY.md). ### 🧩 Sub-Agent Delegation Parallel OS-process sub-agents via `delegate_tasks`. True isolation — each sub-agent is a fresh `odek subagent` process with its own config, tools, and termination timeout. Up to 8 concurrent workers. [docs/SUBAGENTS.md](docs/SUBAGENTS.md) diff --git a/cmd/odek/injection_hardening_test.go b/cmd/odek/injection_hardening_test.go index d19a220..d890fea 100644 --- a/cmd/odek/injection_hardening_test.go +++ b/cmd/odek/injection_hardening_test.go @@ -9,7 +9,6 @@ import ( "testing" "github.com/BackendStack21/odek" - "github.com/BackendStack21/odek/internal/config" "github.com/BackendStack21/odek/internal/danger" ) @@ -244,7 +243,7 @@ func TestBuiltinTools_SessionSearchWrappedAsUntrusted(t *testing.T) { store, cleanup := seedSessionStore(t) defer cleanup() - tools := builtinTools(danger.DangerousConfig{}, nil, nil, 4, "", config.TranscriptionConfig{}, config.VisionConfig{}, store) + tools := builtinTools(danger.DangerousConfig{}, nil, nil, 4, "", toolConfig{}, store) var ss odek.Tool for _, tool := range tools { diff --git a/cmd/odek/main.go b/cmd/odek/main.go index 4830d09..a18e799 100644 --- a/cmd/odek/main.go +++ b/cmd/odek/main.go @@ -779,7 +779,7 @@ func run(args []string) error { // Sandbox setup var sandboxCleanup func() error - tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, resolved.Vision, nil) + tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{Transcription: resolved.Transcription, Vision: resolved.Vision, WebSearch: resolved.WebSearch}, nil) // MCP server tools var mcpCleanup func() @@ -1054,7 +1054,17 @@ func setupSandbox(tools []odek.Tool, cfg sandboxConfig) (containerName string, c return containerName, cleanup, nil } -func builtinTools(dc danger.DangerousConfig, sm *skills.SkillManager, approver danger.Approver, maxConcurrency int, apiKey string, tc 
config.TranscriptionConfig, vc config.VisionConfig, store *session.Store) []odek.Tool { +// toolConfig bundles the per-tool configuration sections threaded into +// builtinTools. Grouping them keeps the builtinTools signature stable as new +// configurable tools are added (rather than growing a positional parameter +// per tool). +type toolConfig struct { + Transcription config.TranscriptionConfig + Vision config.VisionConfig + WebSearch config.WebSearchConfig +} + +func builtinTools(dc danger.DangerousConfig, sm *skills.SkillManager, approver danger.Approver, maxConcurrency int, apiKey string, tcfg toolConfig, store *session.Store) []odek.Tool { tools := []odek.Tool{ &shellTool{ dangerousConfig: dc, @@ -1088,8 +1098,8 @@ func builtinTools(dc danger.DangerousConfig, sm *skills.SkillManager, approver d &base64Tool{dangerousConfig: dc}, &trTool{dangerousConfig: dc}, &wordCountTool{dangerousConfig: dc}, - newTranscribeTool(dc, tc), - newVisionTool(dc, vc), + newTranscribeTool(dc, tcfg.Transcription), + newVisionTool(dc, tcfg.Vision), // session_search returns content from arbitrary past sessions — // including sessions that ingested untrusted content. That path // otherwise bypasses the memory taint gate and the audit log, so @@ -1098,6 +1108,13 @@ func builtinTools(dc danger.DangerousConfig, sm *skills.SkillManager, approver d newBrowserTool(dc), } + // web_search is registered only when a SearXNG backend is configured — + // without a base_url there is no instance to query, so the tool would just + // confuse the agent. The Docker compose setup sets this automatically. 
+ if tcfg.WebSearch.BaseURL != "" { + tools = append(tools, newWebSearchTool(dc, tcfg.WebSearch)) + } + if sm != nil { tools = append(tools, &skills.SkillLoadTool{Manager: sm}, @@ -1599,7 +1616,7 @@ func continueCmd(args []string) error { "./.odek/skills", ) } - tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, resolved.Vision, store) + tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{Transcription: resolved.Transcription, Vision: resolved.Vision, WebSearch: resolved.WebSearch}, store) var sandboxCleanup func() error // MCP server tools diff --git a/cmd/odek/main_test.go b/cmd/odek/main_test.go index 619d412..2d253e7 100644 --- a/cmd/odek/main_test.go +++ b/cmd/odek/main_test.go @@ -203,7 +203,7 @@ func TestRun_NoAPIKey(t *testing.T) { } func TestBuiltinTools(t *testing.T) { - tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", toolConfig{}, nil) if len(tools) == 0 { t.Fatal("builtinTools() returned empty slice") } diff --git a/cmd/odek/mcp.go b/cmd/odek/mcp.go index 55f604f..36c9e1d 100644 --- a/cmd/odek/mcp.go +++ b/cmd/odek/mcp.go @@ -73,7 +73,7 @@ Flags: } // Build tools - toolSet := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, config.TranscriptionConfig{}, config.VisionConfig{}, nil) + toolSet := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{WebSearch: resolved.WebSearch}, nil) // MCP server tools — connect and discover before sandbox var mcpCleanup func() diff --git a/cmd/odek/repl.go b/cmd/odek/repl.go index bd2fcae..142b348 100644 --- a/cmd/odek/repl.go +++ b/cmd/odek/repl.go @@ -77,7 +77,7 @@ func replCmd(args []string) error { "./.odek/skills", ) } - tools := builtinTools(resolved.Dangerous, sm, nil, 
resolved.MaxConcurrency, resolved.APIKey, config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{WebSearch: resolved.WebSearch}, nil) var sandboxCleanup func() error // MCP server tools diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go index 2858370..1ebdf4d 100644 --- a/cmd/odek/schedule.go +++ b/cmd/odek/schedule.go @@ -570,7 +570,7 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system resolved.Dangerous.NonInteractive = &deny } - tools := builtinTools(resolved.Dangerous, nil, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, resolved.Vision, nil) + tools := builtinTools(resolved.Dangerous, nil, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{Transcription: resolved.Transcription, Vision: resolved.Vision, WebSearch: resolved.WebSearch}, nil) tools = append(tools, mcpTools...) // Capture cumulative token usage from the final iteration so the Runner diff --git a/cmd/odek/serve.go b/cmd/odek/serve.go index b37a06d..bca8138 100644 --- a/cmd/odek/serve.go +++ b/cmd/odek/serve.go @@ -267,7 +267,7 @@ func newServeAgent(resolved config.ResolvedConfig, system string, sendFn func(v approver := newWSApprover(sendFn) resolved.Dangerous.Approver = approver - tools := builtinTools(resolved.Dangerous, sm, approver, resolved.MaxConcurrency, resolved.APIKey, config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(resolved.Dangerous, sm, approver, resolved.MaxConcurrency, resolved.APIKey, toolConfig{WebSearch: resolved.WebSearch}, nil) // Find the delegateTasksTool to wire up sub-agent log streaming var subagentTool *delegateTasksTool diff --git a/cmd/odek/subagent.go b/cmd/odek/subagent.go index b4f8275..6c52013 100644 --- a/cmd/odek/subagent.go +++ b/cmd/odek/subagent.go @@ -291,7 +291,7 @@ func subagentCmd(args []string) error { "./.odek/skills", ) } - tools := 
builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(resolved.Dangerous, sm, nil, resolved.MaxConcurrency, resolved.APIKey, toolConfig{WebSearch: resolved.WebSearch}, nil) var sandboxCleanup func() error // MCP server tools diff --git a/cmd/odek/subagent_contract_test.go b/cmd/odek/subagent_contract_test.go index ffb9221..340205f 100644 --- a/cmd/odek/subagent_contract_test.go +++ b/cmd/odek/subagent_contract_test.go @@ -12,7 +12,6 @@ import ( "time" "github.com/BackendStack21/odek" - "github.com/BackendStack21/odek/internal/config" "github.com/BackendStack21/odek/internal/danger" "github.com/BackendStack21/odek/internal/llm" ) @@ -320,7 +319,7 @@ func TestSubagent_ExitCodeThree(t *testing.T) { // ── 4. delegate_tasks Tool Schema ─────────────────────────────────── func TestDelegateTasksTool_Exists(t *testing.T) { - tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", toolConfig{}, nil) if len(tools) == 0 { t.Fatal("builtinTools() returned empty slice") } @@ -338,7 +337,7 @@ func TestDelegateTasksTool_Exists(t *testing.T) { } func TestDelegateTasksTool_HasSchema(t *testing.T) { - tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", toolConfig{}, nil) var tool odek.Tool for _, t2 := range tools { @@ -432,7 +431,7 @@ func TestDelegateTasksTool_HasSchema(t *testing.T) { } func TestDelegateTasksTool_Description(t *testing.T) { - tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", config.TranscriptionConfig{}, config.VisionConfig{}, nil) + tools := builtinTools(danger.DangerousConfig{}, nil, nil, 3, "", toolConfig{}, nil) var tool odek.Tool for _, t2 := range tools { diff --git 
a/cmd/odek/telegram.go b/cmd/odek/telegram.go index ab48e4c..4dbbb70 100644 --- a/cmd/odek/telegram.go +++ b/cmd/odek/telegram.go @@ -1113,7 +1113,7 @@ func handleChatMessage( } // Build the agent with Telegram approver. - tools := builtinTools(resolved.Dangerous, nil, approver, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, resolved.Vision, sessionManager.Store) + tools := builtinTools(resolved.Dangerous, nil, approver, resolved.MaxConcurrency, resolved.APIKey, toolConfig{Transcription: resolved.Transcription, Vision: resolved.Vision, WebSearch: resolved.WebSearch}, sessionManager.Store) modelLabel := odek.ProfileLabel(resolved.Model) if modelLabel == "" { diff --git a/cmd/odek/web_search_tool.go b/cmd/odek/web_search_tool.go new file mode 100644 index 0000000..d4b6669 --- /dev/null +++ b/cmd/odek/web_search_tool.go @@ -0,0 +1,227 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/BackendStack21/odek" + "github.com/BackendStack21/odek/internal/config" + "github.com/BackendStack21/odek/internal/danger" +) + +// maxSearXNGBody caps the response body read from SearXNG (defensive — a +// metasearch JSON payload is small; this guards against a misbehaving backend). 
+const maxSearXNGBody = 4 << 20 // 4 MiB + +// ═════════════════════════════════════════════════════════════════════════ +// web_search Tool (SearXNG JSON API backend) +// ═════════════════════════════════════════════════════════════════════════ + +type webSearchTool struct { + dangerousConfig danger.DangerousConfig + cfg config.WebSearchConfig + client *http.Client +} + +func newWebSearchTool(dc danger.DangerousConfig, cfg config.WebSearchConfig) *webSearchTool { + timeout := cfg.Timeout + if timeout <= 0 { + timeout = 15 + } + return &webSearchTool{ + dangerousConfig: dc, + cfg: cfg, + client: &http.Client{Timeout: time.Duration(timeout) * time.Second}, + } +} + +func (t *webSearchTool) Name() string { return "web_search" } + +func (t *webSearchTool) Description() string { + return `Search the web via a self-hosted SearXNG metasearch instance. Returns ranked results (title, url, snippet, engine) plus any direct answers. Use this to find pages, then fetch the most relevant URLs with the browser or http_batch tools. Results come from external search engines and are treated as untrusted content.` +} + +type webSearchArgs struct { + Query string `json:"query"` + Category string `json:"category,omitempty"` + MaxResults int `json:"max_results,omitempty"` +} + +func (t *webSearchTool) Schema() any { + return map[string]any{ + "type": "object", + "properties": map[string]any{ + "query": map[string]any{ + "type": "string", + "description": "The search query.", + }, + "category": map[string]any{ + "type": "string", + "description": "Optional SearXNG category to restrict the search (e.g. \"general\", \"news\", \"science\", \"it\"). Defaults to the instance configuration.", + }, + "max_results": map[string]any{ + "type": "integer", + "description": "Optional cap on the number of results returned. Defaults to the configured maximum.", + }, + }, + "required": []string{"query"}, + } +} + +// searxngResponse models the subset of the SearXNG JSON API we surface. 
+type searxngResponse struct { + Query string `json:"query"` + Results []struct { + Title string `json:"title"` + URL string `json:"url"` + Content string `json:"content"` + Engine string `json:"engine"` + } `json:"results"` + Answers []json.RawMessage `json:"answers"` + Infoboxes []json.RawMessage `json:"infoboxes"` + Suggestions []string `json:"suggestions"` +} + +type webSearchResult struct { + Title string `json:"title"` + URL string `json:"url"` + Snippet string `json:"snippet,omitempty"` + Engine string `json:"engine,omitempty"` +} + +type webSearchOutput struct { + Query string `json:"query"` + Results []webSearchResult `json:"results"` + Answers []string `json:"answers,omitempty"` + Count int `json:"count"` + Error string `json:"error,omitempty"` +} + +func (t *webSearchTool) Call(argsJSON string) (result string, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("web_search: panic: %v", r) + result = `{"error":"internal error"}` + } + }() + + var args webSearchArgs + if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { + return jsonError("invalid arguments: " + err.Error()) + } + query := strings.TrimSpace(args.Query) + if query == "" { + return jsonError("query is required") + } + if t.cfg.BaseURL == "" { + return jsonError("web_search is not configured: set web_search.base_url to a SearXNG instance") + } + + // Security: a web search ultimately fans out to external search engines and + // leaks the query terms beyond the trust boundary, so gate it as network + // egress — consistent with the browser/http_batch tools. The backend URL is + // fixed config (not agent-controlled), so there is no SSRF surface here. 
+ if err := t.dangerousConfig.CheckOperation(danger.ToolOperation{ + Name: "web_search", Resource: query, Risk: danger.NetworkEgress, + }, nil); err != nil { + return jsonError(err.Error()) + } + + maxResults := args.MaxResults + if maxResults <= 0 { + maxResults = t.cfg.MaxResults + } + if maxResults <= 0 { + maxResults = 10 + } + + resp, err := t.query(query, args.Category) + if err != nil { + return jsonResult(webSearchOutput{Query: query, Error: err.Error()}) + } + + out := webSearchOutput{Query: query} + for _, r := range resp.Results { + if len(out.Results) >= maxResults { + break + } + out.Results = append(out.Results, webSearchResult{ + Title: r.Title, + URL: r.URL, + Snippet: r.Content, + Engine: r.Engine, + }) + } + out.Count = len(out.Results) + for _, a := range resp.Answers { + if s := strings.TrimSpace(string(a)); s != "" && s != "null" { + out.Answers = append(out.Answers, strings.Trim(s, `"`)) + } + } + + raw, mErr := json.Marshal(out) + if mErr != nil { + return jsonError("marshal error: " + mErr.Error()) + } + // Results are external web content — wrap so the model distinguishes data + // from instructions (a SERP snippet could carry an injection payload). + return wrapUntrusted("web_search:"+query, string(raw)), nil +} + +// query performs the SearXNG JSON request and decodes the response. 
+func (t *webSearchTool) query(query, category string) (*searxngResponse, error) { + endpoint, err := url.Parse(strings.TrimRight(t.cfg.BaseURL, "/") + "/search") + if err != nil { + return nil, fmt.Errorf("invalid web_search base_url %q: %v", t.cfg.BaseURL, err) + } + q := endpoint.Query() + q.Set("q", query) + q.Set("format", "json") + if cat := strings.TrimSpace(category); cat != "" { + q.Set("categories", cat) + } else if t.cfg.Categories != "" { + q.Set("categories", t.cfg.Categories) + } + if t.cfg.Language != "" { + q.Set("language", t.cfg.Language) + } + endpoint.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, endpoint.String(), nil) + if err != nil { + return nil, fmt.Errorf("build request: %v", err) + } + req.Header.Set("Accept", "application/json") + + httpResp, err := t.client.Do(req) + if err != nil { + return nil, fmt.Errorf("cannot reach SearXNG at %s — is the service running? (%v)", t.cfg.BaseURL, err) + } + defer httpResp.Body.Close() + + if httpResp.StatusCode == http.StatusForbidden { + return nil, fmt.Errorf("SearXNG returned 403 for format=json — enable the JSON API in settings.yml (search.formats must include \"json\")") + } + if httpResp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("SearXNG returned HTTP %d", httpResp.StatusCode) + } + + body, err := io.ReadAll(io.LimitReader(httpResp.Body, maxSearXNGBody)) + if err != nil { + return nil, fmt.Errorf("read response: %v", err) + } + + var resp searxngResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("decode SearXNG JSON (got %d bytes): %v", len(body), err) + } + return &resp, nil +} + +// Ensure webSearchTool implements odek.Tool +var _ odek.Tool = (*webSearchTool)(nil) diff --git a/cmd/odek/web_search_tool_test.go b/cmd/odek/web_search_tool_test.go new file mode 100644 index 0000000..de330b6 --- /dev/null +++ b/cmd/odek/web_search_tool_test.go @@ -0,0 +1,187 @@ +package main + +import ( + "encoding/json" + "net/http" + 
"net/http/httptest" + "strings" + "testing" + + "github.com/BackendStack21/odek/internal/config" + "github.com/BackendStack21/odek/internal/danger" +) + +// allowAll is a danger config that permits network egress without prompting, +// so the tool's gating doesn't block the hermetic test. +func allowAllDanger() danger.DangerousConfig { + return danger.DangerousConfig{Classes: map[danger.RiskClass]danger.Action{ + danger.NetworkEgress: danger.Allow, + }} +} + +// mockSearXNG returns a test server that serves a canned JSON SERP and records +// the last query it received. +func mockSearXNG(t *testing.T, results int) (*httptest.Server, *string) { + t.Helper() + var lastQuery string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("format") != "json" { + w.WriteHeader(http.StatusForbidden) + return + } + lastQuery = r.URL.Query().Get("q") + resp := map[string]any{"query": lastQuery} + var rs []map[string]string + for i := 0; i < results; i++ { + rs = append(rs, map[string]string{ + "title": "Result " + string(rune('A'+i)), + "url": "https://example.com/" + string(rune('a'+i)), + "content": "snippet text", + "engine": "duckduckgo", + }) + } + resp["results"] = rs + resp["answers"] = []string{"42"} + _ = json.NewEncoder(w).Encode(resp) + })) + t.Cleanup(srv.Close) + return srv, &lastQuery +} + +func decodeWebSearch(t *testing.T, raw string) webSearchOutput { + t.Helper() + // Strip the untrusted_content wrapper to get at the JSON payload. 
+ start := strings.IndexByte(raw, '{') + end := strings.LastIndexByte(raw, '}') + if start < 0 || end < start { + t.Fatalf("no JSON object found in output: %q", raw) + } + var out webSearchOutput + if err := json.Unmarshal([]byte(raw[start:end+1]), &out); err != nil { + t.Fatalf("decode webSearchOutput: %v (raw=%q)", err, raw) + } + return out +} + +func TestWebSearch_HappyPath(t *testing.T) { + srv, lastQuery := mockSearXNG(t, 3) + tool := newWebSearchTool(allowAllDanger(), config.WebSearchConfig{BaseURL: srv.URL, MaxResults: 10}) + + raw, err := tool.Call(`{"query":"golang generics"}`) + if err != nil { + t.Fatalf("Call error: %v", err) + } + if !strings.Contains(raw, "` | | `transcribe` | `transcribe: