Skip to content

Intelligence Module

The intelligence module handles model management and query routing. It provides the HeuristicRouter which selects models based on query characteristics (code detection, math detection, query length, urgency), and a model catalog of well-known local and cloud models with their specifications.

Heuristic Router

HeuristicRouter

HeuristicRouter

HeuristicRouter(available_models: List[str] | None = None, *, default_model: str = '', fallback_model: str = '')

Bases: RouterPolicy

Rule-based model router.

Rules (applied in order):

1. Code detected → prefer a model with "code"/"coder" in its name
2. Math detected → prefer a larger model
3. Short query (<50 chars, no code/math) → prefer a smaller/faster model
4. Long/complex query (>500 chars OR reasoning keywords) → prefer a larger model
5. High urgency (>0.8) → override to a smaller model
6. Default fallback → default_model → fallback_model → first available

Source code in src/openjarvis/learning/router.py
def __init__(
    self,
    available_models: List[str] | None = None,
    *,
    default_model: str = "",
    fallback_model: str = "",
) -> None:
    """Initialise the router with the models it may choose between.

    Args:
        available_models: Candidate model identifiers the router may select
            from. Defensively copied so later mutation of the caller's list
            cannot silently change routing decisions.
        default_model: Preferred model when no routing rule matches.
        fallback_model: Last-resort model used when the default is unset
            or unavailable.
    """
    # Copy rather than alias the caller's list (the original kept a shared
    # reference, so external mutation leaked into routing state).
    self._available: List[str] = list(available_models) if available_models else []
    self._default = default_model
    self._fallback = fallback_model

build_routing_context

build_routing_context

build_routing_context(query: str, *, urgency: float = 0.5) -> RoutingContext

Populate a RoutingContext from a raw query string.

Source code in src/openjarvis/learning/router.py
def build_routing_context(query: str, *, urgency: float = 0.5) -> RoutingContext:
    """Build a ``RoutingContext`` describing *query* for the router.

    Args:
        query: Raw user query text to characterise.
        urgency: Caller-supplied urgency score; defaults to neutral (0.5).

    Returns:
        A ``RoutingContext`` carrying the query, its length, and the
        code/math detection flags derived from the module regexes.
    """
    contains_code = _CODE_PATTERNS.search(query) is not None
    contains_math = _MATH_PATTERNS.search(query) is not None
    return RoutingContext(
        query=query,
        query_length=len(query),
        has_code=contains_code,
        has_math=contains_math,
        urgency=urgency,
    )

Model Catalog

Built-in catalog of well-known model specifications, including local models (Qwen, Llama, Mistral, DeepSeek) and cloud models (OpenAI, Anthropic, Google).

BUILTIN_MODELS

BUILTIN_MODELS module-attribute

BUILTIN_MODELS: List[ModelSpec] = [ModelSpec(model_id='qwen3:8b', name='Qwen3 8B', parameter_count_b=8.2, context_length=32768, supported_engines=('vllm', 'ollama', 'llamacpp', 'sglang'), provider='alibaba', metadata={'architecture': 'dense'}), ModelSpec(model_id='qwen3:32b', name='Qwen3 32B', parameter_count_b=32.0, context_length=32768, min_vram_gb=20.0, supported_engines=('ollama', 'vllm'), provider='alibaba', metadata={'architecture': 'dense'}), ModelSpec(model_id='llama3.3:70b', name='Llama 3.3 70B', parameter_count_b=70.0, context_length=131072, min_vram_gb=40.0, supported_engines=('ollama', 'vllm'), provider='meta', metadata={'architecture': 'dense'}), ModelSpec(model_id='llama3.2:3b', name='Llama 3.2 3B', parameter_count_b=3.0, context_length=131072, supported_engines=('ollama', 'vllm', 'llamacpp'), provider='meta', metadata={'architecture': 'dense'}), ModelSpec(model_id='deepseek-coder-v2:16b', name='DeepSeek Coder V2 16B', parameter_count_b=16.0, context_length=131072, supported_engines=('ollama', 'vllm'), provider='deepseek', metadata={'architecture': 'dense'}), ModelSpec(model_id='mistral:7b', name='Mistral 7B', parameter_count_b=7.0, context_length=32768, supported_engines=('ollama', 'vllm', 'llamacpp'), provider='mistral', metadata={'architecture': 'dense'}), ModelSpec(model_id='gpt-oss:120b', name='GPT-OSS 120B', parameter_count_b=117.0, active_parameter_count_b=5.1, context_length=131072, min_vram_gb=12.0, supported_engines=('vllm', 'ollama'), provider='open-source', metadata={'architecture': 'moe'}), ModelSpec(model_id='glm-4.7-flash', name='GLM 4.7 Flash', parameter_count_b=30.0, active_parameter_count_b=3.0, context_length=131072, min_vram_gb=8.0, supported_engines=('vllm', 'sglang', 'llamacpp'), provider='zhipu', metadata={'architecture': 'moe'}), ModelSpec(model_id='trinity-mini', name='Trinity Mini', parameter_count_b=26.0, active_parameter_count_b=3.0, context_length=128000, min_vram_gb=8.0, supported_engines=('vllm', 'llamacpp'), 
provider='trinity', metadata={'architecture': 'moe'}), ModelSpec(model_id='teichai/glm-4.7-flash-opus-distill', name='GLM 4.7 Flash Claude Opus 4.5 Distill', parameter_count_b=30.0, active_parameter_count_b=3.0, context_length=131072, min_vram_gb=8.0, supported_engines=('vllm', 'llamacpp'), provider='teichai', metadata={'architecture': 'moe', 'hf_repo': 'TeichAI/GLM-4.7-Flash-Claude-Opus-4.5-High-Reasoning-Distill-GGUF', 'teacher': 'Claude Opus 4.5', 'quantization': 'GGUF Q4_K_M / Q8_0', 'license': 'apache-2.0'}), ModelSpec(model_id='teichai/qwen3-14b-gpt5.2-distill', name='Qwen3 14B GPT-5.2 Distill', parameter_count_b=14.8, context_length=32768, min_vram_gb=10.0, supported_engines=('vllm', 'llamacpp'), provider='teichai', metadata={'architecture': 'dense', 'hf_repo': 'TeichAI/Qwen3-14B-GPT-5.2-Distill-GGUF', 'teacher': 'GPT-5.2', 'quantization': 'GGUF Q4_K_M / Q8_0', 'license': 'apache-2.0'}), ModelSpec(model_id='teichai/nemotron-14b-opus-distill', name='Nemotron Cascade 14B Claude Opus Distill', parameter_count_b=14.8, context_length=32768, min_vram_gb=10.0, supported_engines=('vllm', 'llamacpp'), provider='teichai', metadata={'architecture': 'dense', 'hf_repo': 'TeichAI/Nemotron-Cascade-14B-Claude-Opus-Distill-GGUF', 'teacher': 'Claude 4.5 Opus', 'quantization': 'GGUF Q4_K_M / Q8_0', 'license': 'apache-2.0'}), ModelSpec(model_id='gpt-4o', name='GPT-4o', parameter_count_b=0.0, context_length=128000, supported_engines=('cloud',), provider='openai', requires_api_key=True, metadata={'pricing_input': 2.5, 'pricing_output': 10.0}), ModelSpec(model_id='gpt-4o-mini', name='GPT-4o Mini', parameter_count_b=0.0, context_length=128000, supported_engines=('cloud',), provider='openai', requires_api_key=True, metadata={'pricing_input': 0.15, 'pricing_output': 0.6}), ModelSpec(model_id='gpt-5-mini', name='GPT-5 Mini', parameter_count_b=0.0, context_length=400000, supported_engines=('cloud',), provider='openai', requires_api_key=True, metadata={'pricing_input': 0.25, 
'pricing_output': 2.0}), ModelSpec(model_id='gpt-5-mini-2025-08-07', name='GPT-5 Mini (2025-08-07)', parameter_count_b=0.0, context_length=400000, supported_engines=('cloud',), provider='openai', requires_api_key=True, metadata={'pricing_input': 0.25, 'pricing_output': 2.0}), ModelSpec(model_id='claude-sonnet-4-20250514', name='Claude Sonnet 4', parameter_count_b=0.0, context_length=200000, supported_engines=('cloud',), provider='anthropic', requires_api_key=True, metadata={'pricing_input': 3.0, 'pricing_output': 15.0}), ModelSpec(model_id='claude-opus-4-20250514', name='Claude Opus 4', parameter_count_b=0.0, context_length=200000, supported_engines=('cloud',), provider='anthropic', requires_api_key=True, metadata={'pricing_input': 15.0, 'pricing_output': 75.0}), ModelSpec(model_id='claude-opus-4-6', name='Claude Opus 4.6', parameter_count_b=0.0, context_length=200000, supported_engines=('cloud',), provider='anthropic', requires_api_key=True, metadata={'pricing_input': 5.0, 'pricing_output': 25.0}), ModelSpec(model_id='claude-sonnet-4-6', name='Claude Sonnet 4.6', parameter_count_b=0.0, context_length=200000, supported_engines=('cloud',), provider='anthropic', requires_api_key=True, metadata={'pricing_input': 3.0, 'pricing_output': 15.0}), ModelSpec(model_id='claude-haiku-4-5', name='Claude Haiku 4.5', parameter_count_b=0.0, context_length=200000, supported_engines=('cloud',), provider='anthropic', requires_api_key=True, metadata={'pricing_input': 1.0, 'pricing_output': 5.0}), ModelSpec(model_id='gemini-2.5-pro', name='Gemini 2.5 Pro', parameter_count_b=0.0, context_length=1000000, supported_engines=('cloud',), provider='google', requires_api_key=True, metadata={'pricing_input': 1.25, 'pricing_output': 10.0}), ModelSpec(model_id='gemini-2.5-flash', name='Gemini 2.5 Flash', parameter_count_b=0.0, context_length=1000000, supported_engines=('cloud',), provider='google', requires_api_key=True, metadata={'pricing_input': 0.3, 'pricing_output': 2.5}), 
ModelSpec(model_id='gemini-3-pro', name='Gemini 3 Pro', parameter_count_b=0.0, context_length=1000000, supported_engines=('cloud',), provider='google', requires_api_key=True, metadata={'pricing_input': 2.0, 'pricing_output': 12.0}), ModelSpec(model_id='gemini-3-flash', name='Gemini 3 Flash', parameter_count_b=0.0, context_length=1000000, supported_engines=('cloud',), provider='google', requires_api_key=True, metadata={'pricing_input': 0.5, 'pricing_output': 3.0})]

register_builtin_models

register_builtin_models

register_builtin_models() -> None

Populate ModelRegistry with well-known models.

Source code in src/openjarvis/intelligence/model_catalog.py
def register_builtin_models() -> None:
    """Populate ``ModelRegistry`` with the well-known built-in models.

    Existing registrations win: a spec is only added when its ``model_id``
    is not already present in the registry.
    """
    # Lazily filter out already-registered ids, then register the rest
    # in catalog order.
    unregistered = (
        spec for spec in BUILTIN_MODELS if not ModelRegistry.contains(spec.model_id)
    )
    for spec in unregistered:
        ModelRegistry.register_value(spec.model_id, spec)

merge_discovered_models

merge_discovered_models

merge_discovered_models(engine_key: str, model_ids: List[str]) -> None

Create minimal ModelSpec entries for models not already in the registry.

Source code in src/openjarvis/intelligence/model_catalog.py
def merge_discovered_models(engine_key: str, model_ids: List[str]) -> None:
    """Create minimal ``ModelSpec`` entries for models not already registered.

    Args:
        engine_key: Engine that reported the models; recorded as the sole
            supported engine on each placeholder spec.
        model_ids: Discovered model identifiers to merge into the registry.
    """
    for candidate in model_ids:
        if ModelRegistry.contains(candidate):
            continue  # keep the richer spec already in the registry
        # Placeholder spec: unknown size/context, only the reporting engine.
        placeholder = ModelSpec(
            model_id=candidate,
            name=candidate,
            parameter_count_b=0.0,
            context_length=0,
            supported_engines=(engine_key,),
        )
        ModelRegistry.register_value(candidate, placeholder)