Skip to content

Engine Module

The engine module implements the inference runtime pillar. All backends implement the InferenceEngine ABC with generate(), stream(), list_models(), and health() methods. The discovery subsystem probes running engines and selects the best available backend based on configuration and health checks.

Abstract Base Class

InferenceEngine

InferenceEngine

Bases: ABC

Base class for all inference engine backends.

Subclasses must be registered via @EngineRegistry.register("name") to become discoverable.

Functions

generate abstractmethod

generate(messages: Sequence[Message], *, model: str, temperature: float = 0.7, max_tokens: int = 1024, **kwargs: Any) -> Dict[str, Any]

Synchronous completion — returns a dict with content and usage.

Source code in src/openjarvis/engine/_stubs.py
@abstractmethod
def generate(
    self,
    messages: Sequence[Message],
    *,
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 1024,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Synchronous completion — returns a dict with ``content`` and ``usage``.

    Args:
        messages: Conversation history to send to the backend.
        model: Identifier of the model to run (must be keyword-only).
        temperature: Sampling temperature; defaults to 0.7.
        max_tokens: Upper bound on generated tokens; defaults to 1024.
        **kwargs: Backend-specific options forwarded unchanged.

    Returns:
        A dict containing at least ``content`` and ``usage`` keys.
    """

stream abstractmethod async

stream(messages: Sequence[Message], *, model: str, temperature: float = 0.7, max_tokens: int = 1024, **kwargs: Any) -> AsyncIterator[str]

Yield token strings as they are generated.

Source code in src/openjarvis/engine/_stubs.py
@abstractmethod
async def stream(
    self,
    messages: Sequence[Message],
    *,
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 1024,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Yield token strings as they are generated.

    Async generator counterpart of :meth:`generate`; same parameters,
    but tokens are produced incrementally instead of as one dict.
    """
    # NOTE: must contain a yield to satisfy the type checker
    yield ""  # pragma: no cover

list_models abstractmethod

list_models() -> List[str]

Return identifiers of models available on this engine.

Source code in src/openjarvis/engine/_stubs.py
@abstractmethod
def list_models(self) -> List[str]:
    """Return identifiers of models available on this engine.

    Implementations typically query the backend; an empty list means
    no models are currently available.
    """

health abstractmethod

health() -> bool

Return True when the engine is reachable and healthy.

Source code in src/openjarvis/engine/_stubs.py
@abstractmethod
def health(self) -> bool:
    """Return ``True`` when the engine is reachable and healthy.

    Used by the discovery subsystem to filter out unavailable backends;
    implementations should not raise for an ordinary "unreachable" state.
    """

close

close() -> None

Release resources (HTTP clients, connections, threads, etc.).

Source code in src/openjarvis/engine/_stubs.py
def close(self) -> None:
    """Release resources (HTTP clients, connections, threads, etc.).

    The base implementation is a no-op; subclasses override as needed.
    """

prepare

prepare(model: str) -> None

Optional warm-up hook called before the first request.

Source code in src/openjarvis/engine/_stubs.py
def prepare(self, model: str) -> None:
    """Optional warm-up hook called before the first request.

    The base implementation is a no-op; subclasses may preload *model*.
    """

EngineConnectionError

EngineConnectionError

Bases: Exception

Raised when an engine is unreachable.

messages_to_dicts

messages_to_dicts

messages_to_dicts(messages: Sequence[Message]) -> List[Dict[str, Any]]

Convert Message objects to OpenAI-format dicts.

Source code in src/openjarvis/engine/_base.py
def messages_to_dicts(messages: Sequence[Message]) -> List[Dict[str, Any]]:
    """Convert ``Message`` objects to OpenAI-format dicts."""

    def _convert(msg: Message) -> Dict[str, Any]:
        # Role and content are always emitted; optional fields only when set.
        entry: Dict[str, Any] = {"role": msg.role.value, "content": msg.content}
        if msg.name:
            entry["name"] = msg.name
        if msg.tool_calls:
            entry["tool_calls"] = [
                {
                    "id": call.id,
                    "type": "function",
                    "function": {"name": call.name, "arguments": call.arguments},
                }
                for call in msg.tool_calls
            ]
        if msg.tool_call_id:
            entry["tool_call_id"] = msg.tool_call_id
        return entry

    return [_convert(msg) for msg in messages]

Engine Implementations

OllamaEngine

OllamaEngine

OllamaEngine(host: str = 'http://localhost:11434', *, timeout: float = 120.0)

Bases: InferenceEngine

Ollama backend via its native HTTP API.

Source code in src/openjarvis/engine/ollama.py
def __init__(
    self,
    host: str = "http://localhost:11434",
    *,
    timeout: float = 120.0,
) -> None:
    """Bind an HTTP client to *host* with the given request *timeout*."""
    # Strip any trailing slash once so later path joins are predictable.
    base = host.rstrip("/")
    self._host = base
    self._client = httpx.Client(base_url=base, timeout=timeout)

VLLMEngine

VLLMEngine

VLLMEngine(host: str | None = None, *, timeout: float = 120.0)

Bases: _OpenAICompatibleEngine

vLLM backend — thin wrapper over the shared OpenAI-compatible base.

Source code in src/openjarvis/engine/_openai_compat.py
def __init__(self, host: str | None = None, *, timeout: float = 120.0) -> None:
    """Connect to *host*, falling back to the class default when ``None``."""
    # Normalise the base URL (no trailing slash) before handing it to httpx.
    base = (host or self._default_host).rstrip("/")
    self._host = base
    self._client = httpx.Client(base_url=base, timeout=timeout)

LlamaCppEngine

LlamaCppEngine

LlamaCppEngine(host: str | None = None, *, timeout: float = 120.0)

Bases: _OpenAICompatibleEngine

llama.cpp server — OpenAI-compatible base.

Source code in src/openjarvis/engine/_openai_compat.py
def __init__(self, host: str | None = None, *, timeout: float = 120.0) -> None:
    """Connect to *host*, falling back to the class default when ``None``."""
    # Normalise the base URL (no trailing slash) before handing it to httpx.
    base = (host or self._default_host).rstrip("/")
    self._host = base
    self._client = httpx.Client(base_url=base, timeout=timeout)

SGLangEngine

SGLangEngine

SGLangEngine(host: str | None = None, *, timeout: float = 120.0)

Bases: _OpenAICompatibleEngine

SGLang backend — thin wrapper over the shared OpenAI-compatible base.

Source code in src/openjarvis/engine/_openai_compat.py
def __init__(self, host: str | None = None, *, timeout: float = 120.0) -> None:
    """Connect to *host*, falling back to the class default when ``None``."""
    # Normalise the base URL (no trailing slash) before handing it to httpx.
    base = (host or self._default_host).rstrip("/")
    self._host = base
    self._client = httpx.Client(base_url=base, timeout=timeout)

CloudEngine

CloudEngine

CloudEngine()

Bases: InferenceEngine

Cloud inference via OpenAI, Anthropic, and Google SDKs.

Source code in src/openjarvis/engine/cloud.py
def __init__(self) -> None:
    """Prepare per-provider SDK client slots, then initialise them."""
    # All three slots start empty; _init_clients fills in whichever
    # provider SDKs are actually available/configured.
    self._openai_client = self._anthropic_client = self._google_client = None
    self._init_clients()

estimate_cost

estimate_cost

estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float

Estimate USD cost based on the hardcoded pricing table.

Source code in src/openjarvis/engine/cloud.py
def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate USD cost based on the hardcoded pricing table."""
    # Exact model name wins; otherwise take the first prefix match,
    # and an unknown model costs nothing.
    rates = PRICING.get(model)
    if rates is None:
        rates = next(
            (val for key, val in PRICING.items() if model.startswith(key)),
            None,
        )
    if rates is None:
        return 0.0
    # Rates are quoted per million tokens as (input_rate, output_rate).
    per_million = 1_000_000
    return (prompt_tokens / per_million) * rates[0] + (
        completion_tokens / per_million
    ) * rates[1]

Engine Discovery

Functions for probing running engines, aggregating available models, and selecting the best engine for a given configuration.

get_engine

get_engine

get_engine(config: JarvisConfig, engine_key: str | None = None) -> Tuple[str, InferenceEngine] | None

Get a specific engine by key, or the default with fallback.

Returns (key, engine_instance) or None if no engine is available.

Source code in src/openjarvis/engine/_discovery.py
def get_engine(
    config: JarvisConfig, engine_key: str | None = None
) -> Tuple[str, InferenceEngine] | None:
    """Get a specific engine by key, or the default with fallback.

    Returns ``(key, engine_instance)`` or ``None`` if no engine is available.
    """

    def _probe(key: str) -> Tuple[str, InferenceEngine] | None:
        # Best-effort probe: any construction or health failure means
        # "not available" rather than an error.
        if not EngineRegistry.contains(key):
            return None
        try:
            candidate = _make_engine(key, config)
            if candidate.health():
                return (key, candidate)
        except Exception:
            pass
        return None

    if engine_key:
        # An explicitly requested engine gets no fallback.
        return _probe(engine_key)

    # Prefer the configured default; otherwise any healthy engine.
    result = _probe(config.engine.default)
    if result is not None:
        return result
    healthy = discover_engines(config)
    return healthy[0] if healthy else None

discover_engines

discover_engines

discover_engines(config: JarvisConfig) -> List[Tuple[str, InferenceEngine]]

Probe registered engines and return [(key, instance)] for healthy ones.

Results are sorted with the config default engine first.

Source code in src/openjarvis/engine/_discovery.py
def discover_engines(config: JarvisConfig) -> List[Tuple[str, InferenceEngine]]:
    """Probe registered engines and return ``[(key, instance)]`` for healthy ones.

    Results are sorted with the config default engine first.
    """
    available: List[Tuple[str, InferenceEngine]] = []
    for key in EngineRegistry.keys():
        try:
            candidate = _make_engine(key, config)
            is_healthy = candidate.health()
        except Exception:
            # Probing is best-effort: engines that fail to construct or
            # to answer a health check are simply skipped.
            continue
        if is_healthy:
            available.append((key, candidate))

    preferred = config.engine.default
    # False sorts before True, so the default engine comes first;
    # remaining engines are ordered alphabetically by key.
    available.sort(key=lambda item: (item[0] != preferred, item[0]))
    return available

discover_models

discover_models

discover_models(engines: List[Tuple[str, InferenceEngine]]) -> Dict[str, List[str]]

Call list_models() on each engine and return a dict.

Source code in src/openjarvis/engine/_discovery.py
def discover_models(
    engines: List[Tuple[str, InferenceEngine]],
) -> Dict[str, List[str]]:
    """Call ``list_models()`` on each engine and return a dict."""

    def _models_for(engine: InferenceEngine) -> List[str]:
        # An engine that fails to answer contributes an empty model list.
        try:
            return engine.list_models()
        except Exception:
            return []

    return {key: _models_for(engine) for key, engine in engines}