Source code for honegumi_rag_assistant.states

"""
Shared pipeline state definitions for the Honegumi RAG Assistant.

This module defines a single :class:`TypedDict` describing the state
passed between nodes in the LangGraph.  Using a strongly typed state
helps catch errors at wiring time and makes it explicit which keys are
available to each node.  Whenever a node returns a dictionary, the
returned keys must correspond to entries on this type.

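Fields marked with :class:`typing_extensions.Annotated` carry a reducer
that merges a node's returned value into the existing state instead of
overwriting it.  ``critique_report`` uses ``operator.add`` (list
concatenation), while ``contexts`` and ``vectorstore_missing`` use the
``None``-tolerant reducers :func:`merge_contexts` and
:func:`merge_vectorstore_flag` defined in this module.  This mechanism
is used to aggregate document contexts and critique reports as the
agent loops through retries.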
"""

from typing import TypedDict, Optional, Dict, Any, List
from typing_extensions import Annotated
import operator



[docs]
def merge_contexts(left: List[Dict[str, Any]] | None, right: List[Dict[str, Any]] | None) -> List[Dict[str, Any]]:
    """Custom reducer for contexts that accumulates contexts from parallel retrievers.
    
    For parallel retrieval with Send API, we need to ACCUMULATE contexts from all
    parallel branches. This reducer concatenates left and right lists.
    
    This differs from operator.add in that it handles None values gracefully.
    """
    result = []
    if left is not None:
        result.extend(left)
    if right is not None:
        result.extend(right)
    return result




[docs]
def merge_vectorstore_flag(left: bool | None, right: bool | None) -> bool:
    """Custom reducer for vectorstore_missing flag.
    
    If ANY parallel retriever reports missing vector store, the flag should be True.
    This is an OR operation: True if either left or right is True.
    """
    return bool(left or right)




[docs]
class HonegumiRAGState(TypedDict):
    """State dictionary propagated through the LangGraph pipeline.

    The keys in this dictionary mirror the high-level stages of the
    agentic workflow.  Nodes mutate the state by returning a subset of
    these keys in their output dictionaries; the LangGraph runtime then
    merges the outputs back into the global state.  Keys declared with
    ``Annotated[..., reducer]`` are merged through that reducer:
    ``critique_report`` uses ``operator.add``, ``contexts`` uses
    :func:`merge_contexts` and ``vectorstore_missing`` uses
    :func:`merge_vectorstore_flag`.  All other keys have no reducer
    attached.

    Attributes
    ----------
    problem : str
        The original natural language description of the user's
        optimisation problem.  This value is set at the beginning of
        the pipeline and never modified.
    bo_params : Optional[Dict[str, Any]]
        A dictionary of optimisation parameters returned by the
        :class:`ParameterSelector` node.  It contains choices such as
        ``objective``, ``model``, and constraint flags.  If the
        parameter selection fails this field may remain ``None``.
    problem_structure : Optional[Dict[str, Any]]
        The extracted problem structure from Stage 1 of parameter selection.
        Contains search_space, objectives, constraints, and experimental setup
        details. This is used as input for Stage 2 grid parameter selection.
    skeleton_code : Optional[str]
        The raw Python code skeleton generated by Honegumi.  This
        skeleton contains the structure of the Ax optimisation workflow
        but lacks problem-specific logic.
    template_metadata : Optional[Dict[str, Any]]
        Additional metadata about the generated skeleton, such as
        template names, version information or hashes.  Not used by
        downstream nodes but exposed for observability.
    contexts : Annotated[List[Dict[str, Any]], merge_contexts]
        A list of context snippets retrieved from the Ax documentation.
        Each entry in the list should contain at least a ``text`` field
        with the content and may include ``source``, ``url`` or other
        metadata.  Updates are combined by :func:`merge_contexts`, which
        concatenates the existing list with the returned one (treating
        ``None`` as empty), so contexts accumulate rather than being
        replaced.
    candidate_code : Optional[str]
        A provisional version of the final Python script produced by the
        :class:`CodeWriterAgent`.  This field is overwritten on each
        iteration of code writing.
    critique_report : Annotated[List[str], operator.add]
        A list of critiques or observations produced by the code writer
        or reviewer.  The list is extended on each pass allowing
        successive agents to append feedback for debugging.
    confidence : Optional[float]
        A numeric confidence score (0.0-1.0) associated with the
        generated code.  Downstream nodes may use this value to decide
        whether to trigger a fallback or attempt a repair.
    final_code : Optional[str]
        The final, approved Python script ready to be delivered to the
        user.  This value is set by the :class:`ReviewerAgent`.
    error : Optional[str]
        An error message set by any node if unrecoverable problems are
        encountered.  The presence of an error should cause the
        pipeline to abort early in production deployments.
    retrieval_count : int
        Counter tracking how many times the retrieval agent has been
        invoked during the current code writing iteration.  Intended to
        be capped at 3 to prevent infinite loops; the cap is not
        enforced by this type (presumably by the calling nodes - TODO
        confirm).
    review_count : int
        Counter tracking how many times the code has been sent back for
        revision by the reviewer.  Intended to be capped at 2 to prevent
        infinite revision loops; the cap is not enforced by this type
        (presumably by the calling nodes - TODO confirm).
    retrieval_query : Optional[str]
        The specific question generated by the CodeWriterAgent to query
        the Ax documentation. This is used by the RetrieverAgent to
        perform targeted searches.
    retrieval_queries : List[str]
        List of queries used for parallel retrieval.  No reducer is
        attached to this key.
    vectorstore_missing : Annotated[Optional[bool], merge_vectorstore_flag]
        Flag tracking whether the vector store is unavailable.  Updates
        are combined by :func:`merge_vectorstore_flag`, a logical OR, so
        the merged flag is ``True`` if either the existing value or the
        update is truthy.
    """

    problem: str
    bo_params: Optional[Dict[str, Any]]
    problem_structure: Optional[Dict[str, Any]]
    skeleton_code: Optional[str]
    template_metadata: Optional[Dict[str, Any]]
    contexts: Annotated[List[Dict[str, Any]], merge_contexts]  # Reducer concatenates existing + new contexts (None treated as empty)
    candidate_code: Optional[str]
    critique_report: Annotated[List[str], operator.add]
    confidence: Optional[float]
    final_code: Optional[str]
    error: Optional[str]
    retrieval_count: int
    review_count: int
    retrieval_query: Optional[str]
    retrieval_queries: List[str]  # List of queries for parallel retrieval
    vectorstore_missing: Annotated[Optional[bool], merge_vectorstore_flag]  # OR-merged flag: True if the vector store was reported unavailable