# Source code for honegumi_rag_assistant.states

"""
Shared pipeline state definitions for the Honegumi RAG Assistant.

This module defines a single :class:`TypedDict` describing the state
passed between nodes in the LangGraph.  Using a strongly typed state
helps catch errors at wiring time and makes it explicit which keys are
available to each node.  Whenever a node returns a dictionary, the
returned keys must correspond to entries on this type.

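Fields marked with :class:`typing_extensions.Annotated` carry a reducer
that is used to merge a node's output into the existing value instead
of overwriting it.  ``critique_report`` uses ``operator.add`` (list
concatenation), ``contexts`` uses :func:`merge_contexts` (``None``-tolerant
concatenation) and ``vectorstore_missing`` uses
:func:`merge_vectorstore_flag` (logical OR).  This mechanism is used to
aggregate document contexts and critique reports as the agent loops
through retries.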
"""

from typing import TypedDict, Optional, Dict, Any, List
from typing_extensions import Annotated
import operator


def merge_contexts(
    left: Optional[List[Dict[str, Any]]],
    right: Optional[List[Dict[str, Any]]],
) -> List[Dict[str, Any]]:
    """Concatenate two context lists, treating ``None`` as an empty list.

    Custom reducer for the ``contexts`` state key.  It is meant for
    parallel retrieval (Send API), where the contexts produced by every
    parallel branch must be ACCUMULATED rather than overwritten.  It
    differs from ``operator.add`` only in that either side may be ``None``.

    Parameters
    ----------
    left : Optional[List[Dict[str, Any]]]
        The contexts already stored in the state, or ``None``.
    right : Optional[List[Dict[str, Any]]]
        The contexts returned by a node, or ``None``.

    Returns
    -------
    List[Dict[str, Any]]
        A new list holding the items of ``left`` followed by the items of
        ``right``.  Neither input list is mutated; the dictionaries
        themselves are shared, not copied.
    """
    # Skip only ``None`` (not other falsy values) so that an unexpected
    # non-iterable argument still raises instead of being silently dropped.
    return [ctx for part in (left, right) if part is not None for ctx in part]
def merge_vectorstore_flag(left: Optional[bool], right: Optional[bool]) -> bool:
    """Combine two ``vectorstore_missing`` flags with a logical OR.

    Custom reducer for the ``vectorstore_missing`` state key.  If ANY
    parallel retriever reports a missing vector store, the merged flag
    is ``True``.

    Parameters
    ----------
    left : Optional[bool]
        The flag already stored in the state; ``None`` counts as ``False``.
    right : Optional[bool]
        The flag returned by a node; ``None`` counts as ``False``.

    Returns
    -------
    bool
        ``True`` if either argument is truthy, otherwise ``False``.  The
        result is always a real ``bool``, never ``None``.
    """
    # ``bool(...)`` normalises a ``None``/``None`` pair to ``False``.
    return bool(left or right)
class HonegumiRAGState(TypedDict):
    """State dictionary propagated through the LangGraph pipeline.

    The keys in this dictionary mirror the high-level stages of the
    agentic workflow.  Nodes mutate the state by returning a subset of
    these keys in their output dictionaries; the LangGraph runtime then
    merges the outputs back into the global state.  Keys declared with
    ``Annotated[..., reducer]`` are merged through that reducer
    (``operator.add``, ``merge_contexts`` or ``merge_vectorstore_flag``),
    allowing the pipeline to accumulate evidence and critiques.

    Attributes
    ----------
    problem : str
        The original natural language description of the user's
        optimisation problem.  This value is set at the beginning of the
        pipeline and never modified.
    bo_params : Optional[Dict[str, Any]]
        A dictionary of optimisation parameters returned by the
        :class:`ParameterSelector` node.  It contains choices such as
        ``objective``, ``model``, and constraint flags.  If the parameter
        selection fails this field may remain ``None``.
    problem_structure : Optional[Dict[str, Any]]
        The extracted problem structure from Stage 1 of parameter
        selection.  Contains search_space, objectives, constraints, and
        experimental setup details.  This is used as input for Stage 2
        grid parameter selection.
    skeleton_code : Optional[str]
        The raw Python code skeleton generated by Honegumi.  This
        skeleton contains the structure of the Ax optimisation workflow
        but lacks problem-specific logic.
    template_metadata : Optional[Dict[str, Any]]
        Additional metadata about the generated skeleton, such as
        template names, version information or hashes.  Not used by
        downstream nodes but exposed for observability.
    contexts : Annotated[List[Dict[str, Any]], merge_contexts]
        A list of context snippets retrieved from the Ax documentation.
        Each entry in the list should contain at least a ``text`` field
        with the content and may include ``source``, ``url`` or other
        metadata.  Values returned by nodes are combined with the
        existing list by ``merge_contexts``, which concatenates the two
        lists and treats ``None`` as empty.
    candidate_code : Optional[str]
        A provisional version of the final Python script produced by the
        :class:`CodeWriterAgent`.  This field is overwritten on each
        iteration of code writing.
    critique_report : Annotated[List[str], operator.add]
        A list of critiques or observations produced by the code writer
        or reviewer.  The list is extended on each pass allowing
        successive agents to append feedback for debugging.
    confidence : Optional[float]
        A numeric confidence score (0.0-1.0) associated with the
        generated code.  Downstream nodes may use this value to decide
        whether to trigger a fallback or attempt a repair.
    final_code : Optional[str]
        The final, approved Python script ready to be delivered to the
        user.  This value is set by the :class:`ReviewerAgent`.
    error : Optional[str]
        An error message set by any node if unrecoverable problems are
        encountered.  The presence of an error should cause the pipeline
        to abort early in production deployments.
    retrieval_count : int
        Counter tracking how many times the retrieval agent has been
        invoked during the current code writing iteration.  Limited to a
        maximum of 3 to prevent infinite loops (the limit is not
        enforced by this type).
    review_count : int
        Counter tracking how many times the code has been sent back for
        revision by the reviewer.  Limited to a maximum of 2 to prevent
        infinite revision loops (the limit is not enforced by this type).
    retrieval_query : Optional[str]
        The specific question generated by the CodeWriterAgent to query
        the Ax documentation.  This is used by the RetrieverAgent to
        perform targeted searches.
    retrieval_queries : List[str]
        List of queries for parallel retrieval.
    vectorstore_missing : Annotated[Optional[bool], merge_vectorstore_flag]
        Flag tracking whether the vector store is unavailable.  Values
        are combined by ``merge_vectorstore_flag`` (logical OR), so the
        flag is ``True`` as soon as any branch reports it.
    """

    problem: str
    bo_params: Optional[Dict[str, Any]]
    problem_structure: Optional[Dict[str, Any]]
    skeleton_code: Optional[str]
    template_metadata: Optional[Dict[str, Any]]
    # Custom reducer: concatenates (accumulates) contexts, tolerating None.
    contexts: Annotated[List[Dict[str, Any]], merge_contexts]
    candidate_code: Optional[str]
    critique_report: Annotated[List[str], operator.add]
    confidence: Optional[float]
    final_code: Optional[str]
    error: Optional[str]
    retrieval_count: int
    review_count: int
    retrieval_query: Optional[str]
    # List of queries for parallel retrieval.
    retrieval_queries: List[str]
    # Flag to track if the vector store is unavailable; OR-ed across branches.
    vectorstore_missing: Annotated[Optional[bool], merge_vectorstore_flag]