"""
Shared pipeline state definitions for the Honegumi RAG Assistant.
This module defines a single :class:`TypedDict` describing the state
passed between nodes in the LangGraph. Using a strongly typed state
helps catch errors at wiring time and makes it explicit which keys are
available to each node. Whenever a node returns a dictionary, the
returned keys must correspond to entries on this type.
Fields marked with :class:`typing_extensions.Annotated` and a reducer
(``operator.add`` or one of the custom reducers defined in this module)
are merged automatically by the LangGraph runtime instead of being
overwritten. This mechanism is used to aggregate document contexts and
critique reports as the agent loops through retries, and to combine the
``vectorstore_missing`` flag reported by parallel retrievers.
"""
from typing import TypedDict, Optional, Dict, Any, List
from typing_extensions import Annotated
import operator
[docs]
def merge_contexts(left: List[Dict[str, Any]] | None, right: List[Dict[str, Any]] | None) -> List[Dict[str, Any]]:
"""Custom reducer for contexts that accumulates contexts from parallel retrievers.
For parallel retrieval with Send API, we need to ACCUMULATE contexts from all
parallel branches. This reducer concatenates left and right lists.
This differs from operator.add in that it handles None values gracefully.
"""
result = []
if left is not None:
result.extend(left)
if right is not None:
result.extend(right)
return result
[docs]
def merge_vectorstore_flag(left: bool | None, right: bool | None) -> bool:
"""Custom reducer for vectorstore_missing flag.
If ANY parallel retriever reports missing vector store, the flag should be True.
This is an OR operation: True if either left or right is True.
"""
return bool(left or right)
[docs]
class HonegumiRAGState(TypedDict):
    """State dictionary propagated through the LangGraph pipeline.

    The keys in this dictionary mirror the high-level stages of the
    agentic workflow. Nodes mutate the state by returning a subset of
    these keys in their output dictionaries; the LangGraph runtime then
    merges the outputs back into the global state. Keys annotated with
    a reducer (``operator.add``, ``merge_contexts`` or
    ``merge_vectorstore_flag``) are combined with the existing value
    rather than overwritten, allowing the pipeline to accumulate
    contexts and critiques.

    Attributes
    ----------
    problem : str
        The original natural language description of the user's
        optimisation problem. This value is set at the beginning of
        the pipeline and never modified.
    bo_params : Optional[Dict[str, Any]]
        A dictionary of optimisation parameters returned by the
        :class:`ParameterSelector` node. It contains choices such as
        ``objective``, ``model``, and constraint flags. If the
        parameter selection fails this field may remain ``None``.
    problem_structure : Optional[Dict[str, Any]]
        The extracted problem structure from Stage 1 of parameter selection.
        Contains search_space, objectives, constraints, and experimental setup
        details. This is used as input for Stage 2 grid parameter selection.
    skeleton_code : Optional[str]
        The raw Python code skeleton generated by Honegumi. This
        skeleton contains the structure of the Ax optimisation workflow
        but lacks problem-specific logic.
    template_metadata : Optional[Dict[str, Any]]
        Additional metadata about the generated skeleton, such as
        template names, version information or hashes. Not used by
        downstream nodes but exposed for observability.
    contexts : Annotated[List[Dict[str, Any]], merge_contexts]
        A list of context snippets retrieved from the Ax documentation.
        Each entry in the list should contain at least a ``text`` field
        with the content and may include ``source``, ``url`` or other
        metadata. Updates are concatenated onto the existing list by
        the ``merge_contexts`` reducer, so contexts returned by
        parallel retrievers accumulate; ``None`` on either side is
        treated as an empty list.
    candidate_code : Optional[str]
        A provisional version of the final Python script produced by the
        :class:`CodeWriterAgent`. This field is overwritten on each
        iteration of code writing.
    critique_report : Annotated[List[str], operator.add]
        A list of critiques or observations produced by the code writer
        or reviewer. The list is extended on each pass allowing
        successive agents to append feedback for debugging.
    confidence : Optional[float]
        A numeric confidence score (0.0-1.0) associated with the
        generated code. Downstream nodes may use this value to decide
        whether to trigger a fallback or attempt a repair.
    final_code : Optional[str]
        The final, approved Python script ready to be delivered to the
        user. This value is set by the :class:`ReviewerAgent`.
    error : Optional[str]
        An error message set by any node if unrecoverable problems are
        encountered. The presence of an error should cause the
        pipeline to abort early in production deployments.
    retrieval_count : int
        Counter tracking how many times the retrieval agent has been
        invoked during the current code writing iteration. Limited to
        a maximum of 3 to prevent infinite loops.
    review_count : int
        Counter tracking how many times the code has been sent back for
        revision by the reviewer. Limited to a maximum of 2 to prevent
        infinite revision loops.
    retrieval_query : Optional[str]
        The specific question generated by the CodeWriterAgent to query
        the Ax documentation. This is used by the RetrieverAgent to
        perform targeted searches.
    retrieval_queries : List[str]
        A list of queries used for parallel retrieval.
    vectorstore_missing : Annotated[Optional[bool], merge_vectorstore_flag]
        Flag tracking whether the vector store is unavailable. Updates
        are combined with a logical OR by the
        ``merge_vectorstore_flag`` reducer, so the flag is ``True`` if
        any parallel retriever reports a missing vector store.
    """

    problem: str
    bo_params: Optional[Dict[str, Any]]
    problem_structure: Optional[Dict[str, Any]]
    skeleton_code: Optional[str]
    template_metadata: Optional[Dict[str, Any]]
    # Custom reducer: accumulates contexts from parallel retrievers (None-safe)
    contexts: Annotated[List[Dict[str, Any]], merge_contexts]
    candidate_code: Optional[str]
    # Concatenated across updates via operator.add
    critique_report: Annotated[List[str], operator.add]
    confidence: Optional[float]
    final_code: Optional[str]
    error: Optional[str]
    retrieval_count: int
    review_count: int
    retrieval_query: Optional[str]
    # List of queries for parallel retrieval
    retrieval_queries: List[str]
    # Flag to track if vector store is unavailable; OR-ed across parallel retrievers
    vectorstore_missing: Annotated[Optional[bool], merge_vectorstore_flag]