Source code for honegumi_rag_assistant.nodes.reviewer

"""
Node: Reviewer agent.

The reviewer acts as a quality control gate that can either approve the
generated code or send it back for revision. It uses LLM-based code review
to check correctness, completeness, and adherence to Ax Platform best practices.

The reviewer can send code back for revision up to 2 times with specific
feedback. After the maximum number of revisions, it will approve the code
regardless of quality to prevent infinite loops.
"""

from __future__ import annotations

from typing import Dict, Any, Literal
from pydantic import BaseModel, Field

from langchain_openai import ChatOpenAI

from ..states import HonegumiRAGState
from ..app_config import settings
from ..timing_utils import time_node


class ReviewDecision(BaseModel):
    """Decision made by the Reviewer Agent."""

    # NOTE(review): this model is passed to ``with_structured_output`` in
    # ReviewerAgent, so the class docstring and the Field descriptions are
    # presumably serialized into the tool schema shown to the LLM. Treat them
    # as prompt text rather than ordinary documentation when editing.

    # Verdict for the candidate code: accept as-is or return it for rework.
    action: Literal["approve", "revise"] = Field(
        description=(
            "Choose 'approve' if the code is correct, complete, and ready to use. "
            "Choose 'revise' if the code has issues that need to be fixed by the "
            "Code Writer."
        ),
    )

    # Reviewer's explanation of what to fix; defaults to None.
    feedback: str | None = Field(
        description=(
            "If action is 'revise', provide specific, actionable feedback "
            "about what needs to be fixed. "
            "Be precise about the issues (e.g., 'Missing parameter bounds', "
            "'Objective function returns wrong type'). "
            "If action is 'approve', set to None."
        ),
        default=None,
    )
class ReviewerAgent:
    """Perform LLM-based review and decide whether to approve or request revision.

    The class is a namespace for the reviewer node: every method is static and
    the only state is the pair of tunable limits below.
    """

    # How many times code may be sent back for revision before it is approved
    # unconditionally (guards against an endless write/review loop).
    MAX_REVISIONS = 2

    # Number of leading skeleton characters included in the review prompt.
    SKELETON_PREVIEW_CHARS = 800

    @staticmethod
    @time_node("Reviewer Agent")
    def review_code(state: HonegumiRAGState) -> Dict[str, Any]:
        """Review the candidate code and decide to approve or revise.

        Parameters
        ----------
        state : HonegumiRAGState
            The current pipeline state. The keys read here are
            ``candidate_code``, ``review_count``, ``problem``, ``bo_params``
            and ``skeleton_code``; a missing key and a key set to ``None``
            are treated the same way.

        Returns
        -------
        Dict[str, Any]
            A partial state update, one of:

            - ``{"final_code": str}`` when the candidate is empty (an error
              stub is returned) or when the offline check passes.
            - ``{"final_code": str, "critique_report": List[str]}`` when the
              code is approved by the LLM, approved because the revision
              limit was reached, or approved because the review itself failed.
            - ``{"critique_report": List[str], "review_count": int}`` when the
              offline check requests a revision.
            - ``{"critique_report": List[str], "review_count": int,
              "retrieval_count": 0}`` when the LLM requests a revision.
        """
        # ``or`` fallbacks (rather than ``.get`` defaults) so that a key that
        # is present but None cannot crash the slicing/iteration/comparison
        # done further down.
        code = state.get("candidate_code") or ""
        review_count = state.get("review_count") or 0
        problem = state.get("problem") or ""
        bo_params = state.get("bo_params") or {}
        skeleton = state.get("skeleton_code") or ""

        # Basic sanity check: nothing to review.
        if not code.strip():
            return {"final_code": "# Error: Generated code is empty\npass"}

        # Revision budget exhausted: approve regardless of quality.
        if review_count >= ReviewerAgent.MAX_REVISIONS:
            return {
                "final_code": code,
                "critique_report": [
                    f"Approved after {review_count} revision attempts (max reached)."
                ],
            }

        # Without an API key only a cheap textual check is possible.
        if not settings.openai_api_key:
            return ReviewerAgent._review_without_llm(code, review_count)

        if not settings.debug:
            print("Reviewing the generated code for errors...")

        try:
            review_prompt = ReviewerAgent._build_review_prompt(
                problem, bo_params, skeleton, code, review_count
            )
            return ReviewerAgent._review_with_llm(code, review_count, review_prompt)
        except Exception as exc:
            # Deliberate best-effort: a failing review must not block the
            # pipeline, so the code is approved and the error is recorded.
            return {
                "final_code": code,
                "critique_report": [f"Review error: {exc}. Approving by default."],
            }

    @staticmethod
    def _review_without_llm(code: str, review_count: int) -> Dict[str, Any]:
        """Approve ``code`` unless it still contains obvious placeholders."""
        lowered = code.lower()
        if "notimplementederror" in lowered or "todo" in lowered:
            # The caller has already handled the exhausted-budget case, so a
            # revision can always be requested here.
            return {
                "critique_report": [
                    "Code contains placeholders (TODO/NotImplementedError). Remove them."
                ],
                "review_count": review_count + 1,
            }
        return {"final_code": code}

    @staticmethod
    def _build_review_prompt(
        problem: str,
        bo_params: Dict[str, Any],
        skeleton: str,
        code: str,
        review_count: int,
    ) -> str:
        """Assemble the user prompt sent to the reviewer LLM."""
        param_str = "\n".join(f"{k}: {v}" for k, v in bo_params.items())

        # Only claim truncation when the skeleton was actually cut short.
        limit = ReviewerAgent.SKELETON_PREVIEW_CHARS
        if len(skeleton) > limit:
            skeleton_preview = f"{skeleton[:limit]}... (truncated)"
        else:
            skeleton_preview = skeleton

        max_attempts = ReviewerAgent.MAX_REVISIONS + 1
        prompt_lines = [
            "You are an expert code reviewer specializing in Bayesian optimization with the Ax Platform.",
            "",
            "**YOUR TASK:**",
            "Review the generated Python script for correctness, completeness, and adherence to best practices.",
            "",
            "**PROBLEM DESCRIPTION:**",
            f"{problem}",
            "",
            "**OPTIMIZATION PARAMETERS:**",
            f"{param_str}",
            "",
            "**HONEGUMI SKELETON (expected structure):**",
            skeleton_preview,
            "",
            "**GENERATED CODE TO REVIEW:**",
            f"{code}",
            "",
            "**REVIEW CRITERIA:**",
            "1. **Correctness**: Does the code correctly implement the problem requirements?",
            "2. **Completeness**: Are all necessary components filled in (objective function, parameters, constraints)?",
            "3. **Ax API Usage**: Does it use Ax Platform APIs correctly?",
            "4. **Structure Preservation**: Does it maintain the Honegumi skeleton structure?",
            "5. **No Placeholders**: No TODO, NotImplementedError, or placeholder comments?",
            "6. **Executability**: Would this code run without errors?",
            "",
            "**YOUR DECISION:**",
            "- Choose 'approve' if the code meets all criteria and is ready to use",
            "- Choose 'revise' if there are issues that need fixing (be specific about what's wrong)",
            "",
            f"Keep in mind: This is review attempt {review_count + 1}/{max_attempts}. Be thorough but fair.",
        ]
        return "\n".join(prompt_lines) + "\n"

    @staticmethod
    def _review_with_llm(
        code: str, review_count: int, review_prompt: str
    ) -> Dict[str, Any]:
        """Ask the reviewer LLM for a decision and convert it to a state update.

        Any exception raised here is handled by ``review_code``, which approves
        the code by default.
        """
        attempt = review_count + 1
        max_attempts = ReviewerAgent.MAX_REVISIONS + 1

        # Use LangChain's ChatOpenAI for LangSmith tracing.
        llm = ChatOpenAI(
            model=settings.reviewer_model,
            api_key=settings.openai_api_key,
        )

        # Use structured output with the Pydantic model.
        structured_llm = llm.with_structured_output(
            ReviewDecision,
            method="function_calling",
            include_raw=False,
        )

        decision: ReviewDecision = structured_llm.invoke([
            {
                "role": "system",
                "content": "You are an expert code reviewer specializing in Bayesian optimization and the Ax Platform.",
            },
            {"role": "user", "content": review_prompt},
        ])

        if settings.debug:
            # DEBUG: print the reviewer's decision.
            print("\n" + "=" * 80)
            print(f"DEBUG: REVIEWER DECISION (Review Attempt {attempt}/{max_attempts})")
            print("=" * 80)
            print(f"Action: {decision.action}")
            if decision.action == "revise":
                print(f"Feedback: {decision.feedback or '(No specific feedback provided)'}")
                print(f"Review count after this: {attempt}/{ReviewerAgent.MAX_REVISIONS}")
            else:
                print("Status: Code approved!")
            print("=" * 80 + "\n")

        if decision.action == "approve":
            if not settings.debug:
                print("Code approved!\n")
            return {
                "final_code": code,
                "critique_report": [f"Code approved by reviewer on attempt {attempt}."],
            }

        # Send back for revision with retrieval_count reset to 0 so the Code
        # Writer can retrieve again; the contexts key is intentionally omitted
        # from the returned update.
        feedback = decision.feedback or "Code needs improvement (no specific feedback provided)."
        if settings.debug:
            print("Sending code back to Code Writer for revision...\n")
            print("DEBUG: Resetting retrieval_count to 0 (contexts preserved)\n")
            print("DEBUG: Reviewer returning critique_report, review_count, retrieval_count\n")
            print("DEBUG: NOT returning contexts key - LangGraph should preserve existing\n")
        else:
            print("Requesting code revision...")
        return {
            "critique_report": [feedback],
            "review_count": attempt,
            "retrieval_count": 0,  # Reset counter but keep contexts
        }