LLM retries and validations

2026-06-28 18:08:58 +02:00 · 2026-06-28 18:08:58 +02:00 · 5b93040f73
commit 5b93040f73
parent a7e6d5540f
2 changed files with 242 additions and 135 deletions
--- a/tools/engine.py
+++ b/tools/engine.py
@ -804,19 +804,25 @@ class GameEngine:
        elif last_prompt:
            self._append_llm_log(f"Resume from: {last_prompt[:120]}")
-        # ── Phase 1: Prose ────────────────────────────────────────────────
+        # ── Outer loop: Phase 1 (prose) → Phase 2 (summarize) → Phase 3 (extract) ──
        import random
        die_roll = random.randint(1, 6)
        self._append_llm_log(f"Dice: {die_roll} (1d6)")
        if on_action:
            on_action(f"Phase 1/3: writing story (dice={die_roll})")
        if on_debug:
            on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll})
        book_log = None
        changes_block = ""
-        for attempt in range(3):
+        log_entry = None
        user_prompt = self._auto_prompt("")
        ambience = None
        debug_info = ""
        for outer_attempt in range(3):
            # ── Phase 1: Prose ────────────────────────────────────────────
            if on_action:
                on_action(f"Phase 1/3: writing story (dice={die_roll})")
            if on_debug:
                on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll, "outer_attempt": outer_attempt + 1})
            system = PROSE_PROMPT.substitute(
                character=self._read_file(CHAR_PATH) or "*No character sheet.*",
                world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*",
@ -832,14 +838,14 @@ class GameEngine:
            text = self._call_llm([
                {"role": "system", "content": system},
                {"role": "user", "content": user},
-            ], label=f"Prose attempt {attempt + 1}", max_tokens=1024, on_debug=on_debug)
+            ], label=f"Prose attempt {outer_attempt + 1}", max_tokens=1024, on_debug=on_debug)
            if not text or not text.strip():
                if on_debug:
-                    on_debug("phase", {"phase": 1, "status": "empty", "attempt": attempt + 1})
+                    on_debug("phase", {"phase": 1, "status": "empty", "attempt": outer_attempt + 1})
                continue
            raw = text.strip()
            # Split narrative from ### Changes block
            changes_block = ""
            if "### Changes" in raw:
                parts = raw.split("### Changes", 1)
@ -850,137 +856,179 @@ class GameEngine:
            if on_debug:
                preview = book_log[:150].replace("\n", "\\n")
                on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview})
            break
-        if not book_log:
+            # ── Validation ────────────────────────────────────────────────
            return TurnResult(error="Prose generation failed after 3 attempts")
        # ── Phase 2: Summarize ────────────────────────────────────────────
        if on_action:
            on_action("Phase 2/3: summarizing story")
        if on_debug:
            on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"})
        log_context = self._read_recent_log()
        log_entry = None
        for attempt in range(2):
            context = book_log
            if changes_block:
                context += f"\n\n{changes_block}"
            text = self._call_llm([
                {"role": "user", "content":
                    f"Given the session log so far, summarize the new story in one line. "
                    f"Focus on who was involved (character and NPC names):\n\n"
                    f"## Session Log\n{log_context}\n\n"
                    f"## New Story\n{context}"}
            ], label=f"Summarize attempt {attempt + 1}", on_debug=on_debug)
            if text and text.strip():
                log_entry = text.strip().split("\n")[0][:120]
                if on_debug:
                    on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
                break
        if not log_entry:
            log_entry = book_log.split("\n")[0][:120]
            if on_debug:
-                on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
+                on_debug("phase", {"phase": 1, "name": "validation", "status": "start"})
-
+            valid, reason = self._validate_narrative(book_log, on_debug=on_debug)
-        # ── Phase 3: Extract state changes ────────────────────────────────
+            if not valid:
        if on_action:
            on_action("Phase 3/3: extracting state changes")
        if on_debug:
            on_debug("phase", {"phase": 3, "name": "extract", "status": "start"})
        user_prompt = self._auto_prompt(book_log)
        ambience = None
        debug_info = ""
        current_char = self._read_file(CHAR_PATH) or "*No character.*"
        current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
        for attempt in range(3):
            phase3_prompt = (
                f"## Current Character\n{current_char}\n\n"
                f"## Current World\n{current_world}\n\n"
                f"## Story\n{book_log}\n\n"
            )
            if changes_block.strip():
                phase3_prompt += (
                    f"## Changes to apply\n{changes_block}\n\n"
                    f"Convert the listed changes into tool calls:\n\n"
                )
            else:
                phase3_prompt += (
                    f"Read the story and compare with current state. Output tool calls for any changes:\n\n"
                )
            phase3_prompt += (
                f"Output ```tool blocks for changes only. Examples:\n\n"
            )
            text = self._call_llm([
                {"role": "user", "content": phase3_prompt +
                    f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
                    f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
                    f"Only output tools for things that actually changed. Omit unchanged fields."}
            ], label=f"Extract attempt {attempt + 1}", on_debug=on_debug)
            if not text or not text.strip():
                if on_debug:
-                    on_debug("phase", {"phase": 3, "status": "empty", "attempt": attempt + 1})
+                    on_debug("phase", {"phase": 1, "status": "validation_failed", "reason": reason, "outer_attempt": outer_attempt + 1})
                book_log = None
                continue
-            tool_calls = self._extract_tool_calls(
+            # ── Phase 2: Summarize ────────────────────────────────────────
-                text, round_num=attempt + 1, on_debug=on_debug
+            if on_action:
-            )
+                on_action("Phase 2/3: summarizing story")
            if on_debug and tool_calls:
                names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
                fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls)
                on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin})
            errors = []
            for tc in tool_calls:
                name = tc.get("tool", "?")
                args = tc.get("args", {})
                if name == "finalize_turn":
                    if args.get("user_prompt"):
                        user_prompt = args["user_prompt"]
                    if args.get("ambience"):
                        ambience = args["ambience"]
                    continue
                if on_action:
                    on_action(f"State: {self._describe_tool_action(name, args)}")
                if on_debug:
                    on_debug("tool_call", {"round": attempt + 1, "tool": name, "args": args})
                if name == "player_roll" and on_player_roll:
                    dice = args.get("dice", "1d6")
                    reason = args.get("reason", "a check")
                    roll_val = on_player_roll(dice, reason)
                    result = f"Player rolled {dice} for '{reason}': {roll_val}"
                else:
                    result = self._execute_tool(name, args)
                if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
                    errors.append(f"{name}: {result}")
                if on_debug:
                    on_debug("tool_result", {"round": attempt + 1, "tool": name, "result": result})
            if not errors:
                if on_debug:
                    on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
                break
            debug_info = "; ".join(errors)
            if on_debug:
-                on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": attempt + 1})
+                on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"})
-        if errors and on_debug:
+            log_context = self._read_recent_log()
-            on_debug("phase", {"phase": 3, "status": "exhausted", "errors": errors})
+            log_entry = None
            for p2_attempt in range(2):
                context = book_log
                if changes_block:
                    context += f"\n\n{changes_block}"
                text = self._call_llm([
                    {"role": "user", "content":
                        f"Given the session log so far, summarize the new story in one line. "
                        f"Focus on who was involved (character and NPC names):\n\n"
                        f"## Session Log\n{log_context}\n\n"
                        f"## New Story\n{context}"}
                ], label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
                if text and text.strip():
                    log_entry = text.strip().split("\n")[0][:300]
                    if on_debug:
                        on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
                    break
            if not log_entry:
                log_entry = book_log.split("\n")[0][:120]
                if on_debug:
                    on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
            # ── Phase 3: Extract state changes ────────────────────────────
            if on_action:
                on_action("Phase 3/3: extracting state changes")
            if on_debug:
                on_debug("phase", {"phase": 3, "name": "extract", "status": "start"})
            user_prompt = self._auto_prompt(book_log)
            ambience = None
            phase3_errors = []
            previous_attempt = None  # {output, feedback}
            phase3_ok = False
            for p3_attempt in range(5):
                current_char = self._read_file(CHAR_PATH) or "*No character.*"
                current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
                phase3_prompt = (
                    f"## Current Character\n{current_char}\n\n"
                    f"## Current World\n{current_world}\n\n"
                    f"## Story\n{book_log}\n\n"
                )
                if changes_block.strip():
                    phase3_prompt += (
                        f"## Changes to apply\n{changes_block}\n\n"
                        f"Convert the listed changes into tool calls:\n\n"
                    )
                else:
                    phase3_prompt += (
                        f"Read the story and compare with current state. Output tool calls for any changes:\n\n"
                    )
                phase3_prompt += (
                    f"Output ```tool blocks for changes only. Examples:\n\n"
                )
                if previous_attempt:
                    phase3_prompt += (
                        f"--- PREVIOUS ATTEMPT (had errors) ---\n"
                        f"{previous_attempt['output']}\n\n"
                        f"--- FEEDBACK ---\n"
                        f"{previous_attempt['feedback']}\n\n"
                        f"Fix the issues above. Output corrected tool calls only.\n\n"
                    )
                text = self._call_llm([
                    {"role": "user", "content": phase3_prompt +
                        f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
                        f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
                        f"Only output tools for things that actually changed. Omit unchanged fields."}
                ], label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)
                if not text or not text.strip():
                    if on_debug:
                        on_debug("phase", {"phase": 3, "status": "empty", "attempt": p3_attempt + 1})
                    continue
                tool_calls = self._extract_tool_calls(
                    text, round_num=p3_attempt + 1, on_debug=on_debug
                )
                if on_debug and tool_calls:
                    names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
                    fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls)
                    on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin})
                errors = []
                for tc in tool_calls:
                    name = tc.get("tool", "?")
                    args = tc.get("args", {})
                    if name == "finalize_turn":
                        if args.get("user_prompt"):
                            user_prompt = args["user_prompt"]
                        if args.get("ambience"):
                            ambience = args["ambience"]
                        continue
                    if on_action:
                        on_action(f"State: {self._describe_tool_action(name, args)}")
                    if on_debug:
                        on_debug("tool_call", {"round": p3_attempt + 1, "tool": name, "args": args})
                    if name == "player_roll" and on_player_roll:
                        dice = args.get("dice", "1d6")
                        reason = args.get("reason", "a check")
                        roll_val = on_player_roll(dice, reason)
                        result = f"Player rolled {dice} for '{reason}': {roll_val}"
                    else:
                        result = self._execute_tool(name, args)
                    if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
                        errors.append(f"{name}: {result}")
                    if on_debug:
                        on_debug("tool_result", {"round": p3_attempt + 1, "tool": name, "result": result})
                if not errors:
                    phase3_ok = True
                    debug_info = ""
                    if on_debug:
                        on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
                    break
                phase3_errors = errors
                debug_info = "; ".join(errors)
                if on_debug:
                    on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": p3_attempt + 1})
                # Build feedback for the LLM to fix on next attempt
                feedback_lines = ["The previous tool calls had errors:"]
                for e in errors:
                    feedback_lines.append(f"- {e}")
                feedback_lines.append("")
                feedback_lines.append("Fix ALL issues above. Use correct tool names, valid JSON, and reasonable values.")
                previous_attempt = {"output": text, "feedback": "\n".join(feedback_lines)}
            if phase3_ok:
                break  # All phases succeeded on this outer attempt
            # Phase 3 failed after 5 attempts — retry from Phase 1
            if on_debug:
                on_debug("phase", {"phase": 3, "status": "exhausted", "errors": phase3_errors})
                on_debug("phase", {"phase": 1, "status": "retry_after_phase3_failure", "outer_attempt": outer_attempt + 1})
            book_log = None  # Reset so Phase 1 runs again on next outer iteration
        if not book_log:
            return TurnResult(error="Generation failed after exhausting all retries")
        # ── Finalize ──────────────────────────────────────────────────────
        if on_action:
            on_action("Turn complete")
        if on_debug:
@ -1022,6 +1070,59 @@ class GameEngine:
        """Fallback player prompt."""
        return "**What do you do?**"
    def _validate_narrative(self, book_log: str, *, on_debug: callable = None) -> tuple[bool, str]:
        """Check whether ``book_log`` is acceptable narrative.

        Cheap local heuristics run first; the LLM rating call is only made
        when every heuristic passes.

        Args:
            book_log: The narrative text produced by the prose phase.
            on_debug: Optional debug callback, forwarded to ``_call_llm``.

        Returns:
            ``(ok, reason)``. ``reason`` is an empty string when ``ok`` is
            True, otherwise a short explanation of why the text was rejected.
        """
        if not book_log or not book_log.strip():
            return False, "Empty narrative"

        # Work on stripped, non-blank lines only. Blank paragraph separators
        # are not content: counting them made any story with six or more
        # paragraphs look like "repetition" and diluted the mechanics ratio.
        lines = [ln.strip() for ln in book_log.splitlines() if ln.strip()]

        # 1) Heuristic: the same line repeated many times (looping output).
        from collections import Counter
        top_line, top_count = Counter(lines).most_common(1)[0]
        if top_count >= 5:
            return False, f"Repetition: '{top_line[:60]}' ×{top_count}"

        # 2) Heuristic: game-mechanics bleedthrough, i.e. lines opening with a
        #    bold mechanics keyword or a bold "Label:" / "Label (x):" header.
        #    Both "Strength" and the historical misspelling "Strenght" match.
        mech_pattern = (
            r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|'
            r'Stren(?:gth|ght)|Dexterity|Willpower|STR|DEX|WIL|'
            r'(?:[A-Z][a-z]+(?: \(\w+\))?:))'
        )
        mech_lines = [ln for ln in lines if re.match(mech_pattern, ln)]
        if mech_lines and len(mech_lines) / len(lines) > 0.3:
            return False, f"Game mechanics dominate ({len(mech_lines)}/{len(lines)} lines)"

        # 3) Heuristic: tool / json blocks leaked into the narrative.
        if re.search(r'```(?:tool|json)', book_log):
            return False, "Contains unprocessed tool blocks"

        # 4) Heuristic: fewer than 50 characters once markup and digits are removed.
        prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip()
        if len(prose) < 50:
            return False, "Too short to be meaningful"

        # 5) LLM quality rating (only if the heuristics pass).
        text = self._call_llm([
            {"role": "user", "content":
                f"Rate this RPG narrative quality 1-5.\n"
                f"1 = unreadable (spam, repetition, pure mechanics, garbled)\n"
                f"2 = poor (mostly mechanics, little story)\n"
                f"3 = acceptable (some narrative but rough)\n"
                f"4 = good (solid prose, minor issues)\n"
                f"5 = excellent (vivid, engaging)\n"
                f"Reply with ONLY a single digit 1-5.\n\n"
                f"{book_log[:600]}"}
        ], label="Narrative validation", max_tokens=2, on_debug=on_debug)

        # Take the first standalone digit 1-5 so replies such as "2/5" or "2."
        # are still honoured. An empty or unparseable reply fails open: the
        # narrative is accepted rather than blocking the turn on the rater.
        rating = re.search(r'(?<!\d)([1-5])(?!\d)', text) if text else None
        if rating:
            score = int(rating.group(1))
            if score < 3:
                return False, f"Quality score: {score}/5"
        return True, ""
    # ── Response Parsing ────────────────────────────────────────────────
    @staticmethod
--- a/tools/run.py
+++ b/tools/run.py
@ -841,8 +841,10 @@ class ChaosTUI(App):
            if status == "start":
                name = data.get("name", "")
                dice = data.get("dice")
                outer = data.get("outer_attempt")
                d = f"  dice={dice}" if dice else ""
-                self._append_debug(f"▸ Phase {p}: {name} {d}")
+                o = f" [attempt {outer}/3]" if outer else ""
                self._append_debug(f"▸ Phase {p}: {name}{o} {d}")
            elif status == "done":
                if p == 1:
                    self._append_debug(f"  ✔ prose: {data.get('chars', 0)} chars")
@ -870,6 +872,10 @@ class ChaosTUI(App):
                self._append_debug(f"  ✖ Phase 3 exhausted all retries — state changes may be missing!")
                for e in errs:
                    self._append_debug(f"    {e}")
            elif status == "retry_after_phase3_failure":
                self._append_debug(f"  ⟳ Phase 3 failed — retrying from Phase 1 (attempt {data.get('outer_attempt', '?')}/3)")
            elif status == "validation_failed":
                self._append_debug(f"  ✖ narrative rejected: {data.get('reason', '?')} (attempt {data.get('outer_attempt', '?')}/3)")
        elif event_type == "phase_done":
            self._append_debug(f"  ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars")
            if data.get("log_entry"):