diff --git a/tools/engine.py b/tools/engine.py index a813d0e..677d8f3 100644 --- a/tools/engine.py +++ b/tools/engine.py @@ -804,19 +804,25 @@ class GameEngine: elif last_prompt: self._append_llm_log(f"Resume from: {last_prompt[:120]}") - # ── Phase 1: Prose ──────────────────────────────────────────────── + # ── Outer loop: Phase 1 (prose) → Phase 2 (summarize) → Phase 3 (extract) ── import random die_roll = random.randint(1, 6) self._append_llm_log(f"Dice: {die_roll} (1d6)") - if on_action: - on_action(f"Phase 1/3: writing story (dice={die_roll})") - if on_debug: - on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll}) - book_log = None changes_block = "" - for attempt in range(3): + log_entry = None + user_prompt = self._auto_prompt("") + ambience = None + debug_info = "" + + for outer_attempt in range(3): + # ── Phase 1: Prose ──────────────────────────────────────────── + if on_action: + on_action(f"Phase 1/3: writing story (dice={die_roll})") + if on_debug: + on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll, "outer_attempt": outer_attempt + 1}) + system = PROSE_PROMPT.substitute( character=self._read_file(CHAR_PATH) or "*No character sheet.*", world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*", @@ -832,14 +838,14 @@ class GameEngine: text = self._call_llm([ {"role": "system", "content": system}, {"role": "user", "content": user}, - ], label=f"Prose attempt {attempt + 1}", max_tokens=1024, on_debug=on_debug) + ], label=f"Prose attempt {outer_attempt + 1}", max_tokens=1024, on_debug=on_debug) if not text or not text.strip(): if on_debug: - on_debug("phase", {"phase": 1, "status": "empty", "attempt": attempt + 1}) + on_debug("phase", {"phase": 1, "status": "empty", "attempt": outer_attempt + 1}) continue + raw = text.strip() - # Split narrative from ### Changes block changes_block = "" if "### Changes" in raw: parts = raw.split("### Changes", 1) @@ -850,137 +856,179 @@ class GameEngine: if on_debug: preview = book_log[:150].replace("\n", "\\n") on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview}) - break - if not book_log: - return TurnResult(error="Prose generation failed after 3 attempts") - - # ── Phase 2: Summarize ──────────────────────────────────────────── - if on_action: - on_action("Phase 2/3: summarizing story") - if on_debug: - on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"}) - - log_context = self._read_recent_log() - log_entry = None - for attempt in range(2): - context = book_log - if changes_block: - context += f"\n\n{changes_block}" - text = self._call_llm([ - {"role": "user", "content": - f"Given the session log so far, summarize the new story in one line. " - f"Focus on who was involved (character and NPC names):\n\n" - f"## Session Log\n{log_context}\n\n" - f"## New Story\n{context}"} - ], label=f"Summarize attempt {attempt + 1}", on_debug=on_debug) - if text and text.strip(): - log_entry = text.strip().split("\n")[0][:120] - if on_debug: - on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry}) - break - - if not log_entry: - log_entry = book_log.split("\n")[0][:120] + # ── Validation ──────────────────────────────────────────────── if on_debug: - on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry}) - - # ── Phase 3: Extract state changes ──────────────────────────────── - if on_action: - on_action("Phase 3/3: extracting state changes") - if on_debug: - on_debug("phase", {"phase": 3, "name": "extract", "status": "start"}) - - user_prompt = self._auto_prompt(book_log) - ambience = None - debug_info = "" - current_char = self._read_file(CHAR_PATH) or "*No character.*" - current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*" - - for attempt in range(3): - phase3_prompt = ( - f"## Current Character\n{current_char}\n\n" - f"## Current World\n{current_world}\n\n" - f"## Story\n{book_log}\n\n" - ) - if changes_block.strip(): - phase3_prompt += ( - f"## Changes to apply\n{changes_block}\n\n" - f"Convert the listed changes into tool calls:\n\n" - ) - else: - phase3_prompt += ( - f"Read the story and compare with current state. Output tool calls for any changes:\n\n" - ) - phase3_prompt += ( - f"Output ```tool blocks for changes only. Examples:\n\n" - ) - text = self._call_llm([ - {"role": "user", "content": phase3_prompt + - f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n" - f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n" - f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n" - f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n" - f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n" - f"Only output tools for things that actually changed. Omit unchanged fields."} - ], label=f"Extract attempt {attempt + 1}", on_debug=on_debug) - - if not text or not text.strip(): + on_debug("phase", {"phase": 1, "name": "validation", "status": "start"}) + valid, reason = self._validate_narrative(book_log, on_debug=on_debug) + if not valid: if on_debug: - on_debug("phase", {"phase": 3, "status": "empty", "attempt": attempt + 1}) + on_debug("phase", {"phase": 1, "status": "validation_failed", "reason": reason, "outer_attempt": outer_attempt + 1}) + book_log = None continue - tool_calls = self._extract_tool_calls( - text, round_num=attempt + 1, on_debug=on_debug - ) - if on_debug and tool_calls: - names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"] - fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls) - on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin}) - - errors = [] - for tc in tool_calls: - name = tc.get("tool", "?") - args = tc.get("args", {}) - if name == "finalize_turn": - if args.get("user_prompt"): - user_prompt = args["user_prompt"] - if args.get("ambience"): - ambience = args["ambience"] - continue - if on_action: - on_action(f"State: {self._describe_tool_action(name, args)}") - if on_debug: - on_debug("tool_call", {"round": attempt + 1, "tool": name, "args": args}) - - if name == "player_roll" and on_player_roll: - dice = args.get("dice", "1d6") - reason = args.get("reason", "a check") - roll_val = on_player_roll(dice, reason) - result = f"Player rolled {dice} for '{reason}': {roll_val}" - else: - result = self._execute_tool(name, args) - - if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"): - errors.append(f"{name}: {result}") - if on_debug: - on_debug("tool_result", {"round": attempt + 1, "tool": name, "result": result}) - - if not errors: - if on_debug: - on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])}) - break - debug_info = "; ".join(errors) + # ── Phase 2: Summarize ──────────────────────────────────────── + if on_action: + on_action("Phase 2/3: summarizing story") if on_debug: - on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": attempt + 1}) + on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"}) - if errors and on_debug: - on_debug("phase", {"phase": 3, "status": "exhausted", "errors": errors}) + log_context = self._read_recent_log() + log_entry = None + for p2_attempt in range(2): + context = book_log + if changes_block: + context += f"\n\n{changes_block}" + text = self._call_llm([ + {"role": "user", "content": + f"Given the session log so far, summarize the new story in one line. " + f"Focus on who was involved (character and NPC names):\n\n" + f"## Session Log\n{log_context}\n\n" + f"## New Story\n{context}"} + ], label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug) + if text and text.strip(): + log_entry = text.strip().split("\n")[0][:300] + if on_debug: + on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry}) + break + + if not log_entry: + log_entry = book_log.split("\n")[0][:120] + if on_debug: + on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry}) + + # ── Phase 3: Extract state changes ──────────────────────────── + if on_action: + on_action("Phase 3/3: extracting state changes") + if on_debug: + on_debug("phase", {"phase": 3, "name": "extract", "status": "start"}) + + user_prompt = self._auto_prompt(book_log) + ambience = None + phase3_errors = [] + + previous_attempt = None # {output, feedback} + phase3_ok = False + for p3_attempt in range(5): + current_char = self._read_file(CHAR_PATH) or "*No character.*" + current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*" + + phase3_prompt = ( + f"## Current Character\n{current_char}\n\n" + f"## Current World\n{current_world}\n\n" + f"## Story\n{book_log}\n\n" + ) + if changes_block.strip(): + phase3_prompt += ( + f"## Changes to apply\n{changes_block}\n\n" + f"Convert the listed changes into tool calls:\n\n" + ) + else: + phase3_prompt += ( + f"Read the story and compare with current state. Output tool calls for any changes:\n\n" + ) + phase3_prompt += ( + f"Output ```tool blocks for changes only. Examples:\n\n" + ) + + if previous_attempt: + phase3_prompt += ( + f"--- PREVIOUS ATTEMPT (had errors) ---\n" + f"{previous_attempt['output']}\n\n" + f"--- FEEDBACK ---\n" + f"{previous_attempt['feedback']}\n\n" + f"Fix the issues above. Output corrected tool calls only.\n\n" + ) + + text = self._call_llm([ + {"role": "user", "content": phase3_prompt + + f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n" + f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n" + f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n" + f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n" + f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n" + f"Only output tools for things that actually changed. Omit unchanged fields."} + ], label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug) + + if not text or not text.strip(): + if on_debug: + on_debug("phase", {"phase": 3, "status": "empty", "attempt": p3_attempt + 1}) + continue + + tool_calls = self._extract_tool_calls( + text, round_num=p3_attempt + 1, on_debug=on_debug + ) + if on_debug and tool_calls: + names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"] + fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls) + on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin}) + + errors = [] + for tc in tool_calls: + name = tc.get("tool", "?") + args = tc.get("args", {}) + if name == "finalize_turn": + if args.get("user_prompt"): + user_prompt = args["user_prompt"] + if args.get("ambience"): + ambience = args["ambience"] + continue + if on_action: + on_action(f"State: {self._describe_tool_action(name, args)}") + if on_debug: + on_debug("tool_call", {"round": p3_attempt + 1, "tool": name, "args": args}) + + if name == "player_roll" and on_player_roll: + dice = args.get("dice", "1d6") + reason = args.get("reason", "a check") + roll_val = on_player_roll(dice, reason) + result = f"Player rolled {dice} for '{reason}': {roll_val}" + else: + result = self._execute_tool(name, args) + + if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"): + errors.append(f"{name}: {result}") + if on_debug: + on_debug("tool_result", {"round": p3_attempt + 1, "tool": name, "result": result}) + + if not errors: + phase3_ok = True + debug_info = "" + if on_debug: + on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])}) + break + + phase3_errors = errors + debug_info = "; ".join(errors) + if on_debug: + on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": p3_attempt + 1}) + + # Build feedback for the LLM to fix on next attempt + feedback_lines = ["The previous tool calls had errors:"] + for e in errors: + feedback_lines.append(f"- {e}") + feedback_lines.append("") + feedback_lines.append("Fix ALL issues above. Use correct tool names, valid JSON, and reasonable values.") + previous_attempt = {"output": text, "feedback": "\n".join(feedback_lines)} + + if phase3_ok: + break # All phases succeeded on this outer attempt + + # Phase 3 failed after 5 attempts — retry from Phase 1 + if on_debug: + on_debug("phase", {"phase": 3, "status": "exhausted", "errors": phase3_errors}) + on_debug("phase", {"phase": 1, "status": "retry_after_phase3_failure", "outer_attempt": outer_attempt + 1}) + book_log = None # Reset so Phase 1 runs again on next outer iteration + + if not book_log: + return TurnResult(error="Generation failed after exhausting all retries") + + # ── Finalize ────────────────────────────────────────────────────── if on_action: on_action("Turn complete") if on_debug: @@ -1022,6 +1070,59 @@ class GameEngine: """Fallback player prompt.""" return "**What do you do?**" + def _validate_narrative(self, book_log: str, *, on_debug: callable = None) -> tuple[bool, str]: + """Check if book_log is acceptable narrative. Returns (ok, reason).""" + lines = book_log.strip().split("\n") + if not lines: + return False, "Empty narrative" + + # 1) Heuristic: high repetition count + from collections import Counter + common = Counter(lines).most_common(1) + if common and common[0][1] >= 5: + return False, f"Repetition: '{common[0][0][:60]}' ×{common[0][1]}" + + # 2) Heuristic: game mechanics bleedthrough + mech_lines = [l for l in lines if re.match( + r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|' + r'Strenght|Dexterity|Willpower|STR|DEX|WIL|' + r'(?:[A-Z][a-z]+(?: \(\w+\))?:))', + l + )] + if mech_lines: + ratio = len(mech_lines) / len(lines) + if ratio > 0.3: + return False, f"Game mechanics dominate ({len(mech_lines)}/{len(lines)} lines)" + + # 3) Heuristic: tool / json blocks leaked into narrative + if re.search(r'```(?:tool|json)', book_log): + return False, "Contains unprocessed tool blocks" + + # 4) Heuristic: under 50 characters of real prose + prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip() + if len(prose) < 50: + return False, "Too short to be meaningful" + + # 5) LLM quality rating (only if heuristics pass) + text = self._call_llm([ + {"role": "user", "content": + f"Rate this RPG narrative quality 1-5.\n" + f"1 = unreadable (spam, repetition, pure mechanics, garbled)\n" + f"2 = poor (mostly mechanics, little story)\n" + f"3 = acceptable (some narrative but rough)\n" + f"4 = good (solid prose, minor issues)\n" + f"5 = excellent (vivid, engaging)\n" + f"Reply with ONLY a single digit 1-5.\n\n" + f"{book_log[:600]}"} + ], label="Narrative validation", max_tokens=2, on_debug=on_debug) + + if text and text.strip().isdigit(): + score = int(text.strip()) + if score < 3: + return False, f"Quality score: {score}/5" + + return True, "" + # ── Response Parsing ──────────────────────────────────────────────── @staticmethod diff --git a/tools/run.py b/tools/run.py index 045cd6b..ff290b6 100755 --- a/tools/run.py +++ b/tools/run.py @@ -841,8 +841,10 @@ class ChaosTUI(App): if status == "start": name = data.get("name", "") dice = data.get("dice") + outer = data.get("outer_attempt") d = f" dice={dice}" if dice else "" - self._append_debug(f"▸ Phase {p}: {name} {d}") + o = f" [attempt {outer}/3]" if outer else "" + self._append_debug(f"▸ Phase {p}: {name}{o} {d}") elif status == "done": if p == 1: self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars") @@ -870,6 +872,10 @@ class ChaosTUI(App): self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!") for e in errs: self._append_debug(f" {e}") + elif status == "retry_after_phase3_failure": + self._append_debug(f" ⟳ Phase 3 failed — retrying from Phase 1 (attempt {data.get('outer_attempt', '?')}/3)") + elif status == "validation_failed": + self._append_debug(f" ✖ narrative rejected: {data.get('reason', '?')} (attempt {data.get('outer_attempt', '?')}/3)") elif event_type == "phase_done": self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars") if data.get("log_entry"):