LLM retries and validations

This commit is contained in:
Dejvino 2026-06-28 18:08:58 +02:00
parent a7e6d5540f
commit 5b93040f73
2 changed files with 242 additions and 135 deletions

View File

@ -804,19 +804,25 @@ class GameEngine:
elif last_prompt: elif last_prompt:
self._append_llm_log(f"Resume from: {last_prompt[:120]}") self._append_llm_log(f"Resume from: {last_prompt[:120]}")
# ── Phase 1: Prose ──────────────────────────────────────────────── # ── Outer loop: Phase 1 (prose) → Phase 2 (summarize) → Phase 3 (extract) ──
import random import random
die_roll = random.randint(1, 6) die_roll = random.randint(1, 6)
self._append_llm_log(f"Dice: {die_roll} (1d6)") self._append_llm_log(f"Dice: {die_roll} (1d6)")
if on_action:
on_action(f"Phase 1/3: writing story (dice={die_roll})")
if on_debug:
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll})
book_log = None book_log = None
changes_block = "" changes_block = ""
for attempt in range(3): log_entry = None
user_prompt = self._auto_prompt("")
ambience = None
debug_info = ""
for outer_attempt in range(3):
# ── Phase 1: Prose ────────────────────────────────────────────
if on_action:
on_action(f"Phase 1/3: writing story (dice={die_roll})")
if on_debug:
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll, "outer_attempt": outer_attempt + 1})
system = PROSE_PROMPT.substitute( system = PROSE_PROMPT.substitute(
character=self._read_file(CHAR_PATH) or "*No character sheet.*", character=self._read_file(CHAR_PATH) or "*No character sheet.*",
world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*", world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*",
@ -832,14 +838,14 @@ class GameEngine:
text = self._call_llm([ text = self._call_llm([
{"role": "system", "content": system}, {"role": "system", "content": system},
{"role": "user", "content": user}, {"role": "user", "content": user},
], label=f"Prose attempt {attempt + 1}", max_tokens=1024, on_debug=on_debug) ], label=f"Prose attempt {outer_attempt + 1}", max_tokens=1024, on_debug=on_debug)
if not text or not text.strip(): if not text or not text.strip():
if on_debug: if on_debug:
on_debug("phase", {"phase": 1, "status": "empty", "attempt": attempt + 1}) on_debug("phase", {"phase": 1, "status": "empty", "attempt": outer_attempt + 1})
continue continue
raw = text.strip() raw = text.strip()
# Split narrative from ### Changes block
changes_block = "" changes_block = ""
if "### Changes" in raw: if "### Changes" in raw:
parts = raw.split("### Changes", 1) parts = raw.split("### Changes", 1)
@ -850,137 +856,179 @@ class GameEngine:
if on_debug: if on_debug:
preview = book_log[:150].replace("\n", "\\n") preview = book_log[:150].replace("\n", "\\n")
on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview}) on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview})
break
if not book_log: # ── Validation ────────────────────────────────────────────────
return TurnResult(error="Prose generation failed after 3 attempts")
# ── Phase 2: Summarize ────────────────────────────────────────────
if on_action:
on_action("Phase 2/3: summarizing story")
if on_debug:
on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"})
log_context = self._read_recent_log()
log_entry = None
for attempt in range(2):
context = book_log
if changes_block:
context += f"\n\n{changes_block}"
text = self._call_llm([
{"role": "user", "content":
f"Given the session log so far, summarize the new story in one line. "
f"Focus on who was involved (character and NPC names):\n\n"
f"## Session Log\n{log_context}\n\n"
f"## New Story\n{context}"}
], label=f"Summarize attempt {attempt + 1}", on_debug=on_debug)
if text and text.strip():
log_entry = text.strip().split("\n")[0][:120]
if on_debug:
on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
break
if not log_entry:
log_entry = book_log.split("\n")[0][:120]
if on_debug: if on_debug:
on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry}) on_debug("phase", {"phase": 1, "name": "validation", "status": "start"})
valid, reason = self._validate_narrative(book_log, on_debug=on_debug)
# ── Phase 3: Extract state changes ──────────────────────────────── if not valid:
if on_action:
on_action("Phase 3/3: extracting state changes")
if on_debug:
on_debug("phase", {"phase": 3, "name": "extract", "status": "start"})
user_prompt = self._auto_prompt(book_log)
ambience = None
debug_info = ""
current_char = self._read_file(CHAR_PATH) or "*No character.*"
current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
for attempt in range(3):
phase3_prompt = (
f"## Current Character\n{current_char}\n\n"
f"## Current World\n{current_world}\n\n"
f"## Story\n{book_log}\n\n"
)
if changes_block.strip():
phase3_prompt += (
f"## Changes to apply\n{changes_block}\n\n"
f"Convert the listed changes into tool calls:\n\n"
)
else:
phase3_prompt += (
f"Read the story and compare with current state. Output tool calls for any changes:\n\n"
)
phase3_prompt += (
f"Output ```tool blocks for changes only. Examples:\n\n"
)
text = self._call_llm([
{"role": "user", "content": phase3_prompt +
f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n"
f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
f"Only output tools for things that actually changed. Omit unchanged fields."}
], label=f"Extract attempt {attempt + 1}", on_debug=on_debug)
if not text or not text.strip():
if on_debug: if on_debug:
on_debug("phase", {"phase": 3, "status": "empty", "attempt": attempt + 1}) on_debug("phase", {"phase": 1, "status": "validation_failed", "reason": reason, "outer_attempt": outer_attempt + 1})
book_log = None
continue continue
tool_calls = self._extract_tool_calls( # ── Phase 2: Summarize ────────────────────────────────────────
text, round_num=attempt + 1, on_debug=on_debug if on_action:
) on_action("Phase 2/3: summarizing story")
if on_debug and tool_calls:
names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls)
on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin})
errors = []
for tc in tool_calls:
name = tc.get("tool", "?")
args = tc.get("args", {})
if name == "finalize_turn":
if args.get("user_prompt"):
user_prompt = args["user_prompt"]
if args.get("ambience"):
ambience = args["ambience"]
continue
if on_action:
on_action(f"State: {self._describe_tool_action(name, args)}")
if on_debug:
on_debug("tool_call", {"round": attempt + 1, "tool": name, "args": args})
if name == "player_roll" and on_player_roll:
dice = args.get("dice", "1d6")
reason = args.get("reason", "a check")
roll_val = on_player_roll(dice, reason)
result = f"Player rolled {dice} for '{reason}': {roll_val}"
else:
result = self._execute_tool(name, args)
if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
errors.append(f"{name}: {result}")
if on_debug:
on_debug("tool_result", {"round": attempt + 1, "tool": name, "result": result})
if not errors:
if on_debug:
on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
break
debug_info = "; ".join(errors)
if on_debug: if on_debug:
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": attempt + 1}) on_debug("phase", {"phase": 2, "name": "summarize", "status": "start"})
if errors and on_debug: log_context = self._read_recent_log()
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": errors}) log_entry = None
for p2_attempt in range(2):
context = book_log
if changes_block:
context += f"\n\n{changes_block}"
text = self._call_llm([
{"role": "user", "content":
f"Given the session log so far, summarize the new story in one line. "
f"Focus on who was involved (character and NPC names):\n\n"
f"## Session Log\n{log_context}\n\n"
f"## New Story\n{context}"}
], label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
if text and text.strip():
log_entry = text.strip().split("\n")[0][:300]
if on_debug:
on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
break
if not log_entry:
log_entry = book_log.split("\n")[0][:120]
if on_debug:
on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
# ── Phase 3: Extract state changes ────────────────────────────
if on_action:
on_action("Phase 3/3: extracting state changes")
if on_debug:
on_debug("phase", {"phase": 3, "name": "extract", "status": "start"})
user_prompt = self._auto_prompt(book_log)
ambience = None
phase3_errors = []
previous_attempt = None # {output, feedback}
phase3_ok = False
for p3_attempt in range(5):
current_char = self._read_file(CHAR_PATH) or "*No character.*"
current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
phase3_prompt = (
f"## Current Character\n{current_char}\n\n"
f"## Current World\n{current_world}\n\n"
f"## Story\n{book_log}\n\n"
)
if changes_block.strip():
phase3_prompt += (
f"## Changes to apply\n{changes_block}\n\n"
f"Convert the listed changes into tool calls:\n\n"
)
else:
phase3_prompt += (
f"Read the story and compare with current state. Output tool calls for any changes:\n\n"
)
phase3_prompt += (
f"Output ```tool blocks for changes only. Examples:\n\n"
)
if previous_attempt:
phase3_prompt += (
f"--- PREVIOUS ATTEMPT (had errors) ---\n"
f"{previous_attempt['output']}\n\n"
f"--- FEEDBACK ---\n"
f"{previous_attempt['feedback']}\n\n"
f"Fix the issues above. Output corrected tool calls only.\n\n"
)
text = self._call_llm([
{"role": "user", "content": phase3_prompt +
f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
f"```tool\n{{\"tool\": \"modify_traits\", \"args\": {{\"dex\": 15}}}}\n```\n"
f"```tool\n{{\"tool\": \"add_to_inventory\", \"args\": {{\"item\": \"Silver key\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"remove_from_inventory\", \"args\": {{\"item\": \"Torches (10)\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"replace_gear\", \"args\": {{\"before\": \"Mace (1d6+1)\", \"after\": \"Mace (1d6+2, sharpened)\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"add_note\", \"args\": {{\"note\": \"Found a hidden passage under the temple\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"replace_note\", \"args\": {{\"before\": \"Old note text\", \"after\": \"New note text\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"world_update\", \"args\": {{\"content\": \"# The World\\n\\n...full new world state...\"}}}}\n```\n"
f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
f"Only output tools for things that actually changed. Omit unchanged fields."}
], label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)
if not text or not text.strip():
if on_debug:
on_debug("phase", {"phase": 3, "status": "empty", "attempt": p3_attempt + 1})
continue
tool_calls = self._extract_tool_calls(
text, round_num=p3_attempt + 1, on_debug=on_debug
)
if on_debug and tool_calls:
names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
fin = any(tc.get("tool") == "finalize_turn" for tc in tool_calls)
on_debug("phase", {"phase": 3, "status": "tools_found", "tools": names, "has_finalize": fin})
errors = []
for tc in tool_calls:
name = tc.get("tool", "?")
args = tc.get("args", {})
if name == "finalize_turn":
if args.get("user_prompt"):
user_prompt = args["user_prompt"]
if args.get("ambience"):
ambience = args["ambience"]
continue
if on_action:
on_action(f"State: {self._describe_tool_action(name, args)}")
if on_debug:
on_debug("tool_call", {"round": p3_attempt + 1, "tool": name, "args": args})
if name == "player_roll" and on_player_roll:
dice = args.get("dice", "1d6")
reason = args.get("reason", "a check")
roll_val = on_player_roll(dice, reason)
result = f"Player rolled {dice} for '{reason}': {roll_val}"
else:
result = self._execute_tool(name, args)
if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
errors.append(f"{name}: {result}")
if on_debug:
on_debug("tool_result", {"round": p3_attempt + 1, "tool": name, "result": result})
if not errors:
phase3_ok = True
debug_info = ""
if on_debug:
on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
break
phase3_errors = errors
debug_info = "; ".join(errors)
if on_debug:
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": p3_attempt + 1})
# Build feedback for the LLM to fix on next attempt
feedback_lines = ["The previous tool calls had errors:"]
for e in errors:
feedback_lines.append(f"- {e}")
feedback_lines.append("")
feedback_lines.append("Fix ALL issues above. Use correct tool names, valid JSON, and reasonable values.")
previous_attempt = {"output": text, "feedback": "\n".join(feedback_lines)}
if phase3_ok:
break # All phases succeeded on this outer attempt
# Phase 3 failed after 5 attempts — retry from Phase 1
if on_debug:
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": phase3_errors})
on_debug("phase", {"phase": 1, "status": "retry_after_phase3_failure", "outer_attempt": outer_attempt + 1})
book_log = None # Reset so Phase 1 runs again on next outer iteration
if not book_log:
return TurnResult(error="Generation failed after exhausting all retries")
# ── Finalize ──────────────────────────────────────────────────────
if on_action: if on_action:
on_action("Turn complete") on_action("Turn complete")
if on_debug: if on_debug:
@ -1022,6 +1070,59 @@ class GameEngine:
"""Fallback player prompt.""" """Fallback player prompt."""
return "**What do you do?**" return "**What do you do?**"
def _validate_narrative(self, book_log: str, *, on_debug: callable = None) -> tuple[bool, str]:
    """Check whether ``book_log`` is acceptable narrative prose.

    Cheap local heuristics run first; the LLM is only asked for a quality
    rating when all of them pass.

    Args:
        book_log: The narrative text produced by the prose phase.
        on_debug: Optional debug callback, forwarded unchanged to
            ``self._call_llm``.

    Returns:
        ``(ok, reason)``. ``reason`` is an empty string when ``ok`` is True,
        otherwise a short human-readable explanation of the rejection.
    """
    # Local imports keep this method self-contained.
    import re
    from collections import Counter

    if not book_log or not book_log.strip():
        return False, "Empty narrative"

    # Blank lines are paragraph separators, not content. Counting them would
    # flag any story with five or more paragraph breaks as "repetition" and
    # would dilute the mechanics ratio below.
    lines = [ln for ln in book_log.strip().split("\n") if ln.strip()]

    # 1) Heuristic: the same content line repeated many times.
    common = Counter(lines).most_common(1)
    if common and common[0][1] >= 5:
        return False, f"Repetition: '{common[0][0][:60]}' ×{common[0][1]}"

    # 2) Heuristic: game mechanics bleedthrough (bold stat / roll lines).
    # "Strength" is matched alongside the historical misspelling "Strenght".
    mech_pattern = re.compile(
        r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|'
        r'Stren(?:gth|ght)|Dexterity|Willpower|STR|DEX|WIL|'
        r'(?:[A-Z][a-z]+(?: \(\w+\))?:))'
    )
    mech_lines = [ln for ln in lines if mech_pattern.match(ln)]
    if mech_lines:
        ratio = len(mech_lines) / len(lines)
        if ratio > 0.3:
            return False, f"Game mechanics dominate ({len(mech_lines)}/{len(lines)} lines)"

    # 3) Heuristic: tool / json blocks leaked into the narrative.
    if re.search(r'```(?:tool|json)', book_log):
        return False, "Contains unprocessed tool blocks"

    # 4) Heuristic: under 50 characters once markup and digits are removed.
    prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip()
    if len(prose) < 50:
        return False, "Too short to be meaningful"

    # 5) LLM quality rating (only if heuristics pass).
    text = self._call_llm([
        {"role": "user", "content":
            f"Rate this RPG narrative quality 1-5.\n"
            f"1 = unreadable (spam, repetition, pure mechanics, garbled)\n"
            f"2 = poor (mostly mechanics, little story)\n"
            f"3 = acceptable (some narrative but rough)\n"
            f"4 = good (solid prose, minor issues)\n"
            f"5 = excellent (vivid, engaging)\n"
            f"Reply with ONLY a single digit 1-5.\n\n"
            f"{book_log[:600]}"}
    ], label="Narrative validation", max_tokens=2, on_debug=on_debug)

    # Take the first ASCII digit of the reply. str.isdigit() would miss
    # answers such as "2." and accepts Unicode digits like "²" that int()
    # cannot parse. A reply with no digit is treated as "no opinion" and the
    # narrative is accepted.
    digit = re.search(r'[0-9]', text) if text else None
    if digit:
        score = int(digit.group())
        if score < 3:
            return False, f"Quality score: {score}/5"
    return True, ""
# ── Response Parsing ──────────────────────────────────────────────── # ── Response Parsing ────────────────────────────────────────────────
@staticmethod @staticmethod

View File

@ -841,8 +841,10 @@ class ChaosTUI(App):
if status == "start": if status == "start":
name = data.get("name", "") name = data.get("name", "")
dice = data.get("dice") dice = data.get("dice")
outer = data.get("outer_attempt")
d = f" dice={dice}" if dice else "" d = f" dice={dice}" if dice else ""
self._append_debug(f"▸ Phase {p}: {name} {d}") o = f" [attempt {outer}/3]" if outer else ""
self._append_debug(f"▸ Phase {p}: {name}{o} {d}")
elif status == "done": elif status == "done":
if p == 1: if p == 1:
self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars") self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars")
@ -870,6 +872,10 @@ class ChaosTUI(App):
self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!") self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!")
for e in errs: for e in errs:
self._append_debug(f" {e}") self._append_debug(f" {e}")
elif status == "retry_after_phase3_failure":
self._append_debug(f" ⟳ Phase 3 failed — retrying from Phase 1 (attempt {data.get('outer_attempt', '?')}/3)")
elif status == "validation_failed":
self._append_debug(f" ✖ narrative rejected: {data.get('reason', '?')} (attempt {data.get('outer_attempt', '?')}/3)")
elif event_type == "phase_done": elif event_type == "phase_done":
self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars") self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars")
if data.get("log_entry"): if data.get("log_entry"):