LLM retries and validations
This commit is contained in:
parent
a7e6d5540f
commit
5b93040f73
153
tools/engine.py
153
tools/engine.py
@ -804,19 +804,25 @@ class GameEngine:
|
||||
elif last_prompt:
|
||||
self._append_llm_log(f"Resume from: {last_prompt[:120]}")
|
||||
|
||||
# ── Phase 1: Prose ────────────────────────────────────────────────
|
||||
# ── Outer loop: Phase 1 (prose) → Phase 2 (summarize) → Phase 3 (extract) ──
|
||||
import random
|
||||
die_roll = random.randint(1, 6)
|
||||
self._append_llm_log(f"Dice: {die_roll} (1d6)")
|
||||
|
||||
book_log = None
|
||||
changes_block = ""
|
||||
log_entry = None
|
||||
user_prompt = self._auto_prompt("")
|
||||
ambience = None
|
||||
debug_info = ""
|
||||
|
||||
for outer_attempt in range(3):
|
||||
# ── Phase 1: Prose ────────────────────────────────────────────
|
||||
if on_action:
|
||||
on_action(f"Phase 1/3: writing story (dice={die_roll})")
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll})
|
||||
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll, "outer_attempt": outer_attempt + 1})
|
||||
|
||||
book_log = None
|
||||
changes_block = ""
|
||||
for attempt in range(3):
|
||||
system = PROSE_PROMPT.substitute(
|
||||
character=self._read_file(CHAR_PATH) or "*No character sheet.*",
|
||||
world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*",
|
||||
@ -832,14 +838,14 @@ class GameEngine:
|
||||
text = self._call_llm([
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": user},
|
||||
], label=f"Prose attempt {attempt + 1}", max_tokens=1024, on_debug=on_debug)
|
||||
], label=f"Prose attempt {outer_attempt + 1}", max_tokens=1024, on_debug=on_debug)
|
||||
|
||||
if not text or not text.strip():
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 1, "status": "empty", "attempt": attempt + 1})
|
||||
on_debug("phase", {"phase": 1, "status": "empty", "attempt": outer_attempt + 1})
|
||||
continue
|
||||
|
||||
raw = text.strip()
|
||||
# Split narrative from ### Changes block
|
||||
changes_block = ""
|
||||
if "### Changes" in raw:
|
||||
parts = raw.split("### Changes", 1)
|
||||
@ -850,12 +856,18 @@ class GameEngine:
|
||||
if on_debug:
|
||||
preview = book_log[:150].replace("\n", "\\n")
|
||||
on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview})
|
||||
break
|
||||
|
||||
if not book_log:
|
||||
return TurnResult(error="Prose generation failed after 3 attempts")
|
||||
# ── Validation ────────────────────────────────────────────────
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 1, "name": "validation", "status": "start"})
|
||||
valid, reason = self._validate_narrative(book_log, on_debug=on_debug)
|
||||
if not valid:
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 1, "status": "validation_failed", "reason": reason, "outer_attempt": outer_attempt + 1})
|
||||
book_log = None
|
||||
continue
|
||||
|
||||
# ── Phase 2: Summarize ────────────────────────────────────────────
|
||||
# ── Phase 2: Summarize ────────────────────────────────────────
|
||||
if on_action:
|
||||
on_action("Phase 2/3: summarizing story")
|
||||
if on_debug:
|
||||
@ -863,7 +875,7 @@ class GameEngine:
|
||||
|
||||
log_context = self._read_recent_log()
|
||||
log_entry = None
|
||||
for attempt in range(2):
|
||||
for p2_attempt in range(2):
|
||||
context = book_log
|
||||
if changes_block:
|
||||
context += f"\n\n{changes_block}"
|
||||
@ -873,9 +885,9 @@ class GameEngine:
|
||||
f"Focus on who was involved (character and NPC names):\n\n"
|
||||
f"## Session Log\n{log_context}\n\n"
|
||||
f"## New Story\n{context}"}
|
||||
], label=f"Summarize attempt {attempt + 1}", on_debug=on_debug)
|
||||
], label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
|
||||
if text and text.strip():
|
||||
log_entry = text.strip().split("\n")[0][:120]
|
||||
log_entry = text.strip().split("\n")[0][:300]
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
|
||||
break
|
||||
@ -885,7 +897,7 @@ class GameEngine:
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
|
||||
|
||||
# ── Phase 3: Extract state changes ────────────────────────────────
|
||||
# ── Phase 3: Extract state changes ────────────────────────────
|
||||
if on_action:
|
||||
on_action("Phase 3/3: extracting state changes")
|
||||
if on_debug:
|
||||
@ -893,11 +905,14 @@ class GameEngine:
|
||||
|
||||
user_prompt = self._auto_prompt(book_log)
|
||||
ambience = None
|
||||
debug_info = ""
|
||||
phase3_errors = []
|
||||
|
||||
previous_attempt = None # {output, feedback}
|
||||
phase3_ok = False
|
||||
for p3_attempt in range(5):
|
||||
current_char = self._read_file(CHAR_PATH) or "*No character.*"
|
||||
current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
|
||||
|
||||
for attempt in range(3):
|
||||
phase3_prompt = (
|
||||
f"## Current Character\n{current_char}\n\n"
|
||||
f"## Current World\n{current_world}\n\n"
|
||||
@ -915,6 +930,16 @@ class GameEngine:
|
||||
phase3_prompt += (
|
||||
f"Output ```tool blocks for changes only. Examples:\n\n"
|
||||
)
|
||||
|
||||
if previous_attempt:
|
||||
phase3_prompt += (
|
||||
f"--- PREVIOUS ATTEMPT (had errors) ---\n"
|
||||
f"{previous_attempt['output']}\n\n"
|
||||
f"--- FEEDBACK ---\n"
|
||||
f"{previous_attempt['feedback']}\n\n"
|
||||
f"Fix the issues above. Output corrected tool calls only.\n\n"
|
||||
)
|
||||
|
||||
text = self._call_llm([
|
||||
{"role": "user", "content": phase3_prompt +
|
||||
f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
|
||||
@ -928,15 +953,15 @@ class GameEngine:
|
||||
f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
|
||||
f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
|
||||
f"Only output tools for things that actually changed. Omit unchanged fields."}
|
||||
], label=f"Extract attempt {attempt + 1}", on_debug=on_debug)
|
||||
], label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)
|
||||
|
||||
if not text or not text.strip():
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 3, "status": "empty", "attempt": attempt + 1})
|
||||
on_debug("phase", {"phase": 3, "status": "empty", "attempt": p3_attempt + 1})
|
||||
continue
|
||||
|
||||
tool_calls = self._extract_tool_calls(
|
||||
text, round_num=attempt + 1, on_debug=on_debug
|
||||
text, round_num=p3_attempt + 1, on_debug=on_debug
|
||||
)
|
||||
if on_debug and tool_calls:
|
||||
names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
|
||||
@ -956,7 +981,7 @@ class GameEngine:
|
||||
if on_action:
|
||||
on_action(f"State: {self._describe_tool_action(name, args)}")
|
||||
if on_debug:
|
||||
on_debug("tool_call", {"round": attempt + 1, "tool": name, "args": args})
|
||||
on_debug("tool_call", {"round": p3_attempt + 1, "tool": name, "args": args})
|
||||
|
||||
if name == "player_roll" and on_player_roll:
|
||||
dice = args.get("dice", "1d6")
|
||||
@ -969,18 +994,41 @@ class GameEngine:
|
||||
if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
|
||||
errors.append(f"{name}: {result}")
|
||||
if on_debug:
|
||||
on_debug("tool_result", {"round": attempt + 1, "tool": name, "result": result})
|
||||
on_debug("tool_result", {"round": p3_attempt + 1, "tool": name, "result": result})
|
||||
|
||||
if not errors:
|
||||
phase3_ok = True
|
||||
debug_info = ""
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
|
||||
break
|
||||
|
||||
phase3_errors = errors
|
||||
debug_info = "; ".join(errors)
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": attempt + 1})
|
||||
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": p3_attempt + 1})
|
||||
|
||||
if errors and on_debug:
|
||||
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": errors})
|
||||
# Build feedback for the LLM to fix on next attempt
|
||||
feedback_lines = ["The previous tool calls had errors:"]
|
||||
for e in errors:
|
||||
feedback_lines.append(f"- {e}")
|
||||
feedback_lines.append("")
|
||||
feedback_lines.append("Fix ALL issues above. Use correct tool names, valid JSON, and reasonable values.")
|
||||
previous_attempt = {"output": text, "feedback": "\n".join(feedback_lines)}
|
||||
|
||||
if phase3_ok:
|
||||
break # All phases succeeded on this outer attempt
|
||||
|
||||
# Phase 3 failed after 5 attempts — retry from Phase 1
|
||||
if on_debug:
|
||||
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": phase3_errors})
|
||||
on_debug("phase", {"phase": 1, "status": "retry_after_phase3_failure", "outer_attempt": outer_attempt + 1})
|
||||
book_log = None # Reset so Phase 1 runs again on next outer iteration
|
||||
|
||||
if not book_log:
|
||||
return TurnResult(error="Generation failed after exhausting all retries")
|
||||
|
||||
# ── Finalize ──────────────────────────────────────────────────────
|
||||
if on_action:
|
||||
on_action("Turn complete")
|
||||
if on_debug:
|
||||
@ -1022,6 +1070,59 @@ class GameEngine:
|
||||
"""Fallback player prompt."""
|
||||
return "**What do you do?**"
|
||||
|
||||
def _validate_narrative(self, book_log: str, *, on_debug: callable = None) -> tuple[bool, str]:
    """Check whether ``book_log`` is acceptable narrative prose.

    Cheap heuristics run first; the LLM quality rating is requested only
    when all of them pass.

    Args:
        book_log: Narrative text produced by the prose phase.
        on_debug: Optional debug callback, forwarded unchanged to
            ``self._call_llm``.

    Returns:
        ``(ok, reason)``. ``reason`` is an empty string when ``ok`` is
        True, otherwise a short description of why the text was rejected.
    """
    # str.split("\n") never returns an empty list, so emptiness has to be
    # checked on the text itself.
    if not book_log or not book_log.strip():
        return False, "Empty narrative"

    lines = book_log.strip().split("\n")

    # 1) Heuristic: high repetition count.
    # Blank lines are paragraph separators, not repeated content; counting
    # them would reject any narrative with five or more paragraph breaks.
    from collections import Counter
    content_lines = [line for line in lines if line.strip()]
    common = Counter(content_lines).most_common(1)
    if common and common[0][1] >= 5:
        return False, f"Repetition: '{common[0][0][:60]}' ×{common[0][1]}"

    # 2) Heuristic: game mechanics bleedthrough (bold stat / roll lines).
    mech_pattern = re.compile(
        r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|'
        r'Strength|Dexterity|Willpower|STR|DEX|WIL|'
        r'(?:[A-Z][a-z]+(?: \(\w+\))?:))'
    )
    mech_lines = [line for line in lines if mech_pattern.match(line)]
    if mech_lines:
        ratio = len(mech_lines) / len(lines)
        if ratio > 0.3:
            return False, f"Game mechanics dominate ({len(mech_lines)}/{len(lines)} lines)"

    # 3) Heuristic: tool / json blocks leaked into narrative.
    if re.search(r'```(?:tool|json)', book_log):
        return False, "Contains unprocessed tool blocks"

    # 4) Heuristic: under 50 characters of real prose once markdown
    # punctuation and digits are removed.
    prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip()
    if len(prose) < 50:
        return False, "Too short to be meaningful"

    # 5) LLM quality rating (only if heuristics pass).
    text = self._call_llm([
        {"role": "user", "content":
            "Rate this RPG narrative quality 1-5.\n"
            "1 = unreadable (spam, repetition, pure mechanics, garbled)\n"
            "2 = poor (mostly mechanics, little story)\n"
            "3 = acceptable (some narrative but rough)\n"
            "4 = good (solid prose, minor issues)\n"
            "5 = excellent (vivid, engaging)\n"
            "Reply with ONLY a single digit 1-5.\n\n"
            f"{book_log[:600]}"}
    ], label="Narrative validation", max_tokens=2, on_debug=on_debug)

    # Parse the leading integer rather than relying on str.isdigit():
    # isdigit() accepts characters such as superscripts that int() cannot
    # convert, and it ignores replies like "2." or "2/5" entirely.
    score_match = re.match(r'\s*(\d+)', text) if text else None
    if score_match:
        score = int(score_match.group(1))
        if score < 3:
            return False, f"Quality score: {score}/5"

    # No usable rating: accept, since the heuristics already passed.
    return True, ""
|
||||
|
||||
# ── Response Parsing ────────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -841,8 +841,10 @@ class ChaosTUI(App):
|
||||
if status == "start":
|
||||
name = data.get("name", "")
|
||||
dice = data.get("dice")
|
||||
outer = data.get("outer_attempt")
|
||||
d = f" dice={dice}" if dice else ""
|
||||
self._append_debug(f"▸ Phase {p}: {name} {d}")
|
||||
o = f" [attempt {outer}/3]" if outer else ""
|
||||
self._append_debug(f"▸ Phase {p}: {name}{o} {d}")
|
||||
elif status == "done":
|
||||
if p == 1:
|
||||
self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars")
|
||||
@ -870,6 +872,10 @@ class ChaosTUI(App):
|
||||
self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!")
|
||||
for e in errs:
|
||||
self._append_debug(f" {e}")
|
||||
elif status == "retry_after_phase3_failure":
|
||||
self._append_debug(f" ⟳ Phase 3 failed — retrying from Phase 1 (attempt {data.get('outer_attempt', '?')}/3)")
|
||||
elif status == "validation_failed":
|
||||
self._append_debug(f" ✖ narrative rejected: {data.get('reason', '?')} (attempt {data.get('outer_attempt', '?')}/3)")
|
||||
elif event_type == "phase_done":
|
||||
self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars")
|
||||
if data.get("log_entry"):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user