LLM retries and validations
This commit is contained in:
parent
a7e6d5540f
commit
5b93040f73
153
tools/engine.py
153
tools/engine.py
@ -804,19 +804,25 @@ class GameEngine:
|
|||||||
elif last_prompt:
|
elif last_prompt:
|
||||||
self._append_llm_log(f"Resume from: {last_prompt[:120]}")
|
self._append_llm_log(f"Resume from: {last_prompt[:120]}")
|
||||||
|
|
||||||
# ── Phase 1: Prose ────────────────────────────────────────────────
|
# ── Outer loop: Phase 1 (prose) → Phase 2 (summarize) → Phase 3 (extract) ──
|
||||||
import random
|
import random
|
||||||
die_roll = random.randint(1, 6)
|
die_roll = random.randint(1, 6)
|
||||||
self._append_llm_log(f"Dice: {die_roll} (1d6)")
|
self._append_llm_log(f"Dice: {die_roll} (1d6)")
|
||||||
|
|
||||||
|
book_log = None
|
||||||
|
changes_block = ""
|
||||||
|
log_entry = None
|
||||||
|
user_prompt = self._auto_prompt("")
|
||||||
|
ambience = None
|
||||||
|
debug_info = ""
|
||||||
|
|
||||||
|
for outer_attempt in range(3):
|
||||||
|
# ── Phase 1: Prose ────────────────────────────────────────────
|
||||||
if on_action:
|
if on_action:
|
||||||
on_action(f"Phase 1/3: writing story (dice={die_roll})")
|
on_action(f"Phase 1/3: writing story (dice={die_roll})")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll})
|
on_debug("phase", {"phase": 1, "name": "prose", "status": "start", "dice": die_roll, "outer_attempt": outer_attempt + 1})
|
||||||
|
|
||||||
book_log = None
|
|
||||||
changes_block = ""
|
|
||||||
for attempt in range(3):
|
|
||||||
system = PROSE_PROMPT.substitute(
|
system = PROSE_PROMPT.substitute(
|
||||||
character=self._read_file(CHAR_PATH) or "*No character sheet.*",
|
character=self._read_file(CHAR_PATH) or "*No character sheet.*",
|
||||||
world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*",
|
world=self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world state.*",
|
||||||
@ -832,14 +838,14 @@ class GameEngine:
|
|||||||
text = self._call_llm([
|
text = self._call_llm([
|
||||||
{"role": "system", "content": system},
|
{"role": "system", "content": system},
|
||||||
{"role": "user", "content": user},
|
{"role": "user", "content": user},
|
||||||
], label=f"Prose attempt {attempt + 1}", max_tokens=1024, on_debug=on_debug)
|
], label=f"Prose attempt {outer_attempt + 1}", max_tokens=1024, on_debug=on_debug)
|
||||||
|
|
||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 1, "status": "empty", "attempt": attempt + 1})
|
on_debug("phase", {"phase": 1, "status": "empty", "attempt": outer_attempt + 1})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raw = text.strip()
|
raw = text.strip()
|
||||||
# Split narrative from ### Changes block
|
|
||||||
changes_block = ""
|
changes_block = ""
|
||||||
if "### Changes" in raw:
|
if "### Changes" in raw:
|
||||||
parts = raw.split("### Changes", 1)
|
parts = raw.split("### Changes", 1)
|
||||||
@ -850,12 +856,18 @@ class GameEngine:
|
|||||||
if on_debug:
|
if on_debug:
|
||||||
preview = book_log[:150].replace("\n", "\\n")
|
preview = book_log[:150].replace("\n", "\\n")
|
||||||
on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview})
|
on_debug("phase", {"phase": 1, "status": "done", "chars": len(book_log), "changes": bool(changes_block), "preview": preview})
|
||||||
break
|
|
||||||
|
|
||||||
if not book_log:
|
# ── Validation ────────────────────────────────────────────────
|
||||||
return TurnResult(error="Prose generation failed after 3 attempts")
|
if on_debug:
|
||||||
|
on_debug("phase", {"phase": 1, "name": "validation", "status": "start"})
|
||||||
|
valid, reason = self._validate_narrative(book_log, on_debug=on_debug)
|
||||||
|
if not valid:
|
||||||
|
if on_debug:
|
||||||
|
on_debug("phase", {"phase": 1, "status": "validation_failed", "reason": reason, "outer_attempt": outer_attempt + 1})
|
||||||
|
book_log = None
|
||||||
|
continue
|
||||||
|
|
||||||
# ── Phase 2: Summarize ────────────────────────────────────────────
|
# ── Phase 2: Summarize ────────────────────────────────────────
|
||||||
if on_action:
|
if on_action:
|
||||||
on_action("Phase 2/3: summarizing story")
|
on_action("Phase 2/3: summarizing story")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
@ -863,7 +875,7 @@ class GameEngine:
|
|||||||
|
|
||||||
log_context = self._read_recent_log()
|
log_context = self._read_recent_log()
|
||||||
log_entry = None
|
log_entry = None
|
||||||
for attempt in range(2):
|
for p2_attempt in range(2):
|
||||||
context = book_log
|
context = book_log
|
||||||
if changes_block:
|
if changes_block:
|
||||||
context += f"\n\n{changes_block}"
|
context += f"\n\n{changes_block}"
|
||||||
@ -873,9 +885,9 @@ class GameEngine:
|
|||||||
f"Focus on who was involved (character and NPC names):\n\n"
|
f"Focus on who was involved (character and NPC names):\n\n"
|
||||||
f"## Session Log\n{log_context}\n\n"
|
f"## Session Log\n{log_context}\n\n"
|
||||||
f"## New Story\n{context}"}
|
f"## New Story\n{context}"}
|
||||||
], label=f"Summarize attempt {attempt + 1}", on_debug=on_debug)
|
], label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
|
||||||
if text and text.strip():
|
if text and text.strip():
|
||||||
log_entry = text.strip().split("\n")[0][:120]
|
log_entry = text.strip().split("\n")[0][:300]
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
|
on_debug("phase", {"phase": 2, "status": "done", "summary": log_entry})
|
||||||
break
|
break
|
||||||
@ -885,7 +897,7 @@ class GameEngine:
|
|||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
|
on_debug("phase", {"phase": 2, "status": "fallback", "summary": log_entry})
|
||||||
|
|
||||||
# ── Phase 3: Extract state changes ────────────────────────────────
|
# ── Phase 3: Extract state changes ────────────────────────────
|
||||||
if on_action:
|
if on_action:
|
||||||
on_action("Phase 3/3: extracting state changes")
|
on_action("Phase 3/3: extracting state changes")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
@ -893,11 +905,14 @@ class GameEngine:
|
|||||||
|
|
||||||
user_prompt = self._auto_prompt(book_log)
|
user_prompt = self._auto_prompt(book_log)
|
||||||
ambience = None
|
ambience = None
|
||||||
debug_info = ""
|
phase3_errors = []
|
||||||
|
|
||||||
|
previous_attempt = None # {output, feedback}
|
||||||
|
phase3_ok = False
|
||||||
|
for p3_attempt in range(5):
|
||||||
current_char = self._read_file(CHAR_PATH) or "*No character.*"
|
current_char = self._read_file(CHAR_PATH) or "*No character.*"
|
||||||
current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
|
current_world = self._truncate_world(self._read_file(WORLD_PATH) or "") or "*No world.*"
|
||||||
|
|
||||||
for attempt in range(3):
|
|
||||||
phase3_prompt = (
|
phase3_prompt = (
|
||||||
f"## Current Character\n{current_char}\n\n"
|
f"## Current Character\n{current_char}\n\n"
|
||||||
f"## Current World\n{current_world}\n\n"
|
f"## Current World\n{current_world}\n\n"
|
||||||
@ -915,6 +930,16 @@ class GameEngine:
|
|||||||
phase3_prompt += (
|
phase3_prompt += (
|
||||||
f"Output ```tool blocks for changes only. Examples:\n\n"
|
f"Output ```tool blocks for changes only. Examples:\n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if previous_attempt:
|
||||||
|
phase3_prompt += (
|
||||||
|
f"--- PREVIOUS ATTEMPT (had errors) ---\n"
|
||||||
|
f"{previous_attempt['output']}\n\n"
|
||||||
|
f"--- FEEDBACK ---\n"
|
||||||
|
f"{previous_attempt['feedback']}\n\n"
|
||||||
|
f"Fix the issues above. Output corrected tool calls only.\n\n"
|
||||||
|
)
|
||||||
|
|
||||||
text = self._call_llm([
|
text = self._call_llm([
|
||||||
{"role": "user", "content": phase3_prompt +
|
{"role": "user", "content": phase3_prompt +
|
||||||
f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
|
f"```tool\n{{\"tool\": \"modify_vitals\", \"args\": {{\"current_hp\": 5, \"cash\": 45}}}}\n```\n"
|
||||||
@ -928,15 +953,15 @@ class GameEngine:
|
|||||||
f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
|
f"```tool\n{{\"tool\": \"journal_update\", \"args\": {{\"add\": [\"Investigate the mine\"], \"done\": [\"Defeat the demon\"]}}}}\n```\n"
|
||||||
f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
|
f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
|
||||||
f"Only output tools for things that actually changed. Omit unchanged fields."}
|
f"Only output tools for things that actually changed. Omit unchanged fields."}
|
||||||
], label=f"Extract attempt {attempt + 1}", on_debug=on_debug)
|
], label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)
|
||||||
|
|
||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 3, "status": "empty", "attempt": attempt + 1})
|
on_debug("phase", {"phase": 3, "status": "empty", "attempt": p3_attempt + 1})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tool_calls = self._extract_tool_calls(
|
tool_calls = self._extract_tool_calls(
|
||||||
text, round_num=attempt + 1, on_debug=on_debug
|
text, round_num=p3_attempt + 1, on_debug=on_debug
|
||||||
)
|
)
|
||||||
if on_debug and tool_calls:
|
if on_debug and tool_calls:
|
||||||
names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
|
names = [tc.get("tool", "?") for tc in tool_calls if tc.get("tool") != "finalize_turn"]
|
||||||
@ -956,7 +981,7 @@ class GameEngine:
|
|||||||
if on_action:
|
if on_action:
|
||||||
on_action(f"State: {self._describe_tool_action(name, args)}")
|
on_action(f"State: {self._describe_tool_action(name, args)}")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("tool_call", {"round": attempt + 1, "tool": name, "args": args})
|
on_debug("tool_call", {"round": p3_attempt + 1, "tool": name, "args": args})
|
||||||
|
|
||||||
if name == "player_roll" and on_player_roll:
|
if name == "player_roll" and on_player_roll:
|
||||||
dice = args.get("dice", "1d6")
|
dice = args.get("dice", "1d6")
|
||||||
@ -969,18 +994,41 @@ class GameEngine:
|
|||||||
if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
|
if result.startswith("**Error:") or result.startswith("Tool error") or result.startswith("Unknown"):
|
||||||
errors.append(f"{name}: {result}")
|
errors.append(f"{name}: {result}")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("tool_result", {"round": attempt + 1, "tool": name, "result": result})
|
on_debug("tool_result", {"round": p3_attempt + 1, "tool": name, "result": result})
|
||||||
|
|
||||||
if not errors:
|
if not errors:
|
||||||
|
phase3_ok = True
|
||||||
|
debug_info = ""
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
|
on_debug("phase", {"phase": 3, "status": "done", "applied": len([tc for tc in tool_calls if tc.get("tool") != "finalize_turn"])})
|
||||||
break
|
break
|
||||||
|
|
||||||
|
phase3_errors = errors
|
||||||
debug_info = "; ".join(errors)
|
debug_info = "; ".join(errors)
|
||||||
if on_debug:
|
if on_debug:
|
||||||
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": attempt + 1})
|
on_debug("phase", {"phase": 3, "status": "errors", "errors": errors, "attempt": p3_attempt + 1})
|
||||||
|
|
||||||
if errors and on_debug:
|
# Build feedback for the LLM to fix on next attempt
|
||||||
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": errors})
|
feedback_lines = ["The previous tool calls had errors:"]
|
||||||
|
for e in errors:
|
||||||
|
feedback_lines.append(f"- {e}")
|
||||||
|
feedback_lines.append("")
|
||||||
|
feedback_lines.append("Fix ALL issues above. Use correct tool names, valid JSON, and reasonable values.")
|
||||||
|
previous_attempt = {"output": text, "feedback": "\n".join(feedback_lines)}
|
||||||
|
|
||||||
|
if phase3_ok:
|
||||||
|
break # All phases succeeded on this outer attempt
|
||||||
|
|
||||||
|
# Phase 3 failed after 5 attempts — retry from Phase 1
|
||||||
|
if on_debug:
|
||||||
|
on_debug("phase", {"phase": 3, "status": "exhausted", "errors": phase3_errors})
|
||||||
|
on_debug("phase", {"phase": 1, "status": "retry_after_phase3_failure", "outer_attempt": outer_attempt + 1})
|
||||||
|
book_log = None # Reset so Phase 1 runs again on next outer iteration
|
||||||
|
|
||||||
|
if not book_log:
|
||||||
|
return TurnResult(error="Generation failed after exhausting all retries")
|
||||||
|
|
||||||
|
# ── Finalize ──────────────────────────────────────────────────────
|
||||||
if on_action:
|
if on_action:
|
||||||
on_action("Turn complete")
|
on_action("Turn complete")
|
||||||
if on_debug:
|
if on_debug:
|
||||||
@ -1022,6 +1070,59 @@ class GameEngine:
|
|||||||
"""Fallback player prompt."""
|
"""Fallback player prompt."""
|
||||||
return "**What do you do?**"
|
return "**What do you do?**"
|
||||||
|
|
||||||
|
def _validate_narrative(self, book_log: str, *, on_debug: callable = None) -> tuple[bool, str]:
|
||||||
|
"""Check if book_log is acceptable narrative. Returns (ok, reason)."""
|
||||||
|
lines = book_log.strip().split("\n")
|
||||||
|
if not lines:
|
||||||
|
return False, "Empty narrative"
|
||||||
|
|
||||||
|
# 1) Heuristic: high repetition count
|
||||||
|
from collections import Counter
|
||||||
|
common = Counter(lines).most_common(1)
|
||||||
|
if common and common[0][1] >= 5:
|
||||||
|
return False, f"Repetition: '{common[0][0][:60]}' ×{common[0][1]}"
|
||||||
|
|
||||||
|
# 2) Heuristic: game mechanics bleedthrough
|
||||||
|
mech_lines = [l for l in lines if re.match(
|
||||||
|
r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|'
|
||||||
|
r'Strenght|Dexterity|Willpower|STR|DEX|WIL|'
|
||||||
|
r'(?:[A-Z][a-z]+(?: \(\w+\))?:))',
|
||||||
|
l
|
||||||
|
)]
|
||||||
|
if mech_lines:
|
||||||
|
ratio = len(mech_lines) / len(lines)
|
||||||
|
if ratio > 0.3:
|
||||||
|
return False, f"Game mechanics dominate ({len(mech_lines)}/{len(lines)} lines)"
|
||||||
|
|
||||||
|
# 3) Heuristic: tool / json blocks leaked into narrative
|
||||||
|
if re.search(r'```(?:tool|json)', book_log):
|
||||||
|
return False, "Contains unprocessed tool blocks"
|
||||||
|
|
||||||
|
# 4) Heuristic: under 50 characters of real prose
|
||||||
|
prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip()
|
||||||
|
if len(prose) < 50:
|
||||||
|
return False, "Too short to be meaningful"
|
||||||
|
|
||||||
|
# 5) LLM quality rating (only if heuristics pass)
|
||||||
|
text = self._call_llm([
|
||||||
|
{"role": "user", "content":
|
||||||
|
f"Rate this RPG narrative quality 1-5.\n"
|
||||||
|
f"1 = unreadable (spam, repetition, pure mechanics, garbled)\n"
|
||||||
|
f"2 = poor (mostly mechanics, little story)\n"
|
||||||
|
f"3 = acceptable (some narrative but rough)\n"
|
||||||
|
f"4 = good (solid prose, minor issues)\n"
|
||||||
|
f"5 = excellent (vivid, engaging)\n"
|
||||||
|
f"Reply with ONLY a single digit 1-5.\n\n"
|
||||||
|
f"{book_log[:600]}"}
|
||||||
|
], label="Narrative validation", max_tokens=2, on_debug=on_debug)
|
||||||
|
|
||||||
|
if text and text.strip().isdigit():
|
||||||
|
score = int(text.strip())
|
||||||
|
if score < 3:
|
||||||
|
return False, f"Quality score: {score}/5"
|
||||||
|
|
||||||
|
return True, ""
|
||||||
|
|
||||||
# ── Response Parsing ────────────────────────────────────────────────
|
# ── Response Parsing ────────────────────────────────────────────────
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@ -841,8 +841,10 @@ class ChaosTUI(App):
|
|||||||
if status == "start":
|
if status == "start":
|
||||||
name = data.get("name", "")
|
name = data.get("name", "")
|
||||||
dice = data.get("dice")
|
dice = data.get("dice")
|
||||||
|
outer = data.get("outer_attempt")
|
||||||
d = f" dice={dice}" if dice else ""
|
d = f" dice={dice}" if dice else ""
|
||||||
self._append_debug(f"▸ Phase {p}: {name} {d}")
|
o = f" [attempt {outer}/3]" if outer else ""
|
||||||
|
self._append_debug(f"▸ Phase {p}: {name}{o} {d}")
|
||||||
elif status == "done":
|
elif status == "done":
|
||||||
if p == 1:
|
if p == 1:
|
||||||
self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars")
|
self._append_debug(f" ✔ prose: {data.get('chars', 0)} chars")
|
||||||
@ -870,6 +872,10 @@ class ChaosTUI(App):
|
|||||||
self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!")
|
self._append_debug(f" ✖ Phase 3 exhausted all retries — state changes may be missing!")
|
||||||
for e in errs:
|
for e in errs:
|
||||||
self._append_debug(f" {e}")
|
self._append_debug(f" {e}")
|
||||||
|
elif status == "retry_after_phase3_failure":
|
||||||
|
self._append_debug(f" ⟳ Phase 3 failed — retrying from Phase 1 (attempt {data.get('outer_attempt', '?')}/3)")
|
||||||
|
elif status == "validation_failed":
|
||||||
|
self._append_debug(f" ✖ narrative rejected: {data.get('reason', '?')} (attempt {data.get('outer_attempt', '?')}/3)")
|
||||||
elif event_type == "phase_done":
|
elif event_type == "phase_done":
|
||||||
self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars")
|
self._append_debug(f" ✔ turn complete — book_log: {data.get('book_log_chars', 0)} chars")
|
||||||
if data.get("log_entry"):
|
if data.get("log_entry"):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user