Add meta log
This commit is contained in:
parent
f98392478b
commit
3e1778d0d7
@ -204,7 +204,7 @@ Default is "tools" for faster single-call generation.
|
||||
|
||||
## Action Validation
|
||||
|
||||
Before every turn, a separate lightweight LLM call (`validate_action` in `tools/engine_lib/validation.py`) checks whether the player's action is possible given the character sheet and world state. This catches impossible actions like using items not in inventory, asserting false facts, or attempting nonsensical actions.
|
||||
After every turn is generated, a separate lightweight LLM call (`validate_turn` in `tools/engine_lib/validation.py`) checks whether the turn is valid given the character sheet and world state. This catches impossible actions like using items not in inventory, asserting false facts, or attempting nonsensical actions.
|
||||
|
||||
- Uses `TURN_VALIDATION_PROMPT` template with character + world state
|
||||
- Low temperature (0.2), low max tokens (256)
|
||||
|
||||
@ -121,12 +121,15 @@ class GameEngine:
|
||||
changes: list[str] = []
|
||||
start_time = datetime.now()
|
||||
total_attempts = 0
|
||||
prev_raw = ""
|
||||
|
||||
_ = None # placeholder
|
||||
|
||||
for attempt in range(MAX_RETRIES + 1):
|
||||
total_attempts = attempt + 1
|
||||
user = base_user
|
||||
if attempt > 0:
|
||||
user += f"\n\n---\n\n## Turn Generation Feedback\n{feedback}"
|
||||
user += f"\n\n---\n\n## Your Previous Response\n\n```\n{prev_raw}\n```\n\n---\n\n## Feedback\n{feedback}"
|
||||
|
||||
state.append_llm_log(f"\n[TOOL] Attempt {attempt + 1}/{MAX_RETRIES + 1} — {len(system)} chars system, {len(user)} chars user")
|
||||
|
||||
@ -137,7 +140,7 @@ class GameEngine:
|
||||
|
||||
if not text or not text.strip():
|
||||
if attempt < MAX_RETRIES:
|
||||
feedback = "Your response was empty. Generate a complete turn with narrative and state changes."
|
||||
feedback = f"Your response was empty. Generate a complete turn with narrative and state changes."
|
||||
state.append_llm_log("\n[RETRY] empty response")
|
||||
if on_action:
|
||||
on_action("DM is weaving the tale...")
|
||||
@ -146,6 +149,7 @@ class GameEngine:
|
||||
|
||||
raw = text.strip()
|
||||
state.append_llm_log(f"\n[TOOL] got {len(raw)} chars in {(datetime.now() - start_time).total_seconds() * 1000:.1f}ms")
|
||||
prev_raw = raw
|
||||
|
||||
tool_calls = extract_tool_calls(raw)
|
||||
if not tool_calls:
|
||||
@ -155,6 +159,7 @@ class GameEngine:
|
||||
book_log = ""
|
||||
ambience = None
|
||||
log_entry = None
|
||||
meta_log = ""
|
||||
state_changes: list[dict] = []
|
||||
|
||||
for tc in tool_calls:
|
||||
@ -176,6 +181,8 @@ class GameEngine:
|
||||
ambience = None
|
||||
if args.get("log_entry"):
|
||||
log_entry = args["log_entry"]
|
||||
if args.get("meta_log"):
|
||||
meta_log = args["meta_log"]
|
||||
elif name == "read_rules":
|
||||
cat = args.get("category", "mechanics")
|
||||
result = execute_tool("read_rules", {"category": cat})
|
||||
@ -201,7 +208,7 @@ class GameEngine:
|
||||
if is_meta and state_changes:
|
||||
state.append_llm_log(f"\n[TURN META REJECTED] state changes not allowed for meta action")
|
||||
if attempt < MAX_RETRIES:
|
||||
feedback = "This is a meta action. Do NOT call any state-changing tools. Respond only with meta text (starting with `>`) and no tool calls beyond a finalize_turn."
|
||||
feedback = f"This is a meta action. Do NOT call any state-changing tools. Respond only with meta text (starting with `>`) and no tool calls beyond a finalize_turn."
|
||||
state.append_llm_log(f"\n[TURN REGENERATE] (meta) attempt {attempt + 2}")
|
||||
if on_action:
|
||||
on_action("DM is consulting the fates...")
|
||||
@ -212,7 +219,7 @@ class GameEngine:
|
||||
if not is_meta and log_entry and not book_log:
|
||||
state.append_llm_log(f"\n[TURN NO NARRATIVE] finalized with log_entry but no narrative")
|
||||
if attempt < MAX_RETRIES:
|
||||
feedback = "You called finalize_turn with a log_entry but produced no narrative. Every turn must include a `narrative` tool block with the story. Regenerate with both narrative and log_entry."
|
||||
feedback = f"You called finalize_turn with a log_entry but produced no narrative. Every turn must include a `narrative` tool block with the story. Regenerate with both narrative and log_entry."
|
||||
state.append_llm_log(f"\n[TURN REGENERATE] (no narrative) attempt {attempt + 2}")
|
||||
if on_action:
|
||||
on_action("DM is weaving the tale...")
|
||||
@ -228,7 +235,7 @@ class GameEngine:
|
||||
if ratio >= 0.8:
|
||||
state.append_llm_log(f"\n[TURN DUPLICATE] {ratio:.0%} match with previous turn")
|
||||
if attempt < MAX_RETRIES:
|
||||
feedback = "The narrative is nearly identical to the previous turn. Generate something new and different."
|
||||
feedback = f"The narrative is nearly identical to the previous turn. Generate something new and different."
|
||||
state.append_llm_log(f"\n[TURN REGENERATE] (duplicate) attempt {attempt + 2}")
|
||||
if on_action:
|
||||
on_action("DM is weaving the tale...")
|
||||
@ -258,7 +265,7 @@ class GameEngine:
|
||||
state.append_llm_log(f"\n[TURN VALID] {reason}")
|
||||
elif reason == "Unrecognized":
|
||||
if attempt < MAX_RETRIES:
|
||||
feedback = "The validation system could not process the previous turn. Please regenerate."
|
||||
feedback = f"The validation system could not process the previous turn. Please regenerate."
|
||||
state.append_llm_log(f"\n[TURN REGENERATE] (unrecognized) attempt {attempt + 2}")
|
||||
if on_action:
|
||||
on_action("DM is consulting the fates...")
|
||||
@ -341,6 +348,7 @@ class GameEngine:
|
||||
log_entry=log_entry or "",
|
||||
ambience=ambience,
|
||||
tool_calls=tool_calls,
|
||||
meta_log=meta_log,
|
||||
)
|
||||
|
||||
return TurnResult(
|
||||
@ -351,6 +359,7 @@ class GameEngine:
|
||||
changes=changes,
|
||||
is_meta=is_meta,
|
||||
game_over=game_over,
|
||||
meta_log=meta_log,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -18,3 +18,4 @@ class TurnResult:
|
||||
changes: list[str] = field(default_factory=list)
|
||||
is_meta: bool = False
|
||||
game_over: bool = False
|
||||
meta_log: str = ""
|
||||
|
||||
@ -18,6 +18,7 @@ def log_turn_details(
|
||||
log_entry: str,
|
||||
ambience: Optional[str],
|
||||
tool_calls: list,
|
||||
meta_log: str = "",
|
||||
) -> None:
|
||||
"""Write structured turn summary to llm.log and fire TUI debug event."""
|
||||
ts = datetime.now().isoformat()
|
||||
@ -34,6 +35,7 @@ def log_turn_details(
|
||||
state.append_llm_log(f"├─ Output: {output_chars} chars ({output_words} words)")
|
||||
state.append_llm_log(f"├─ Log Entry: {log_entry}")
|
||||
state.append_llm_log(f"├─ Ambience: {ambience or 'None'}")
|
||||
state.append_llm_log(f"├─ Meta Log: {(meta_log or '')[:80]}")
|
||||
tools_preview = ", ".join(tc.get("tool", "?") for tc in tool_calls)
|
||||
state.append_llm_log(f"├─ Tool Calls: {len(tool_calls)} ({tools_preview})")
|
||||
state.append_llm_log(
|
||||
|
||||
@ -25,6 +25,7 @@ LLM_LOG_PATH = SESSION_DIR / 'llm.log'
|
||||
AMBIENCE_OPTIONS_PATH = SESSION_DIR / "ambience_options.md"
|
||||
CHANGES_PATH = SESSION_DIR / "changes.md"
|
||||
RULES_INJECTION_PATH = SESSION_DIR / "rules_injection.md"
|
||||
META_LOG_PATH = SESSION_DIR / "meta_log.md"
|
||||
AUDIO_DIR = SESSION_DIR / "audio"
|
||||
|
||||
END_GAME_PATH = RULES_DIR / 'end_game.md'
|
||||
|
||||
@ -45,7 +45,7 @@ Wrap each action in its own ```tool block:
|
||||
{"tool": "journal_update", "args": {"add": ["Investigate the mine"], "done": ["Defeat the demon"]}}
|
||||
```
|
||||
```tool
|
||||
{"tool": "finalize_turn", "args": {"ambience": "dungeon", "log_entry": "Kael explored the dungeon, found a hidden passage, and was ambushed by goblins."}}
|
||||
{"tool": "finalize_turn", "args": {"ambience": "dungeon", "log_entry": "Kael explored the dungeon, found a hidden passage, and was ambushed by goblins.", "meta_log": "Kael rolled a 4 (1d6) for perception — success, spotted the hidden door. HP lowered to 5 after goblin ambush."}}
|
||||
```
|
||||
|
||||
```tool
|
||||
@ -59,6 +59,8 @@ or with a category:
|
||||
|
||||
**log_entry**: Provide a short, dense summary (1-2 sentences) of the turn's main events. This becomes the session log — be specific, factual, and concise.
|
||||
|
||||
**meta_log**: (Optional) Provide a behind-the-scenes explanation of the mechanics — what dice were rolled, what rules triggered, what changed and why. This is shown to the player at the bottom of the screen for insight into the game mechanics. Be specific: "rolled X (1d6) for Y — result: Z".
|
||||
|
||||
You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure and do NOT call any state-changing tools.
|
||||
|
||||
**Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have.
|
||||
|
||||
@ -17,7 +17,7 @@ from pathlib import Path
|
||||
from .paths import (
|
||||
CHAR_PATH, WORLD_PATH, BOOK_PATH, JOURNAL_PATH, AMBIENCE_PATH,
|
||||
LOG_PATH, LLM_LOG_PATH, AMBIENCE_OPTIONS_PATH, CHANGES_PATH,
|
||||
AUDIO_DIR, SESSION_DIR, ARCHIVE_DIR,
|
||||
META_LOG_PATH, AUDIO_DIR, SESSION_DIR, ARCHIVE_DIR,
|
||||
)
|
||||
from .models import TurnResult
|
||||
|
||||
@ -168,6 +168,21 @@ def append_llm_log(text: str) -> None:
|
||||
f.write(text + "\n")
|
||||
|
||||
|
||||
def append_meta_log(turn_num: int, entry: str) -> None:
    """Append a meta_log entry to meta_log.md, tagged with its turn number.

    Every entry is stored as exactly one markdown bullet line, because
    ``read_last_meta_log`` treats the last non-empty line of the file as the
    most recent entry. Line breaks and runs of whitespace inside ``entry``
    are therefore collapsed to single spaces before writing. An entry that
    is empty after collapsing is not written at all.

    Args:
        turn_num: Turn number shown in the bullet's ``Turn N`` label.
        entry: Meta-log text for the turn; may contain line breaks.
    """
    # Collapse to a single line so one entry == one line in the file.
    flat = " ".join(entry.split())
    if not flat:
        return
    META_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(META_LOG_PATH, "a") as f:
        f.write(f"- **Turn {turn_num}** — {flat}\n")
|
||||
|
||||
|
||||
def read_last_meta_log() -> str:
    """Fetch the most recent entry from meta_log.md.

    Returns:
        The last non-blank line of the file with surrounding whitespace
        removed, or "" when the file does not exist or holds no non-blank
        lines.
    """
    if META_LOG_PATH.exists():
        # Walk backwards: the first non-blank line found is the latest entry.
        for raw in reversed(META_LOG_PATH.read_text().splitlines()):
            candidate = raw.strip()
            if candidate:
                return candidate
    return ""
|
||||
|
||||
|
||||
def update_journal(add: list[str] | None = None, done: list[str] | None = None) -> None:
|
||||
"""Add or complete TODO items in journal.md."""
|
||||
if not JOURNAL_PATH.exists():
|
||||
|
||||
@ -20,7 +20,7 @@ TOOL_REGISTRY: dict[str, dict] = {
|
||||
"replace_note": {"description": "Replace note by exact match.", "args": {"before": "exact text", "after": "new text"}},
|
||||
"world_update": {"description": "Replace world state.", "args": {"content": "full world markdown"}},
|
||||
"journal_update": {"description": "Update TODO/DONE.", "args": {"add": "[...]", "done": "[...]"}},
|
||||
"finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name", "log_entry": "one-line summary of what happened"}},
|
||||
"finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name", "log_entry": "one-line summary of what happened", "meta_log": "optional behind-the-scenes mechanics explanation"}},
|
||||
"read_rules": {"description": "Read a rules file by category. Categories: mechanics (full mechanics reference), core (core mechanics), character_creation, end_game (end-game closure rules). Call when you need details beyond the Core Rules in the prompt.", "args": {"category": "optional — one of: mechanics, core, character_creation, end_game (default: mechanics)"}},
|
||||
}
|
||||
|
||||
|
||||
@ -8,95 +8,6 @@ from .paths import CHAR_PATH, WORLD_PATH, JOURNAL_PATH
|
||||
from . import state
|
||||
|
||||
|
||||
VALIDATION_PROMPT = """You are a strict RPG game master validating whether a player's action is possible given the game state. Be thorough — check inventory, stats, location, NPCs, story context, and story logic.
|
||||
|
||||
## Character
|
||||
{character}
|
||||
|
||||
## World
|
||||
{world}
|
||||
|
||||
## Session Log
|
||||
*Written in 3rd person with explicit actor names.*
|
||||
{log}
|
||||
|
||||
## Recent Story
|
||||
*Written in 3rd person with explicit actor names.*
|
||||
{story}
|
||||
|
||||
## Player Action
|
||||
{action}
|
||||
|
||||
## Instructions
|
||||
- Is the player trying to use an item they don't have? -> invalid
|
||||
- Are they asserting something that contradicts the state? -> invalid
|
||||
- Is the action nonsensical given the situation? -> invalid
|
||||
- Is the player's action or intention unclear or ambiguous? -> invalid (explain what is unclear and why)
|
||||
- If you are uncertain whether the action is valid, reject it and describe exactly why you are unsure.
|
||||
- Does the action make sense given the character's abilities and resources? -> valid
|
||||
- Pay close attention to the Recent Story section — entities like monsters, NPCs, and hazards currently present in the scene ARE valid targets for action.
|
||||
- If valid, also check: if they're using a consumable item, note that it must be removed from inventory.
|
||||
|
||||
Reply with ONLY the JSON object. Examples:
|
||||
```
|
||||
{{"valid": true, "reason": "ok"}}
|
||||
```
|
||||
or
|
||||
```
|
||||
{{"valid": false, "reason": "brief explanation of why the action is impossible"}}
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
def validate_action(
    player_action: str,
    *,
    story: str = "",
    log: str = "",
) -> tuple[bool, str]:
    """Ask the LLM whether a player action is valid given the game state.

    The character sheet and world state are read from disk; ``story`` and
    ``log`` fall back to the recent book / recent log when blank. The LLM is
    asked for a JSON object with ``valid`` and ``reason`` keys and gets one
    corrective retry if its reply cannot be used.

    Args:
        player_action: The action text proposed by the player.
        story: Recent story text to validate against (optional).
        log: Recent session-log text to validate against (optional).

    Returns:
        ``(valid, reason)``. Special cases:
        - ``(True, "")`` when ``player_action`` is empty (no LLM call).
        - ``(False, "Not sure")`` when the LLM returns nothing.
        - ``(False, "Unrecognized")`` when both attempts yield output that
          is not a JSON object.
    """
    if not player_action:
        return True, ""

    char = state.read_file(CHAR_PATH) or "*No character sheet.*"
    world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"
    recent = story.strip() or state.read_recent_book() or "*No prior story.*"
    log_entries = log.strip() or state.read_recent_log() or "*No recent events.*"

    prompt = VALIDATION_PROMPT.format(character=char, world=world, log=log_entries, story=recent, action=player_action)

    messages = [{"role": "user", "content": prompt}]

    for attempt in range(2):
        text = call_llm(
            messages,
            max_tokens=1024,
            temperature=0.2,
            label="Action validation",
        )

        if not text:
            return False, "Not sure"

        cleaned = text.strip()
        # Accept the JSON either bare or wrapped in a ``` / ```json fence.
        m = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
        if m:
            cleaned = m.group(1).strip()

        try:
            data = json.loads(cleaned)
        except (json.JSONDecodeError, ValueError):
            data = None

        # json.loads may legally return a list/str/number/bool; only an
        # object carries the keys we need, so anything else is treated as
        # an unusable reply instead of crashing on ``.get``.
        if isinstance(data, dict):
            valid = data.get("valid", True)
            reason = data.get("reason", "")
            return valid, reason

        if attempt == 0:
            messages.append({
                "role": "system",
                "content": "Your previous response was not valid JSON. Reply with ONLY a JSON object in exactly this format, nothing else:\n\n```json\n{\"valid\": true, \"reason\": \"ok\"}\n```\nor\n```json\n{\"valid\": false, \"reason\": \"brief explanation\"}\n```"
            })

    return False, "Unrecognized"
|
||||
|
||||
|
||||
TURN_VALIDATION_PROMPT = """You are a strict RPG game master validating a generated turn. Check:
|
||||
|
||||
1. **Action Sense**: Did the player's request make sense given the character, inventory, and world state?
|
||||
@ -251,7 +162,7 @@ def validate_turn(
|
||||
changes=change_summary,
|
||||
)
|
||||
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
messages = [{"role": "system", "content": prompt}]
|
||||
|
||||
for attempt in range(2):
|
||||
text = call_llm(
|
||||
@ -284,7 +195,7 @@ def validate_turn(
|
||||
if attempt == 0:
|
||||
messages.append({
|
||||
"role": "system",
|
||||
"content": "Your previous response was not valid. Reply with ONLY a ```tool block:\n\n```tool\n{\"tool\": \"validate\", \"args\": {\"valid\": true, \"reason\": \"ok\", \"action\": \"ok\"}}\n```\nor\n```tool\n{\"tool\": \"validate\", \"args\": {\"valid\": false, \"reason\": \"...\", \"action\": \"reject\"}}\n```\nor\n```tool\n{\"tool\": \"validate\", \"args\": {\"valid\": false, \"reason\": \"...\", \"action\": \"regenerate\"}}\n```"
|
||||
"content": f"Your previous response was NOT valid. Do NOT include any reasoning or explanation. Reply with EXACTLY ONE of these three ```tool blocks and nothing else:\n\n```tool\n{{\"tool\": \"validate\", \"args\": {{\"valid\": true, \"reason\": \"ok\", \"action\": \"ok\"}}}}\n```\n```tool\n{{\"tool\": \"validate\", \"args\": {{\"valid\": false, \"reason\": \"explain why the action is impossible\", \"action\": \"reject\"}}}}\n```\n```tool\n{{\"tool\": \"validate\", \"args\": {{\"valid\": false, \"reason\": \"describe what the LLM should fix\", \"action\": \"regenerate\"}}}}\n```"
|
||||
})
|
||||
|
||||
return False, "Unrecognized", "reject"
|
||||
|
||||
35
tools/run.py
35
tools/run.py
@ -8,6 +8,7 @@ Owns the TUI and game loop. Layout:
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import threading
|
||||
|
||||
from textual import on
|
||||
@ -68,6 +69,7 @@ class ChaosTUI(App):
|
||||
#transcript { background: #1a2a1a; color: #c8c8c8; padding: 0 1; }
|
||||
#play-narrative { background: #161616; color: #d8d8d8; padding: 1 2; height: auto; }
|
||||
#play-narrative.meta { background: #1a1a2e; color: #b0a0e0; border-top: solid #6b4fa0; border-bottom: solid #6b4fa0; }
|
||||
#play-meta { background: #0d0d1a; color: #a0a0c0; padding: 0 2; height: auto; border-top: solid #2a2a3a; }
|
||||
#play-status { background: #1a2a1a; color: #e0b060; padding: 0 2; height: 1; text-style: bold italic; text-align: center; }
|
||||
#play-status.processing { background: #2a1a0a; color: #ffd93d; }
|
||||
#play-input { height: 3; background: #222; color: #e0d0c0; border: solid #555; padding: 0 1; }
|
||||
@ -151,6 +153,7 @@ class ChaosTUI(App):
|
||||
with TabPane("PLAY", id="play-tab"):
|
||||
with VerticalScroll(id="play-scroll"):
|
||||
yield Static("*Awaiting the fates...*", id="play-narrative")
|
||||
yield Static("", id="play-meta")
|
||||
yield Static("", id="play-status")
|
||||
yield Input(placeholder="Type your action and press Enter...", id="play-input")
|
||||
yield Button("Close the Book and Start a New One", id="end-game-btn", variant="warning")
|
||||
@ -202,15 +205,17 @@ class ChaosTUI(App):
|
||||
def _begin_game(self):
|
||||
self._game_over = False
|
||||
self._last_narrative: str = ""
|
||||
self.query_one("#play-meta", Static).update("")
|
||||
pages = load_book_pages()
|
||||
if pages and pages != ["*The story has not begun.*"]:
|
||||
parts = []
|
||||
parts.append(pages[-1])
|
||||
changes: list[str] = []
|
||||
if CHANGES_PATH.exists():
|
||||
saved = [l.strip() for l in CHANGES_PATH.read_text().splitlines() if l.strip()]
|
||||
if saved:
|
||||
parts.append(self._render_changes(saved))
|
||||
changes = [l.strip() for l in CHANGES_PATH.read_text().splitlines() if l.strip()]
|
||||
last_meta = self._strip_meta_prefix(state.read_last_meta_log())
|
||||
self._set_narrative("\n\n".join(parts))
|
||||
self._update_meta(changes, last_meta)
|
||||
self._enable_input()
|
||||
return
|
||||
self._call_llm()
|
||||
@ -338,6 +343,8 @@ class ChaosTUI(App):
|
||||
else:
|
||||
summary = result.book_log.strip().split(chr(10))[0][:80]
|
||||
state.append_log(f"- **Turn {turn_num}** — {summary}")
|
||||
if result.meta_log:
|
||||
state.append_meta_log(turn_num, result.meta_log)
|
||||
result.book_log = load_book_pages()[-1]
|
||||
elif result.log_entry:
|
||||
state.append_log(f"- {result.log_entry}")
|
||||
@ -371,18 +378,32 @@ class ChaosTUI(App):
|
||||
self._show_error(err_msg, traceback_str)
|
||||
|
||||
@staticmethod
|
||||
def _render_changes(changes: list[str]) -> str:
|
||||
return "**Changes:**\n" + "\n".join(f"- {c}" for c in changes)
|
||||
def _strip_meta_prefix(entry: str) -> str:
|
||||
return re.sub(r"^- \*\*Turn \d+\*\* — ", "", entry)
|
||||
|
||||
@staticmethod
|
||||
def _render_meta(changes: list[str], meta_log: str) -> str:
|
||||
lines = []
|
||||
if changes:
|
||||
lines.append("**Changes:**")
|
||||
lines.extend(f"- {c}" for c in changes)
|
||||
if meta_log:
|
||||
lines.append(meta_log)
|
||||
return "\n\n".join(lines) if lines else ""
|
||||
|
||||
def _update_meta(self, changes: list[str], meta_log: str) -> None:
    """Refresh the ``#play-meta`` widget from the given changes and meta log.

    The text comes from ``_render_meta``. Non-empty text is shown as
    markdown; empty text clears the widget.
    """
    rendered = self._render_meta(changes, meta_log)
    panel = self.query_one("#play-meta", Static)
    if rendered:
        panel.update(RichMarkdown(rendered))
    else:
        panel.update("")
|
||||
|
||||
def _display_scene(self, result: TurnResult) -> None:
|
||||
parts = []
|
||||
if result.book_log:
|
||||
parts.append(result.book_log)
|
||||
if result.changes:
|
||||
parts.append(self._render_changes(result.changes))
|
||||
if result.user_prompt:
|
||||
parts.append(f"---\n\n{result.user_prompt}")
|
||||
self._set_narrative("\n\n".join(parts) if parts else "", meta=result.is_meta)
|
||||
self._update_meta(result.changes, result.meta_log)
|
||||
self._enable_input()
|
||||
|
||||
def _enable_input(self, value: str = "") -> None:
|
||||
|
||||
@ -1,93 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""End-to-end validation tests using the real configured LLM.
|
||||
|
||||
Tests that validate_action handles real LLM responses correctly with
|
||||
the actual character sheet and world state. Requires a running LLM.
|
||||
|
||||
Usage:
|
||||
python3 tools/test_llm_validation.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
PASS = 0
|
||||
FAIL = 0
|
||||
|
||||
|
||||
def check(label: str, valid: bool, reason: str, expected_valid: bool):
    """Record one validation outcome and print a one-line report.

    Increments the module-level PASS counter when ``valid`` equals
    ``expected_valid`` and FAIL otherwise, then prints a check mark or a
    cross together with the label, the verdict and the reason.
    """
    global PASS, FAIL
    if valid == expected_valid:
        status = "✓"
        PASS += 1
    else:
        status = "✗"
        FAIL += 1
    print(f" {status} {label}: valid={valid}, reason=\"{reason}\"")
|
||||
|
||||
|
||||
def section(name: str):
    """Print a section heading framed by two 60-character '=' rules."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {name}")
    print(rule)
|
||||
|
||||
|
||||
def main():
    """Run every end-to-end validation case and print a pass/fail summary.

    Each case calls ``validate_action`` with a single action string and
    hands the result to ``check`` along with the expected verdict.

    Returns:
        0 when no check failed, 1 otherwise.
    """
    # (section title, [(label, player action, expected_valid), ...])
    suites = [
        ("Valid actions — should pass", [
            ("Buy a drink", "I buy a mug of weak ale at the Splintered Tankard", True),
            ("Use healing salve", "I use my healing salve to restore 1 HP", True),
            ("Talk to Otta", "I ask Mistress Otta about recent news in the Keep", True),
            ("Visit the market", "I head to the Market Square to browse stalls", True),
        ]),
        ("Invalid actions — should fail", [
            ("Use non-existent item", "I drink a potion of invisibility", False),
            ("Cast a spell (not a weaver)", "I cast a fireball spell at the tavern", False),
            ("Buy impossible item", "I buy a horse for a broken copper coin", False),
            ("Assert false state", "I fly to the moon", False),
        ]),
        ("Edge cases", [
            ("Empty action", "", True),
            ("Garbled nonsense", "qwxz jabberwocky flargle bargle", False),
        ]),
    ]

    for title, cases in suites:
        section(title)
        for label, action, expected in cases:
            check(label, *validate_action(action), expected_valid=expected)

    print(f"\n{'=' * 60}")
    print(f" Results: {PASS} passed, {FAIL} failed")
    print(f"{'=' * 60}")
    return 0 if FAIL == 0 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@ -30,7 +30,7 @@ def test_engine_import():
|
||||
('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log', 'next_turn_number']),
|
||||
('engine_lib.tools_handler', ['execute_tool', 'extract_tool_calls', 'TOOL_REGISTRY']),
|
||||
('engine_lib.llm', ['call_llm']),
|
||||
('engine_lib.validation', ['validate_action', 'validate_turn']),
|
||||
('engine_lib.validation', ['validate_turn']),
|
||||
('engine_lib.parsing', ['log_turn_details']),
|
||||
('engine', ['GameEngine']),
|
||||
]
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for engine_lib/validation.py."""
|
||||
"""Tests for engine_lib/validation.py — validate_turn only."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
@ -10,103 +10,6 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
def test_empty_action():
|
||||
"""Empty action should return (True, '')."""
|
||||
from engine_lib.validation import validate_action
|
||||
valid, reason = validate_action("")
|
||||
assert valid is True
|
||||
assert reason == ""
|
||||
print("✓ empty action returns (True, '')")
|
||||
|
||||
|
||||
@patch("engine_lib.validation.state.read_file")
|
||||
@patch("engine_lib.validation.state.truncate_world")
|
||||
@patch("engine_lib.validation.call_llm")
|
||||
def test_valid_action(mock_call_llm, mock_truncate_world, mock_read_file):
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
mock_read_file.side_effect = lambda p: "HP: 10\nGold: 5" if "character" in str(p).lower() else "## Location\nTavern"
|
||||
mock_truncate_world.return_value = "## Location\nTavern"
|
||||
mock_call_llm.return_value = json.dumps({"valid": True, "reason": "ok"})
|
||||
|
||||
valid, reason = validate_action("I buy a drink", story="At the tavern", log="- Entered the tavern")
|
||||
|
||||
assert valid is True
|
||||
assert reason == "ok"
|
||||
mock_call_llm.assert_called_once()
|
||||
print("✓ valid action returns (True, reason)")
|
||||
|
||||
|
||||
@patch("engine_lib.validation.state.read_file")
|
||||
@patch("engine_lib.validation.state.truncate_world")
|
||||
@patch("engine_lib.validation.call_llm")
|
||||
def test_invalid_action(mock_call_llm, mock_truncate_world, mock_read_file):
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
mock_read_file.side_effect = lambda p: "HP: 10\nGold: 0" if "character" in str(p).lower() else "## Location\nTavern"
|
||||
mock_truncate_world.return_value = "## Location\nTavern"
|
||||
mock_call_llm.return_value = json.dumps({"valid": False, "reason": "Not enough gold"})
|
||||
|
||||
valid, reason = validate_action("I buy a drink", story="At the tavern", log="- Entered the tavern")
|
||||
|
||||
assert valid is False
|
||||
assert reason == "Not enough gold"
|
||||
print("✓ invalid action returns (False, reason)")
|
||||
|
||||
|
||||
@patch("engine_lib.validation.state.read_file")
|
||||
@patch("engine_lib.validation.state.truncate_world")
|
||||
@patch("engine_lib.validation.call_llm")
|
||||
def test_llm_returns_none(mock_call_llm, mock_truncate_world, mock_read_file):
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern"
|
||||
mock_truncate_world.return_value = "## Location\nTavern"
|
||||
mock_call_llm.return_value = None
|
||||
|
||||
valid, reason = validate_action("I attack the dragon", story="A dragon appears!", log="- Dragon spotted")
|
||||
|
||||
assert valid is False
|
||||
assert reason == "Not sure"
|
||||
print("✓ LLM returning None gives (False, 'Not sure')")
|
||||
|
||||
|
||||
@patch("engine_lib.validation.state.read_file")
|
||||
@patch("engine_lib.validation.state.truncate_world")
|
||||
@patch("engine_lib.validation.call_llm")
|
||||
def test_llm_returns_bad_json(mock_call_llm, mock_truncate_world, mock_read_file):
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern"
|
||||
mock_truncate_world.return_value = "## Location\nTavern"
|
||||
mock_call_llm.return_value = "not valid json at all"
|
||||
|
||||
valid, reason = validate_action("I cast a spell", story="In a dungeon", log="- Found a weird altar")
|
||||
|
||||
assert valid is False
|
||||
assert reason == "Unrecognized"
|
||||
print("✓ bad JSON from LLM gives (False, 'Unrecognized')")
|
||||
|
||||
|
||||
@patch("engine_lib.validation.state.read_file")
|
||||
@patch("engine_lib.validation.state.truncate_world")
|
||||
def test_missing_character_sheet(mock_truncate_world, mock_read_file):
|
||||
from engine_lib.validation import validate_action
|
||||
|
||||
mock_read_file.return_value = ""
|
||||
mock_truncate_world.return_value = "*No world state.*"
|
||||
|
||||
with patch("engine_lib.validation.call_llm") as mock_call_llm:
|
||||
mock_call_llm.return_value = json.dumps({"valid": True, "reason": "ok"})
|
||||
valid, reason = validate_action("I look around", story="In a dark room", log="- Entered the room")
|
||||
|
||||
assert valid is True
|
||||
print("✓ handles missing character sheet gracefully")
|
||||
|
||||
|
||||
# ── validate_turn tests ────────────────────────────────────
|
||||
|
||||
|
||||
def test_turn_empty_inputs():
|
||||
"""No action and no narrative should return (True, '', 'ok')."""
|
||||
from engine_lib.validation import validate_turn
|
||||
@ -118,7 +21,6 @@ def test_turn_empty_inputs():
|
||||
|
||||
|
||||
def _mock_read(p: str) -> str:
|
||||
"""Helper for mock_read_file side_effect handling char/world/journal."""
|
||||
low = str(p).lower()
|
||||
if "character" in low:
|
||||
return "HP: 10\nGold: 5\nInventory:\n- Healing Salve"
|
||||
@ -251,12 +153,6 @@ def test_turn_bad_json(mock_call_llm, mock_truncate_world, mock_read_file):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_empty_action()
|
||||
test_valid_action()
|
||||
test_invalid_action()
|
||||
test_llm_returns_none()
|
||||
test_llm_returns_bad_json()
|
||||
test_missing_character_sheet()
|
||||
test_turn_empty_inputs()
|
||||
test_turn_valid()
|
||||
test_turn_reject()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user