Better validations

This commit is contained in:
Dejvino 2026-07-01 23:01:01 +02:00
parent 6403d46052
commit b67017c02c
6 changed files with 419 additions and 52 deletions

View File

@ -10,7 +10,7 @@ from pathlib import Path
from engine_lib.models import TurnResult from engine_lib.models import TurnResult
from engine_lib import config from engine_lib import config
from engine_lib.context import build_system_prompt from engine_lib.context import build_system_prompt
from engine_lib.validation import validate_action from engine_lib.validation import validate_turn
from engine_lib.tools_handler import execute_tool, describe_change, extract_tool_calls from engine_lib.tools_handler import execute_tool, describe_change, extract_tool_calls
from engine_lib.parsing import log_turn_details from engine_lib.parsing import log_turn_details
from engine_lib import state from engine_lib import state
@ -51,62 +51,133 @@ class GameEngine:
if on_debug: if on_debug:
on_debug("config", {"model": model, "temperature": lm.get("temperature"), "max_tokens": lm.get("max_tokens"), "strategy": "tools"}) on_debug("config", {"model": model, "temperature": lm.get("temperature"), "max_tokens": lm.get("max_tokens"), "strategy": "tools"})
if player_action:
valid, reason = validate_action(player_action, story=recent_narrative, log=session_log, on_debug=on_debug)
if valid:
state.append_llm_log(f"\n[VALIDATION PASSED] {reason}")
else:
state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}")
return TurnResult(
book_log="",
log_entry=f"You can't do that — {reason}.",
user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\n\t{reason}*",
)
system = build_system_prompt(recent_narrative=recent_narrative, recent_log=session_log) system = build_system_prompt(recent_narrative=recent_narrative, recent_log=session_log)
parts = [] base_parts = []
if player_action: if player_action:
parts.append(f"## Player's Request\n{player_action}") base_parts.append(f"## Player's Request\n{player_action}")
if not player_action and not recent_narrative: if not player_action and not recent_narrative:
parts.append( base_parts.append(
"## Instructions\n" "## Instructions\n"
"This is a new story. Welcome the player and guide them through the game setup." "This is a new story. Welcome the player and guide them through the game setup."
) )
else: else:
parts.append( base_parts.append(
"## Instructions\n" "## Instructions\n"
"Advance the story based on the player's request. " "Advance the story based on the player's request. "
"All state is shown above — write the outcome directly." "All state is shown above — write the outcome directly."
) )
parts.append(f"\n*A die is cast: **{die_roll}** (1d6).*") base_parts.append(f"\n*A die is cast: **{die_roll}** (1d6).*")
user = "\n\n".join(parts) base_user = "\n\n".join(base_parts)
start_time = datetime.now()
state.append_llm_log(f"\n[TOOL] Single call — {len(system)} chars system, {len(user)} chars user")
text = call_llm(
[{"role": "system", "content": system}, {"role": "user", "content": user}],
label="Turn generation",
on_debug=on_debug,
)
if not text or not text.strip():
return TurnResult(error="LLM returned empty response")
raw = text.strip()
state.append_llm_log(f"\n[TOOL] got {len(raw)} chars in {(datetime.now() - start_time).total_seconds() * 1000:.1f}ms")
tool_calls = extract_tool_calls(raw, on_debug=on_debug)
if not tool_calls:
state.append_llm_log("\n[TOOL] no tool blocks found")
MAX_RETRIES = 2
tool_calls = []
book_log = "" book_log = ""
log_entry = None
user_prompt = ""
ambience = None ambience = None
changes: list[str] = []
errors: list[str] = [] errors: list[str] = []
changes: list[str] = []
start_time = datetime.now()
total_attempts = 0
for attempt in range(MAX_RETRIES + 1):
total_attempts = attempt + 1
user = base_user
if attempt > 0:
user += f"\n\n---\n\n## Turn Generation Feedback\n{feedback}"
state.append_llm_log(f"\n[TOOL] Attempt {attempt + 1}/{MAX_RETRIES + 1}{len(system)} chars system, {len(user)} chars user")
text = call_llm(
[{"role": "system", "content": system}, {"role": "user", "content": user}],
label="Turn generation",
on_debug=on_debug,
)
if not text or not text.strip():
if attempt < MAX_RETRIES:
feedback = "Your response was empty. Generate a complete turn with narrative and state changes."
state.append_llm_log("\n[RETRY] empty response")
continue
return TurnResult(error="LLM returned empty response after retries")
raw = text.strip()
state.append_llm_log(f"\n[TOOL] got {len(raw)} chars in {(datetime.now() - start_time).total_seconds() * 1000:.1f}ms")
tool_calls = extract_tool_calls(raw, on_debug=on_debug)
if not tool_calls:
state.append_llm_log("\n[TOOL] no tool blocks found")
# First pass — extract narrative + identify state changes (don't execute yet)
book_log = ""
ambience = None
log_entry = None
state_changes: list[dict] = []
for tc in tool_calls:
name = tc.get("tool", "")
args = tc.get("args", {})
if name == "narrative":
text = args.get("text", "")
if text:
book_log = (book_log + "\n\n" + text) if book_log else text
elif name == "finalize_turn":
if args.get("ambience"):
ambience = args["ambience"]
if args.get("log_entry"):
log_entry = args["log_entry"]
elif name == "player_roll":
pass
elif name not in ("roll",):
state_changes.append(tc)
# Validate the generated turn
if player_action and book_log:
valid, reason, action = validate_turn(
player_action,
narrative=book_log,
log_entry=log_entry or "",
changes=state_changes,
story=recent_narrative,
log=session_log,
on_debug=on_debug,
)
if on_debug:
on_debug("turn_validation", {"valid": valid, "reason": reason, "action": action, "attempt": total_attempts})
if valid:
state.append_llm_log(f"\n[TURN VALID] {reason}")
elif reason == "Unrecognized":
if attempt < MAX_RETRIES:
feedback = "The validation system could not process the previous turn. Please regenerate."
state.append_llm_log(f"\n[TURN REGENERATE] (unrecognized) attempt {attempt + 2}")
continue
state.append_llm_log(f"\n[TURN UNRECOGNIZED] cannot validate turn")
return TurnResult(
book_log="",
log_entry="Your action was rejected — cannot validate turn.",
user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\ncannot validate turn*",
)
elif action == "reject":
state.append_llm_log(f"\n[TURN REJECTED] {reason}")
return TurnResult(
book_log="",
log_entry=f"Your action was rejected — {reason}.",
user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\n\t{reason}*",
)
elif action == "regenerate" and attempt < MAX_RETRIES:
feedback = f"The generated turn has issues: {reason}\n\nPlease regenerate the turn addressing this feedback. Keep the same player action but fix the problems described above."
state.append_llm_log(f"\n[TURN REGENERATE] attempt {attempt + 2}: {reason}")
continue
else:
state.append_llm_log(f"\n[TURN REGENERATE EXCEEDED] accepting despite: {reason}")
else:
state.append_llm_log("\n[TURN SKIP VALIDATION] no player action or no narrative")
# Accept this turn — execute all tool calls
break
# Second pass — execute all tool calls
extr_start = datetime.now() extr_start = datetime.now()
for tc in tool_calls: for tc in tool_calls:
@ -114,12 +185,9 @@ class GameEngine:
args = tc.get("args", {}) args = tc.get("args", {})
if name == "narrative": if name == "narrative":
text = args.get("text", "") pass
if text:
book_log = (book_log + "\n\n" + text) if book_log else text
elif name == "finalize_turn": elif name == "finalize_turn":
if args.get("ambience"): pass
ambience = args["ambience"]
elif name == "player_roll" and on_player_roll: elif name == "player_roll" and on_player_roll:
dice = args.get("dice", "1d6") dice = args.get("dice", "1d6")
reason = args.get("reason", "a check") reason = args.get("reason", "a check")
@ -136,7 +204,7 @@ class GameEngine:
if desc: if desc:
changes.append(desc) changes.append(desc)
if book_log: if not log_entry and book_log:
clean = re.sub(r'\s+', ' ', book_log).strip() clean = re.sub(r'\s+', ' ', book_log).strip()
sentences = re.split(r'(?<=[.!?])\s+', clean) sentences = re.split(r'(?<=[.!?])\s+', clean)
log_entry = sentences[0][:200] if sentences else clean[:200] log_entry = sentences[0][:200] if sentences else clean[:200]
@ -153,13 +221,13 @@ class GameEngine:
on_debug("phase_done", { on_debug("phase_done", {
"book_log_chars": len(book_log), "book_log_chars": len(book_log),
"log_entry": log_entry, "log_entry": log_entry,
"user_prompt": user_prompt,
"ambience": ambience, "ambience": ambience,
"extract_errors": errors or None, "extract_errors": errors or None,
"total_elapsed_ms": total_elapsed, "total_elapsed_ms": total_elapsed,
"tool_calls_count": len(tool_calls), "tool_calls_count": len(tool_calls),
"applied_changes_count": applied, "applied_changes_count": applied,
"tool_call_results": tool_calls, "tool_call_results": tool_calls,
"total_attempts": total_attempts,
}) })
log_turn_details( log_turn_details(
@ -180,7 +248,6 @@ class GameEngine:
return TurnResult( return TurnResult(
book_log=book_log, book_log=book_log,
log_entry=log_entry, log_entry=log_entry,
user_prompt=user_prompt,
ambience=ambience, ambience=ambience,
debug_info="; ".join(errors) if errors else "", debug_info="; ".join(errors) if errors else "",
changes=changes, changes=changes,

View File

@ -53,9 +53,11 @@ Wrap each action in its own ```tool block:
{"tool": "journal_update", "args": {"add": ["Investigate the mine"], "done": ["Defeat the demon"]}} {"tool": "journal_update", "args": {"add": ["Investigate the mine"], "done": ["Defeat the demon"]}}
``` ```
```tool ```tool
{"tool": "finalize_turn", "args": {"ambience": "dungeon"}} {"tool": "finalize_turn", "args": {"ambience": "dungeon", "log_entry": "Dillion explored the dungeon, found a hidden passage, and was ambushed by goblins."}}
``` ```
**log_entry**: Provide a short, dense summary (1-2 sentences) of the turn's main events. This becomes the session log — be specific, factual, and concise.
You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure and do NOT call any state-changing tools. You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure and do NOT call any state-changing tools.
**Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have. **Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have.

View File

@ -20,7 +20,7 @@ TOOL_REGISTRY: dict[str, dict] = {
"replace_note": {"description": "Replace note by exact match.", "args": {"before": "exact text", "after": "new text"}}, "replace_note": {"description": "Replace note by exact match.", "args": {"before": "exact text", "after": "new text"}},
"world_update": {"description": "Replace world state.", "args": {"content": "full world markdown"}}, "world_update": {"description": "Replace world state.", "args": {"content": "full world markdown"}},
"journal_update": {"description": "Update TODO/DONE.", "args": {"add": "[...]", "done": "[...]"}}, "journal_update": {"description": "Update TODO/DONE.", "args": {"add": "[...]", "done": "[...]"}},
"finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name"}}, "finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name", "log_entry": "one-line summary of what happened"}},
} }

View File

@ -101,4 +101,152 @@ def validate_action(
return False, "Unrecognized" return False, "Unrecognized"
# Prompt template sent to the LLM to validate a generated turn.
# It is filled with str.format() in validate_turn(), which supplies the
# placeholders: character, world, story, log, action, narrative, log_entry
# and changes. The JSON examples use doubled braces ({{ }}) so that they
# survive str.format() as literal braces.
# The reply is expected to be a JSON object with "valid", "reason" and
# "action" ("ok" | "reject" | "regenerate").
TURN_VALIDATION_PROMPT = """You are a strict RPG game master validating a generated turn. Check:
1. **Action Sense**: Did the player's request make sense given the character, inventory, and world state?
2. **Story Coherence**: Is the story evolution coherent, non-contradictory, and within the game world's logic?
3. **State Correctness**: Do the planned state changes match the narrative? Are they valid given current state?
4. **Log Entry**: Does the log entry accurately summarise the narrative in 1-2 short, dense sentences? Should be specific, factual, and immediately readable.
## Character (before changes)
{character}
## World
{world}
## Recent Story
{story}
## Session Log
{log}
## Player Action
{action}
## Generated Narrative
{narrative}
## Proposed Log Entry
{log_entry}
## Planned State Changes
{changes}
## Instructions
Check all criteria. **Completeness** is critical scan the narrative for every event that should change state and verify it has a corresponding tool call:
- **Item used** must have `remove_from_inventory`
- **Item acquired** must have `add_to_inventory` or `replace_gear`
- **HP changed** must have `modify_vitals`
- **Cash changed** must have `modify_vitals`
- **World changed** must have `world_update`
- **NPC/location/thread changes** must have `world_update` or `add_note`
Missing tool calls = regenerate. Also check that:
- Items removed were actually in inventory
- Items added are reasonable and don't duplicate existing items
- HP/cash changes follow logically from the narrative
- No impossible modifications
For log entry: must be a tight summary of the narrative's key events — specific entities, actions, outcomes. Vague, rambling, or mismatched log entries should be flagged for regenerate.
Reply with ONLY a JSON object using one of these formats:
Valid:
```json
{{"valid": true, "reason": "ok", "action": "ok"}}
```
Reject (player action itself was impossible or nonsensical):
```json
{{"valid": false, "reason": "explain why the action is impossible", "action": "reject"}}
```
Regenerate (turn had fixable issues like wrong state changes or minor inconsistencies):
```json
{{"valid": false, "reason": "describe what the LLM should fix", "action": "regenerate"}}
```
"""
def _format_changes(changes: list[dict]) -> str:
"""Format tool calls into a readable change list for the validation prompt."""
if not changes:
return "*No state changes planned.*"
lines = []
for tc in changes:
tool = tc.get("tool", "?")
args = {k: v for k, v in tc.get("args", {}).items() if v is not None}
parts = ", ".join(f"{k}={v}" for k, v in args.items())
lines.append(f"- {tool}: {parts}" if parts else f"- {tool}")
return "\n".join(lines)
def validate_turn(
player_action: str,
*,
narrative: str = "",
log_entry: str = "",
changes: list[dict] | None = None,
story: str = "",
log: str = "",
on_debug: callable = None,
) -> tuple[bool, str, str]:
"""Validate a complete generated turn.
Returns (valid, reason, action) where action is "ok", "reject", or "regenerate".
"""
if not player_action and not narrative:
return True, "", "ok"
char = state.read_file(CHAR_PATH) or "*No character sheet.*"
world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"
recent = story.strip() or state.read_recent_book() or "*No prior story.*"
log_entries = log.strip() or state.read_recent_log() or "*No recent events.*"
change_summary = _format_changes(changes or [])
prompt = TURN_VALIDATION_PROMPT.format(
character=char, world=world, story=recent,
log=log_entries, action=player_action,
narrative=narrative, log_entry=log_entry or "*No log entry provided.*",
changes=change_summary,
)
messages = [{"role": "user", "content": prompt}]
for attempt in range(2):
text = call_llm(
messages,
max_tokens=1024,
temperature=0.2,
label="Turn validation",
on_debug=on_debug,
)
if not text:
return False, "Not sure", "reject"
cleaned = text.strip()
m = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
if m:
cleaned = m.group(1).strip()
try:
data = json.loads(cleaned)
valid = data.get("valid", True)
reason = data.get("reason", "")
action = data.get("action", "ok")
if action not in ("ok", "reject", "regenerate"):
action = "ok" if valid else "reject"
if on_debug:
on_debug("turn_validation", {"valid": valid, "reason": reason, "action": action})
return valid, reason, action
except (json.JSONDecodeError, ValueError):
if on_debug:
on_debug("turn_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]})
if attempt == 0:
messages.append({
"role": "system",
"content": "Your previous response was not valid JSON. Reply with ONLY a JSON object:\n\n```json\n{\"valid\": true, \"reason\": \"ok\", \"action\": \"ok\"}\n```\nor\n```json\n{\"valid\": false, \"reason\": \"...\", \"action\": \"reject\"}\n```\nor\n```json\n{\"valid\": false, \"reason\": \"...\", \"action\": \"regenerate\"}\n```"
})
return False, "Unrecognized", "reject"

View File

@ -30,7 +30,7 @@ def test_engine_import():
('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log', 'next_turn_number']), ('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log', 'next_turn_number']),
('engine_lib.tools_handler', ['execute_tool', 'extract_tool_calls', 'TOOL_REGISTRY']), ('engine_lib.tools_handler', ['execute_tool', 'extract_tool_calls', 'TOOL_REGISTRY']),
('engine_lib.llm', ['call_llm']), ('engine_lib.llm', ['call_llm']),
('engine_lib.validation', ['validate_action']), ('engine_lib.validation', ['validate_action', 'validate_turn']),
('engine_lib.parsing', ['log_turn_details']), ('engine_lib.parsing', ['log_turn_details']),
('engine', ['GameEngine']), ('engine', ['GameEngine']),
] ]

View File

@ -127,6 +127,150 @@ def test_on_debug_called(mock_call_llm, mock_truncate_world, mock_read_file):
print("✓ on_debug callback receives action_validation event") print("✓ on_debug callback receives action_validation event")
# ── validate_turn tests ────────────────────────────────────
def test_turn_empty_inputs():
    """With neither an action nor a narrative, the result is (True, '', 'ok')."""
    from engine_lib.validation import validate_turn

    outcome = validate_turn("")
    is_valid, why, verdict = outcome

    assert is_valid is True
    assert why == ""
    assert verdict == "ok"
    print("✓ empty inputs returns (True, '', 'ok')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
def test_turn_valid(mock_call_llm, mock_truncate_world, mock_read_file):
    """A valid/ok reply from the validator comes back as (True, 'ok', 'ok')."""
    from engine_lib.validation import validate_turn

    def fake_read_file(path):
        # Character sheet for character paths, world text for everything else.
        if "character" in str(path).lower():
            return "HP: 10\nGold: 5\nInventory:\n- Healing Salve"
        return "## Location\nTavern"

    mock_read_file.side_effect = fake_read_file
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": true, "reason": "ok", "action": "ok"}'

    planned = [
        {"tool": "remove_from_inventory", "args": {"item": "Healing Salve"}},
        {"tool": "modify_vitals", "args": {"current_hp": 8}},
    ]
    is_valid, why, verdict = validate_turn(
        "I use my healing salve",
        narrative="Dillion applies the salve to his wound.",
        log_entry="Dillion used his healing salve to restore 2 HP.",
        changes=planned,
        story="At the tavern",
        log="- Entered the tavern",
    )

    assert is_valid is True
    assert why == "ok"
    assert verdict == "ok"
    print("✓ turn validation returns (True, 'ok', 'ok')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
def test_turn_reject(mock_call_llm, mock_truncate_world, mock_read_file):
    """A reject verdict is passed through together with its reason."""
    from engine_lib.validation import validate_turn

    def fake_read_file(path):
        # Character sheet for character paths, world text for everything else.
        if "character" in str(path).lower():
            return "HP: 10\nGold: 0"
        return "## Location\nTavern"

    mock_read_file.side_effect = fake_read_file
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": false, "reason": "Player has no gold", "action": "reject"}'

    planned = [{"tool": "modify_vitals", "args": {"cash": 0}}]
    is_valid, why, verdict = validate_turn(
        "I buy a round for the house",
        narrative="Dillion orders drinks for everyone.",
        log_entry="Dillion bought a round at the tavern.",
        changes=planned,
        story="At the tavern",
        log="- Entered the tavern",
    )

    assert is_valid is False
    assert why == "Player has no gold"
    assert verdict == "reject"
    print("✓ turn validation returns (False, reason, 'reject')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
def test_turn_regenerate(mock_call_llm, mock_truncate_world, mock_read_file):
    """A regenerate verdict is passed through together with its reason."""
    from engine_lib.validation import validate_turn

    def fake_read_file(path):
        # Character sheet for character paths, world text for everything else.
        if "character" in str(path).lower():
            return "HP: 10\nInventory:\n- Healing Salve"
        return "## Location\nTavern"

    mock_read_file.side_effect = fake_read_file
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": false, "reason": "Narrative says salve used but no remove_from_inventory", "action": "regenerate"}'

    planned = [{"tool": "modify_vitals", "args": {"current_hp": 8}}]
    is_valid, why, verdict = validate_turn(
        "I use my healing salve",
        narrative="Dillion applies the salve to his wound.",
        log_entry="Dillion used his healing salve.",
        changes=planned,
        story="At the tavern",
        log="- Entered the tavern",
    )

    assert is_valid is False
    assert why == "Narrative says salve used but no remove_from_inventory"
    assert verdict == "regenerate"
    print("✓ turn validation returns (False, reason, 'regenerate')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
def test_turn_bad_json(mock_call_llm, mock_truncate_world, mock_read_file):
    """A reply that never parses as JSON yields (False, 'Unrecognized', 'reject')."""
    from engine_lib.validation import validate_turn

    def fake_read_file(path):
        # Character sheet for character paths, world text for everything else.
        if "character" in str(path).lower():
            return "HP: 10"
        return "## Location\nTavern"

    mock_read_file.side_effect = fake_read_file
    mock_truncate_world.return_value = "## Location\nTavern"
    # The same non-JSON text is returned on every call, so the retry fails too.
    mock_call_llm.return_value = "not valid json"

    planned = [{"tool": "roll", "args": {"dice": "1d6"}}]
    is_valid, why, verdict = validate_turn(
        "I attack the dragon",
        narrative="Dillion swings his sword.",
        log_entry="Dillion attacked the dragon.",
        changes=planned,
        story="A dragon appears!",
        log="- Dragon spotted",
    )

    assert is_valid is False
    assert why == "Unrecognized"
    assert verdict == "reject"
    print("✓ turn validation bad JSON gives (False, 'Unrecognized', 'reject')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
def test_turn_on_debug(mock_call_llm, mock_truncate_world, mock_read_file):
    """The on_debug callback receives exactly one turn_validation event."""
    from engine_lib.validation import validate_turn

    def fake_read_file(path):
        # Character sheet for character paths, world text for everything else.
        if "character" in str(path).lower():
            return "HP: 10"
        return "## Location\nTavern"

    mock_read_file.side_effect = fake_read_file
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": true, "reason": "ok", "action": "ok"}'

    recorded = []

    def record(key, data):
        # Keep every (event name, payload) pair in arrival order.
        recorded.append((key, data))

    is_valid, why, verdict = validate_turn(
        "I open the door",
        narrative="Dillion opens the door.",
        log_entry="Dillion opened the door and entered the hall.",
        story="In a hallway",
        log="- Heard noises",
        on_debug=record,
    )

    assert is_valid is True
    assert len(recorded) == 1
    event_name, payload = recorded[0]
    assert event_name == "turn_validation"
    assert payload["valid"] is True
    print("✓ on_debug callback receives turn_validation event")
if __name__ == "__main__": if __name__ == "__main__":
test_empty_action() test_empty_action()
test_valid_action() test_valid_action()
@ -135,4 +279,10 @@ if __name__ == "__main__":
test_llm_returns_bad_json() test_llm_returns_bad_json()
test_missing_character_sheet() test_missing_character_sheet()
test_on_debug_called() test_on_debug_called()
test_turn_empty_inputs()
test_turn_valid()
test_turn_reject()
test_turn_regenerate()
test_turn_bad_json()
test_turn_on_debug()
print("\n✓ All validation tests passed") print("\n✓ All validation tests passed")