Better validations

2026-07-01 23:01:01 +02:00 · 2026-07-01 23:01:01 +02:00 · b67017c02c
commit b67017c02c
parent 6403d46052
6 changed files with 419 additions and 52 deletions
--- a/tools/engine.py
+++ b/tools/engine.py
@ -10,7 +10,7 @@ from pathlib import Path
 from engine_lib.models import TurnResult
 from engine_lib import config
 from engine_lib.context import build_system_prompt
-from engine_lib.validation import validate_action
+from engine_lib.validation import validate_turn
 from engine_lib.tools_handler import execute_tool, describe_change, extract_tool_calls
 from engine_lib.parsing import log_turn_details
 from engine_lib import state
@ -51,62 +51,133 @@ class GameEngine:
        if on_debug:
            on_debug("config", {"model": model, "temperature": lm.get("temperature"), "max_tokens": lm.get("max_tokens"), "strategy": "tools"})
        if player_action:
            valid, reason = validate_action(player_action, story=recent_narrative, log=session_log, on_debug=on_debug)
            if valid:
                state.append_llm_log(f"\n[VALIDATION PASSED] {reason}")
            else:
                state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}")
                return TurnResult(
                    book_log="",
                    log_entry=f"You can't do that — {reason}.",
                    user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\n\t{reason}*",
                )
        system = build_system_prompt(recent_narrative=recent_narrative, recent_log=session_log)
-        parts = []
+        base_parts = []
        if player_action:
-            parts.append(f"## Player's Request\n{player_action}")
+            base_parts.append(f"## Player's Request\n{player_action}")
        if not player_action and not recent_narrative:
-            parts.append(
+            base_parts.append(
                "## Instructions\n"
                "This is a new story. Welcome the player and guide them through the game setup."
            )
        else:
-            parts.append(
+            base_parts.append(
                "## Instructions\n"
                "Advance the story based on the player's request. "
                "All state is shown above — write the outcome directly."
            )
-        parts.append(f"\n*A die is cast: **{die_roll}** (1d6).*")
+        base_parts.append(f"\n*A die is cast: **{die_roll}** (1d6).*")
-        user = "\n\n".join(parts)
+        base_user = "\n\n".join(base_parts)
        start_time = datetime.now()
        state.append_llm_log(f"\n[TOOL] Single call — {len(system)} chars system, {len(user)} chars user")
        text = call_llm(
            [{"role": "system", "content": system}, {"role": "user", "content": user}],
            label="Turn generation",
            on_debug=on_debug,
        )
        if not text or not text.strip():
            return TurnResult(error="LLM returned empty response")
        raw = text.strip()
        state.append_llm_log(f"\n[TOOL] got {len(raw)} chars in {(datetime.now() - start_time).total_seconds() * 1000:.1f}ms")
        tool_calls = extract_tool_calls(raw, on_debug=on_debug)
        if not tool_calls:
            state.append_llm_log("\n[TOOL] no tool blocks found")
        MAX_RETRIES = 2
        tool_calls = []
        book_log = ""
        log_entry = None
        user_prompt = ""
        ambience = None
        changes: list[str] = []
        errors: list[str] = []
        changes: list[str] = []
        start_time = datetime.now()
        total_attempts = 0
        for attempt in range(MAX_RETRIES + 1):
            total_attempts = attempt + 1
            user = base_user
            if attempt > 0:
                user += f"\n\n---\n\n## Turn Generation Feedback\n{feedback}"
            state.append_llm_log(f"\n[TOOL] Attempt {attempt + 1}/{MAX_RETRIES + 1} — {len(system)} chars system, {len(user)} chars user")
            text = call_llm(
                [{"role": "system", "content": system}, {"role": "user", "content": user}],
                label="Turn generation",
                on_debug=on_debug,
            )
            if not text or not text.strip():
                if attempt < MAX_RETRIES:
                    feedback = "Your response was empty. Generate a complete turn with narrative and state changes."
                    state.append_llm_log("\n[RETRY] empty response")
                    continue
                return TurnResult(error="LLM returned empty response after retries")
            raw = text.strip()
            state.append_llm_log(f"\n[TOOL] got {len(raw)} chars in {(datetime.now() - start_time).total_seconds() * 1000:.1f}ms")
            tool_calls = extract_tool_calls(raw, on_debug=on_debug)
            if not tool_calls:
                state.append_llm_log("\n[TOOL] no tool blocks found")
            # First pass — extract narrative + identify state changes (don't execute yet)
            book_log = ""
            ambience = None
            log_entry = None
            state_changes: list[dict] = []
            for tc in tool_calls:
                name = tc.get("tool", "")
                args = tc.get("args", {})
                if name == "narrative":
                    text = args.get("text", "")
                    if text:
                        book_log = (book_log + "\n\n" + text) if book_log else text
                elif name == "finalize_turn":
                    if args.get("ambience"):
                        ambience = args["ambience"]
                    if args.get("log_entry"):
                        log_entry = args["log_entry"]
                elif name == "player_roll":
                    pass
                elif name not in ("roll",):
                    state_changes.append(tc)
            # Validate the generated turn
            if player_action and book_log:
                valid, reason, action = validate_turn(
                    player_action,
                    narrative=book_log,
                    log_entry=log_entry or "",
                    changes=state_changes,
                    story=recent_narrative,
                    log=session_log,
                    on_debug=on_debug,
                )
                if on_debug:
                    on_debug("turn_validation", {"valid": valid, "reason": reason, "action": action, "attempt": total_attempts})
                if valid:
                    state.append_llm_log(f"\n[TURN VALID] {reason}")
                elif reason == "Unrecognized":
                    if attempt < MAX_RETRIES:
                        feedback = "The validation system could not process the previous turn. Please regenerate."
                        state.append_llm_log(f"\n[TURN REGENERATE] (unrecognized) attempt {attempt + 2}")
                        continue
                    state.append_llm_log(f"\n[TURN UNRECOGNIZED] cannot validate turn")
                    return TurnResult(
                        book_log="",
                        log_entry="Your action was rejected — cannot validate turn.",
                        user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\ncannot validate turn*",
                    )
                elif action == "reject":
                    state.append_llm_log(f"\n[TURN REJECTED] {reason}")
                    return TurnResult(
                        book_log="",
                        log_entry=f"Your action was rejected — {reason}.",
                        user_prompt=f"*Your action:\n\t\"{player_action}\"\nwas rejected:\n\t{reason}*",
                    )
                elif action == "regenerate" and attempt < MAX_RETRIES:
                    feedback = f"The generated turn has issues: {reason}\n\nPlease regenerate the turn addressing this feedback. Keep the same player action but fix the problems described above."
                    state.append_llm_log(f"\n[TURN REGENERATE] attempt {attempt + 2}: {reason}")
                    continue
                else:
                    state.append_llm_log(f"\n[TURN REGENERATE EXCEEDED] accepting despite: {reason}")
            else:
                state.append_llm_log("\n[TURN SKIP VALIDATION] no player action or no narrative")
            # Accept this turn — execute all tool calls
            break
        # Second pass — execute all tool calls
        extr_start = datetime.now()
        for tc in tool_calls:
@ -114,12 +185,9 @@ class GameEngine:
            args = tc.get("args", {})
            if name == "narrative":
-                text = args.get("text", "")
+                pass
                if text:
                    book_log = (book_log + "\n\n" + text) if book_log else text
            elif name == "finalize_turn":
-                if args.get("ambience"):
+                pass
                    ambience = args["ambience"]
            elif name == "player_roll" and on_player_roll:
                dice = args.get("dice", "1d6")
                reason = args.get("reason", "a check")
@ -136,7 +204,7 @@ class GameEngine:
                    if desc:
                        changes.append(desc)
-        if book_log:
+        if not log_entry and book_log:
            clean = re.sub(r'\s+', ' ', book_log).strip()
            sentences = re.split(r'(?<=[.!?])\s+', clean)
            log_entry = sentences[0][:200] if sentences else clean[:200]
@ -153,13 +221,13 @@ class GameEngine:
            on_debug("phase_done", {
                "book_log_chars": len(book_log),
                "log_entry": log_entry,
                "user_prompt": user_prompt,
                "ambience": ambience,
                "extract_errors": errors or None,
                "total_elapsed_ms": total_elapsed,
                "tool_calls_count": len(tool_calls),
                "applied_changes_count": applied,
                "tool_call_results": tool_calls,
                "total_attempts": total_attempts,
            })
        log_turn_details(
@ -180,7 +248,6 @@ class GameEngine:
        return TurnResult(
            book_log=book_log,
            log_entry=log_entry,
            user_prompt=user_prompt,
            ambience=ambience,
            debug_info="; ".join(errors) if errors else "",
            changes=changes,
--- a/tools/engine_lib/prompts.py
+++ b/tools/engine_lib/prompts.py
@ -53,9 +53,11 @@ Wrap each action in its own ```tool block:
 {"tool": "journal_update", "args": {"add": ["Investigate the mine"], "done": ["Defeat the demon"]}}
 ```
 ```tool
-{"tool": "finalize_turn", "args": {"ambience": "dungeon"}}
+{"tool": "finalize_turn", "args": {"ambience": "dungeon", "log_entry": "Dillion explored the dungeon, found a hidden passage, and was ambushed by goblins."}}
 ```
 **log_entry**: Provide a short, dense summary (1-2 sentences) of the turn's main events. This becomes the session log — be specific, factual, and concise.
 You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure and do NOT call any state-changing tools.
 **Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have.
--- a/tools/engine_lib/tools_handler.py
+++ b/tools/engine_lib/tools_handler.py
@ -20,7 +20,7 @@ TOOL_REGISTRY: dict[str, dict] = {
    "replace_note": {"description": "Replace note by exact match.", "args": {"before": "exact text", "after": "new text"}},
    "world_update": {"description": "Replace world state.", "args": {"content": "full world markdown"}},
    "journal_update": {"description": "Update TODO/DONE.", "args": {"add": "[...]", "done": "[...]"}},
-    "finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name"}},
+    "finalize_turn": {"description": "End turn.", "args": {"ambience": "soundscape name", "log_entry": "one-line summary of what happened"}},
 }
--- a/tools/engine_lib/validation.py
+++ b/tools/engine_lib/validation.py
@ -101,4 +101,152 @@ def validate_action(
    return False, "Unrecognized"
 # Prompt template for the turn-validation LLM call. validate_turn() fills the
 # single-brace placeholders ({character}, {world}, {story}, {log}, {action},
 # {narrative}, {log_entry}, {changes}) via str.format(); the doubled braces in
 # the JSON examples are format escapes that render as literal { and }.
 TURN_VALIDATION_PROMPT = """You are a strict RPG game master validating a generated turn. Check:
 1. **Action Sense**: Did the player's request make sense given the character, inventory, and world state?
 2. **Story Coherence**: Is the story evolution coherent, non-contradictory, and within the game world's logic?
 3. **State Correctness**: Do the planned state changes match the narrative? Are they valid given current state?
 4. **Log Entry**: Does the log entry accurately summarise the narrative in 1-2 short, dense sentences? Should be specific, factual, and immediately readable.
 ## Character (before changes)
 {character}
 ## World
 {world}
 ## Recent Story
 {story}
 ## Session Log
 {log}
 ## Player Action
 {action}
 ## Generated Narrative
 {narrative}
 ## Proposed Log Entry
 {log_entry}
 ## Planned State Changes
 {changes}
 ## Instructions
 Check all criteria. **Completeness** is critical — scan the narrative for every event that should change state and verify it has a corresponding tool call:
 - **Item used** → must have `remove_from_inventory`
 - **Item acquired** → must have `add_to_inventory` or `replace_gear`
 - **HP changed** → must have `modify_vitals`
 - **Cash changed** → must have `modify_vitals`
 - **World changed** → must have `world_update`
 - **NPC/location/thread changes** → must have `world_update` or `add_note`
 Missing tool calls = regenerate. Also check that:
 - Items removed were actually in inventory
 - Items added are reasonable and don't duplicate existing items
 - HP/cash changes follow logically from the narrative
 - No impossible modifications
 For log entry: must be a tight summary of the narrative's key events — specific entities, actions, outcomes. Vague, rambling, or mismatched log entries should be flagged for regenerate.
 Reply with ONLY a JSON object using one of these formats:
 Valid:
 ```json
 {{"valid": true, "reason": "ok", "action": "ok"}}
 ```
 Reject (player action itself was impossible or nonsensical):
 ```json
 {{"valid": false, "reason": "explain why the action is impossible", "action": "reject"}}
 ```
 Regenerate (turn had fixable issues like wrong state changes or minor inconsistencies):
 ```json
 {{"valid": false, "reason": "describe what the LLM should fix", "action": "regenerate"}}
 ```
 """
 def _format_changes(changes: list[dict]) -> str:
    """Format tool calls into a readable change list for the validation prompt."""
    if not changes:
        return "*No state changes planned.*"
    lines = []
    for tc in changes:
        tool = tc.get("tool", "?")
        args = {k: v for k, v in tc.get("args", {}).items() if v is not None}
        parts = ", ".join(f"{k}={v}" for k, v in args.items())
        lines.append(f"- {tool}: {parts}" if parts else f"- {tool}")
    return "\n".join(lines)
 def validate_turn(
    player_action: str,
    *,
    narrative: str = "",
    log_entry: str = "",
    changes: list[dict] | None = None,
    story: str = "",
    log: str = "",
    on_debug: callable = None,
 ) -> tuple[bool, str, str]:
    """Validate a complete generated turn.
    Returns (valid, reason, action) where action is "ok", "reject", or "regenerate".
    """
    if not player_action and not narrative:
        return True, "", "ok"
    char = state.read_file(CHAR_PATH) or "*No character sheet.*"
    world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"
    recent = story.strip() or state.read_recent_book() or "*No prior story.*"
    log_entries = log.strip() or state.read_recent_log() or "*No recent events.*"
    change_summary = _format_changes(changes or [])
    prompt = TURN_VALIDATION_PROMPT.format(
        character=char, world=world, story=recent,
        log=log_entries, action=player_action,
        narrative=narrative, log_entry=log_entry or "*No log entry provided.*",
        changes=change_summary,
    )
    messages = [{"role": "user", "content": prompt}]
    for attempt in range(2):
        text = call_llm(
            messages,
            max_tokens=1024,
            temperature=0.2,
            label="Turn validation",
            on_debug=on_debug,
        )
        if not text:
            return False, "Not sure", "reject"
        cleaned = text.strip()
        m = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
        if m:
            cleaned = m.group(1).strip()
        try:
            data = json.loads(cleaned)
            valid = data.get("valid", True)
            reason = data.get("reason", "")
            action = data.get("action", "ok")
            if action not in ("ok", "reject", "regenerate"):
                action = "ok" if valid else "reject"
            if on_debug:
                on_debug("turn_validation", {"valid": valid, "reason": reason, "action": action})
            return valid, reason, action
        except (json.JSONDecodeError, ValueError):
            if on_debug:
                on_debug("turn_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]})
            if attempt == 0:
                messages.append({
                    "role": "system",
                    "content": "Your previous response was not valid JSON. Reply with ONLY a JSON object:\n\n```json\n{\"valid\": true, \"reason\": \"ok\", \"action\": \"ok\"}\n```\nor\n```json\n{\"valid\": false, \"reason\": \"...\", \"action\": \"reject\"}\n```\nor\n```json\n{\"valid\": false, \"reason\": \"...\", \"action\": \"regenerate\"}\n```"
                })
    return False, "Unrecognized", "reject"
--- a/tools/test_runtime.py
+++ b/tools/test_runtime.py
@ -30,7 +30,7 @@ def test_engine_import():
        ('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log', 'next_turn_number']),
        ('engine_lib.tools_handler', ['execute_tool', 'extract_tool_calls', 'TOOL_REGISTRY']),
        ('engine_lib.llm', ['call_llm']),
-        ('engine_lib.validation', ['validate_action']),
+        ('engine_lib.validation', ['validate_action', 'validate_turn']),
        ('engine_lib.parsing', ['log_turn_details']),
        ('engine', ['GameEngine']),
    ]
--- a/tools/test_validation.py
+++ b/tools/test_validation.py
@ -127,6 +127,150 @@ def test_on_debug_called(mock_call_llm, mock_truncate_world, mock_read_file):
    print("✓ on_debug callback receives action_validation event")
 # ── validate_turn tests ────────────────────────────────────
 def test_turn_empty_inputs():
    """With neither an action nor a narrative, validate_turn yields (True, '', 'ok')."""
    from engine_lib.validation import validate_turn
    outcome = validate_turn("")
    is_valid, why, verdict = outcome
    assert is_valid is True
    assert why == ""
    assert verdict == "ok"
    print("✓ empty inputs returns (True, '', 'ok')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
 def test_turn_valid(mock_call_llm, mock_truncate_world, mock_read_file):
    from engine_lib.validation import validate_turn
    mock_read_file.side_effect = lambda p: "HP: 10\nGold: 5\nInventory:\n- Healing Salve" if "character" in str(p).lower() else "## Location\nTavern"
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": true, "reason": "ok", "action": "ok"}'
    valid, reason, action = validate_turn(
        "I use my healing salve",
        narrative="Dillion applies the salve to his wound.",
        log_entry="Dillion used his healing salve to restore 2 HP.",
        changes=[{"tool": "remove_from_inventory", "args": {"item": "Healing Salve"}},
                  {"tool": "modify_vitals", "args": {"current_hp": 8}}],
        story="At the tavern",
        log="- Entered the tavern",
    )
    assert valid is True
    assert reason == "ok"
    assert action == "ok"
    print("✓ turn validation returns (True, 'ok', 'ok')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
 def test_turn_reject(mock_call_llm, mock_truncate_world, mock_read_file):
    from engine_lib.validation import validate_turn
    mock_read_file.side_effect = lambda p: "HP: 10\nGold: 0" if "character" in str(p).lower() else "## Location\nTavern"
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": false, "reason": "Player has no gold", "action": "reject"}'
    valid, reason, action = validate_turn(
        "I buy a round for the house",
        narrative="Dillion orders drinks for everyone.",
        log_entry="Dillion bought a round at the tavern.",
        changes=[{"tool": "modify_vitals", "args": {"cash": 0}}],
        story="At the tavern",
        log="- Entered the tavern",
    )
    assert valid is False
    assert reason == "Player has no gold"
    assert action == "reject"
    print("✓ turn validation returns (False, reason, 'reject')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
 def test_turn_regenerate(mock_call_llm, mock_truncate_world, mock_read_file):
    from engine_lib.validation import validate_turn
    mock_read_file.side_effect = lambda p: "HP: 10\nInventory:\n- Healing Salve" if "character" in str(p).lower() else "## Location\nTavern"
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": false, "reason": "Narrative says salve used but no remove_from_inventory", "action": "regenerate"}'
    valid, reason, action = validate_turn(
        "I use my healing salve",
        narrative="Dillion applies the salve to his wound.",
        log_entry="Dillion used his healing salve.",
        changes=[{"tool": "modify_vitals", "args": {"current_hp": 8}}],
        story="At the tavern",
        log="- Entered the tavern",
    )
    assert valid is False
    assert reason == "Narrative says salve used but no remove_from_inventory"
    assert action == "regenerate"
    print("✓ turn validation returns (False, reason, 'regenerate')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
 def test_turn_bad_json(mock_call_llm, mock_truncate_world, mock_read_file):
    from engine_lib.validation import validate_turn
    mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern"
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = "not valid json"
    valid, reason, action = validate_turn(
        "I attack the dragon",
        narrative="Dillion swings his sword.",
        log_entry="Dillion attacked the dragon.",
        changes=[{"tool": "roll", "args": {"dice": "1d6"}}],
        story="A dragon appears!",
        log="- Dragon spotted",
    )
    assert valid is False
    assert reason == "Unrecognized"
    assert action == "reject"
    print("✓ turn validation bad JSON gives (False, 'Unrecognized', 'reject')")
@patch("engine_lib.validation.state.read_file")
@patch("engine_lib.validation.state.truncate_world")
@patch("engine_lib.validation.call_llm")
 def test_turn_on_debug(mock_call_llm, mock_truncate_world, mock_read_file):
    from engine_lib.validation import validate_turn
    mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern"
    mock_truncate_world.return_value = "## Location\nTavern"
    mock_call_llm.return_value = '{"valid": true, "reason": "ok", "action": "ok"}'
    events = []
    def debug_cb(key, data):
        events.append((key, data))
    valid, reason, action = validate_turn(
        "I open the door",
        narrative="Dillion opens the door.",
        log_entry="Dillion opened the door and entered the hall.",
        story="In a hallway",
        log="- Heard noises",
        on_debug=debug_cb,
    )
    assert valid is True
    assert len(events) == 1
    assert events[0][0] == "turn_validation"
    assert events[0][1]["valid"] is True
    print("✓ on_debug callback receives turn_validation event")
 if __name__ == "__main__":
    test_empty_action()
    test_valid_action()
@ -135,4 +279,10 @@ if __name__ == "__main__":
    test_llm_returns_bad_json()
    test_missing_character_sheet()
    test_on_debug_called()
    test_turn_empty_inputs()
    test_turn_valid()
    test_turn_reject()
    test_turn_regenerate()
    test_turn_bad_json()
    test_turn_on_debug()
    print("\n✓ All validation tests passed")