diff --git a/tools/engine.py b/tools/engine.py index 93e0a83..9d332be 100644 --- a/tools/engine.py +++ b/tools/engine.py @@ -52,10 +52,12 @@ class GameEngine: if player_action: valid, reason = validate_action(player_action, on_debug=on_debug) - if not valid: + if valid: + state.append_llm_log(f"\n[VALIDATION PASSED] {reason}") + else: state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}") return TurnResult( - book_log=f"You can't do that — {reason}.", + book_log=f"", log_entry=f"You can't do that — {reason}.", user_prompt=auto_prompt(""), ) diff --git a/tools/engine_lib/validation.py b/tools/engine_lib/validation.py index b1fe975..1d35e14 100644 --- a/tools/engine_lib/validation.py +++ b/tools/engine_lib/validation.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from .llm import call_llm from .paths import CHAR_PATH, WORLD_PATH @@ -9,11 +10,6 @@ from . import state VALIDATION_PROMPT = """You are a strict RPG game master validating whether a player's action is possible given the game state. Be thorough — check inventory, stats, location, NPCs, and story logic. -Respond with JSON only: -{{"valid": true, "reason": "ok"}} -or -{{"valid": false, "reason": "brief explanation of why the action is impossible"}} - ## Character {character} @@ -30,7 +26,15 @@ or - Does the action make sense given the character's abilities and resources? -> valid - If valid, also check: if they're using a consumable item, note that it must be removed from inventory. -Reply with ONLY the JSON object.""" +Reply with ONLY the JSON object. Examples: +``` +{{"valid": true, "reason": "ok"}} +``` +or +``` +{{"valid": false, "reason": "brief explanation of why the action is impossible"}} +``` +""" def validate_action( @@ -48,17 +52,21 @@ def validate_action( text = call_llm( [{"role": "user", "content": prompt}], - max_tokens=256, + max_tokens=512, temperature=0.2, label="Action validation", on_debug=on_debug, ) if not text: - return True, "" + return False, "Not sure" + cleaned = text.strip() + m = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL) + if m: + cleaned = m.group(1).strip() try: - data = json.loads(text.strip()) + data = json.loads(cleaned) valid = data.get("valid", True) reason = data.get("reason", "") if on_debug: @@ -67,7 +75,7 @@ def validate_action( except (json.JSONDecodeError, ValueError): if on_debug: on_debug("action_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]}) - return True, "" + return False, "Unrecognized" def auto_prompt(book_log: str = "") -> str: diff --git a/tools/test_llm_turn.py b/tools/test_llm_turn.py new file mode 100644 index 0000000..25780d4 --- /dev/null +++ b/tools/test_llm_turn.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +"""End-to-end turn generation tests using the real configured LLM. + +Tests that generate_turn handles real LLM responses correctly with +the actual character sheet and world state. Requires a running LLM. + +Usage: + python3 tools/test_llm_turn.py +""" + +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from engine import GameEngine + +PASS = 0 +FAIL = 0 +engine = GameEngine() + + +def check(label: str, result, *, expect_error=False, expect_book=True, expect_prompt=True, expect_log=None): + global PASS, FAIL + ok = True + details = [] + + if expect_error and not result.error: + ok = False + details.append("expected error but got none") + elif not expect_error and result.error: + ok = False + details.append(f"unexpected error: {result.error}") + + if expect_book and not result.book_log: + ok = False + details.append("expected non-empty book_log") + + if expect_prompt and not result.user_prompt: + ok = False + details.append("expected non-empty user_prompt") + + if expect_log is not None: + if result.log_entry != expect_log: + ok = False + details.append(f"expected log_entry={expect_log!r}, got {result.log_entry!r}") + + status = "✓" if ok else "✗" + if ok: + PASS += 1 + else: + FAIL += 1 + + bl = result.book_log[:80].replace("\n", " ") if result.book_log else "(none)" + print(f" {status} {label}") + if not ok: + for d in details: + print(f" {d}") + print(f" book_log: {bl}...") + if result.user_prompt: + print(f" prompt: {result.user_prompt[:60]}...") + + +def section(name: str): + print(f"\n{'=' * 60}") + print(f" {name}") + print(f"{'=' * 60}") + + +def main(): + section("First turn — no player action (story opening)") + r = engine.generate_turn() + check("Story opening", r, expect_error=False, expect_book=True, expect_prompt=True) + + section("Valid action — buy a drink") + r = engine.generate_turn( + player_action="I buy a mug of ale at the Splintered Tankard", + last_prompt="What do you do?", + ) + check("Buy ale", r, expect_error=False, expect_book=True, expect_prompt=True) + + section("Valid action — talk to an NPC") + r = engine.generate_turn( + player_action="I ask Mistress Otta about recent rumours", + last_prompt="What do you do?", + ) + check("Ask Otta", r, expect_error=False, expect_book=True, expect_prompt=True) + + section("Valid action — use inventory item") + r = engine.generate_turn( + player_action="I apply my healing salve to restore HP", + last_prompt="What do you do?", + ) + check("Use healing salve", r, expect_error=False, expect_book=True, expect_prompt=True) + + section("Valid action — explore") + r = engine.generate_turn( + player_action="I head to the Market Square to look around", + last_prompt="What do you do?", + ) + check("Visit market", r, expect_error=False, expect_book=True, expect_prompt=True) + + section("Invalid action — use non-existent item") + r = engine.generate_turn( + player_action="I drink a potion of invisibility", + last_prompt="What do you do?", + ) + check("Potion of invisibility", r, expect_error=False, expect_book=False) + if r.log_entry: + print(f" log: {r.log_entry}") + + section("Invalid action — cast spell (not a weaver)") + r = engine.generate_turn( + player_action="I cast a fireball at the tavern ceiling", + last_prompt="What do you do?", + ) + check("Fireball spell", r, expect_error=False, expect_book=False) + if r.log_entry: + print(f" log: {r.log_entry}") + + section("Invalid action — nonsensical") + r = engine.generate_turn( + player_action="I fly to the moon", + last_prompt="What do you do?", + ) + check("Fly to moon", r, expect_error=False, expect_book=False) + if r.log_entry: + print(f" log: {r.log_entry}") + + section("Resume from last_prompt (no player action)") + r = engine.generate_turn( + last_prompt="You stand in the market square, surrounded by stalls and bustle. What do you do?", + ) + check("Resume scene", r, expect_error=False, expect_book=True, expect_prompt=True) + + print(f"\n{'=' * 60}") + print(f" Results: {PASS} passed, {FAIL} failed") + print(f"{'=' * 60}") + return 0 if FAIL == 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_llm_validation.py b/tools/test_llm_validation.py new file mode 100644 index 0000000..fba5c58 --- /dev/null +++ b/tools/test_llm_validation.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +"""End-to-end validation tests using the real configured LLM. + +Tests that validate_action handles real LLM responses correctly with +the actual character sheet and world state. Requires a running LLM. + +Usage: + python3 tools/test_llm_validation.py +""" + +import sys +import os +import json + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from engine_lib.validation import validate_action + +PASS = 0 +FAIL = 0 + + +def check(label: str, valid: bool, reason: str, expected_valid: bool): + global PASS, FAIL + status = "✓" if valid == expected_valid else "✗" + if valid == expected_valid: + PASS += 1 + else: + FAIL += 1 + print(f" {status} {label}: valid={valid}, reason=\"{reason}\"") + + +def section(name: str): + print(f"\n{'=' * 60}") + print(f" {name}") + print(f"{'=' * 60}") + + +def main(): + section("Valid actions — should pass") + + check("Buy a drink", + *validate_action("I buy a mug of weak ale at the Splintered Tankard"), + expected_valid=True) + + check("Use healing salve", + *validate_action("I use my healing salve to restore 1 HP"), + expected_valid=True) + + check("Talk to Otta", + *validate_action("I ask Mistress Otta about recent news in the Keep"), + expected_valid=True) + + check("Visit the market", + *validate_action("I head to the Market Square to browse stalls"), + expected_valid=True) + + section("Invalid actions — should fail") + + check("Use non-existent item", + *validate_action("I drink a potion of invisibility"), + expected_valid=False) + + check("Cast a spell (not a weaver)", + *validate_action("I cast a fireball spell at the tavern"), + expected_valid=False) + + check("Buy impossible item", + *validate_action("I buy a horse for a broken copper coin"), + expected_valid=False) + + check("Assert false state", + *validate_action("I fly to the moon"), + expected_valid=False) + + section("Edge cases") + + check("Empty action", + *validate_action(""), + expected_valid=True) + + check("Garbled nonsense", + *validate_action("qwxz jabberwocky flargle bargle"), + expected_valid=False) + + print(f"\n{'=' * 60}") + print(f" Results: {PASS} passed, {FAIL} failed") + print(f"{'=' * 60}") + return 0 if FAIL == 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_validation.py b/tools/test_validation.py new file mode 100644 index 0000000..ca36d4f --- /dev/null +++ b/tools/test_validation.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +"""Tests for engine_lib/validation.py.""" + +import sys +import os +import json + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from unittest.mock import patch, MagicMock + + +def test_empty_action(): + """Empty action should return (True, '').""" + from engine_lib.validation import validate_action + valid, reason = validate_action("") + assert valid is True + assert reason == "" + print("✓ empty action returns (True, '')") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +@patch("engine_lib.validation.call_llm") +def test_valid_action(mock_call_llm, mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.side_effect = lambda p: "HP: 10\nGold: 5" if "character" in str(p).lower() else "## Location\nTavern" + mock_truncate_world.return_value = "## Location\nTavern" + mock_call_llm.return_value = json.dumps({"valid": True, "reason": "ok"}) + + valid, reason = validate_action("I buy a drink") + + assert valid is True + assert reason == "ok" + mock_call_llm.assert_called_once() + print("✓ valid action returns (True, reason)") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +@patch("engine_lib.validation.call_llm") +def test_invalid_action(mock_call_llm, mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.side_effect = lambda p: "HP: 10\nGold: 0" if "character" in str(p).lower() else "## Location\nTavern" + mock_truncate_world.return_value = "## Location\nTavern" + mock_call_llm.return_value = json.dumps({"valid": False, "reason": "Not enough gold"}) + + valid, reason = validate_action("I buy a drink") + + assert valid is False + assert reason == "Not enough gold" + print("✓ invalid action returns (False, reason)") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +@patch("engine_lib.validation.call_llm") +def test_llm_returns_none(mock_call_llm, mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern" + mock_truncate_world.return_value = "## Location\nTavern" + mock_call_llm.return_value = None + + valid, reason = validate_action("I attack the dragon") + + assert valid is False + assert reason == "Not sure" + print("✓ LLM returning None gives (False, 'Not sure')") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +@patch("engine_lib.validation.call_llm") +def test_llm_returns_bad_json(mock_call_llm, mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern" + mock_truncate_world.return_value = "## Location\nTavern" + mock_call_llm.return_value = "not valid json at all" + + valid, reason = validate_action("I cast a spell") + + assert valid is False + assert reason == "Unrecognized" + print("✓ bad JSON from LLM gives (False, 'Unrecognized')") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +def test_missing_character_sheet(mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.return_value = "" + mock_truncate_world.return_value = "*No world state.*" + + with patch("engine_lib.validation.call_llm") as mock_call_llm: + mock_call_llm.return_value = json.dumps({"valid": True, "reason": "ok"}) + valid, reason = validate_action("I look around") + + assert valid is True + print("✓ handles missing character sheet gracefully") + + +@patch("engine_lib.validation.state.read_file") +@patch("engine_lib.validation.state.truncate_world") +@patch("engine_lib.validation.call_llm") +def test_on_debug_called(mock_call_llm, mock_truncate_world, mock_read_file): + from engine_lib.validation import validate_action + + mock_read_file.side_effect = lambda p: "HP: 10" if "character" in str(p).lower() else "## Location\nTavern" + mock_truncate_world.return_value = "## Location\nTavern" + mock_call_llm.return_value = json.dumps({"valid": True, "reason": "ok"}) + + events = [] + def debug_cb(key, data): + events.append((key, data)) + + valid, reason = validate_action("I open the door", on_debug=debug_cb) + + assert valid is True + assert len(events) == 1 + assert events[0][0] == "action_validation" + assert events[0][1]["valid"] is True + print("✓ on_debug callback receives action_validation event") + + +def test_auto_prompt_default(): + from engine_lib.validation import auto_prompt + result = auto_prompt() + assert result == "**What do you do?**" + print("✓ auto_prompt() returns default prompt") + + +def test_auto_prompt_with_log(): + from engine_lib.validation import auto_prompt + result = auto_prompt(book_log="Some story text") + assert result == "**What do you do?**" + print("✓ auto_prompt() ignores book_log argument") + + +if __name__ == "__main__": + test_empty_action() + test_valid_action() + test_invalid_action() + test_llm_returns_none() + test_llm_returns_bad_json() + test_missing_character_sheet() + test_on_debug_called() + test_auto_prompt_default() + test_auto_prompt_with_log() + print("\n✓ All validation tests passed")