splinter-keep/tools/test_llm_turn.py
2026-06-30 21:44:57 +02:00

144 lines
4.4 KiB
Python

#!/usr/bin/env python3
"""End-to-end turn generation tests using the real configured LLM.

Tests that generate_turn handles real LLM responses correctly with
the actual character sheet and world state. Requires a running LLM.

Usage:
    python3 tools/test_llm_turn.py
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from engine import GameEngine
# Running tallies of passed / failed checks: check() increments one of them
# per call and main() reports both at the end of the run.
PASS = FAIL = 0

# Single engine instance shared by every scenario in main(). It is built at
# import time, so importing this module constructs a GameEngine.
engine = GameEngine()
def check(label: str, result, *, expect_error=False, expect_book=True, expect_prompt=True, expect_log=None):
    """Validate one turn result, print a report for it, and update the tallies.

    Args:
        label: Short scenario name shown on the report line.
        result: Turn result object. Only its ``error``, ``book_log``,
            ``user_prompt`` and ``log_entry`` attributes are read.
        expect_error: If true, a falsy ``result.error`` is a failure; if
            false, a truthy ``result.error`` is a failure.
        expect_book: If true, ``result.book_log`` must be non-empty. If
            false, the book log is simply not checked (an empty log is not
            required).
        expect_prompt: If true, ``result.user_prompt`` must be non-empty. If
            false, the prompt is not checked.
        expect_log: If not ``None``, ``result.log_entry`` must equal it.

    Side effects:
        Increments the module-level ``PASS`` or ``FAIL`` counter and prints
        the status line, any failure details, and a short preview of the
        book log and prompt to stdout.
    """
    global PASS, FAIL

    # Collect every failed expectation; the check passes when none were found.
    details = []
    if expect_error and not result.error:
        details.append("expected error but got none")
    elif not expect_error and result.error:
        details.append(f"unexpected error: {result.error}")
    if expect_book and not result.book_log:
        details.append("expected non-empty book_log")
    if expect_prompt and not result.user_prompt:
        details.append("expected non-empty user_prompt")
    if expect_log is not None and result.log_entry != expect_log:
        details.append(f"expected log_entry={expect_log!r}, got {result.log_entry!r}")

    ok = not details
    # The status marker must differ between outcomes, otherwise the report
    # line cannot be told apart for passing and failing checks.
    status = "PASS" if ok else "FAIL"
    if ok:
        PASS += 1
    else:
        FAIL += 1

    # One-line preview of the narrative: first 80 chars with newlines flattened.
    bl = result.book_log[:80].replace("\n", " ") if result.book_log else "(none)"
    print(f" {status} {label}")
    for d in details:
        print(f" {d}")
    print(f" book_log: {bl}...")
    if result.user_prompt:
        print(f" prompt: {result.user_prompt[:60]}...")
def section(name: str):
    """Print a banner for a group of checks.

    The banner is a blank line, a 60-character ``=`` rule, the section name
    indented by one space, and a closing rule.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {name}")
    print(rule)
def main():
    """Run every scripted scenario against the shared engine and print totals.

    Each scenario prints a section banner, calls ``engine.generate_turn`` with
    the scenario's keyword arguments, and passes the result to ``check``.
    Scenarios flagged ``show_log`` also print the result's ``log_entry`` when
    it is non-empty.

    Returns:
        0 when no check failed, otherwise 1; the ``__main__`` guard passes
        this value to ``sys.exit``.
    """
    default_prompt = "What do you do?"

    # Columns: section title, check label, generate_turn kwargs,
    # expect_book, show_log.
    scenarios = [
        (
            "First turn — no player action (story opening)",
            "Story opening",
            {},
            True,
            False,
        ),
        (
            "Valid action — buy a drink",
            "Buy ale",
            {
                "player_action": "I buy a mug of ale at the Splintered Tankard",
                "last_prompt": default_prompt,
            },
            True,
            False,
        ),
        (
            "Valid action — talk to an NPC",
            "Ask Otta",
            {
                "player_action": "I ask Mistress Otta about recent rumours",
                "last_prompt": default_prompt,
            },
            True,
            False,
        ),
        (
            "Valid action — use inventory item",
            "Use healing salve",
            {
                "player_action": "I apply my healing salve to restore HP",
                "last_prompt": default_prompt,
            },
            True,
            False,
        ),
        (
            "Valid action — explore",
            "Visit market",
            {
                "player_action": "I head to the Market Square to look around",
                "last_prompt": default_prompt,
            },
            True,
            False,
        ),
        (
            "Invalid action — use non-existent item",
            "Potion of invisibility",
            {
                "player_action": "I drink a potion of invisibility",
                "last_prompt": default_prompt,
            },
            False,
            True,
        ),
        (
            "Invalid action — cast spell (not a weaver)",
            "Fireball spell",
            {
                "player_action": "I cast a fireball at the tavern ceiling",
                "last_prompt": default_prompt,
            },
            False,
            True,
        ),
        (
            "Invalid action — nonsensical",
            "Fly to moon",
            {
                "player_action": "I fly to the moon",
                "last_prompt": default_prompt,
            },
            False,
            True,
        ),
        (
            "Resume from last_prompt (no player action)",
            "Resume scene",
            {
                "last_prompt": "You stand in the market square, surrounded by stalls and bustle. What do you do?",
            },
            True,
            False,
        ),
    ]

    # Scenarios run in order against the same engine instance.
    for title, label, turn_kwargs, expect_book, show_log in scenarios:
        section(title)
        r = engine.generate_turn(**turn_kwargs)
        check(label, r, expect_error=False, expect_book=expect_book, expect_prompt=True)
        if show_log and r.log_entry:
            print(f" log: {r.log_entry}")

    divider = "=" * 60
    print(f"\n{divider}")
    print(f" Results: {PASS} passed, {FAIL} failed")
    print(divider)
    return 1 if FAIL else 0


if __name__ == "__main__":
    sys.exit(main())