splinter-keep/tools/test_llm_validation.py
2026-06-30 21:44:57 +02:00

94 lines
2.4 KiB
Python

#!/usr/bin/env python3
"""End-to-end validation tests using the real configured LLM.
Tests that validate_action handles real LLM responses correctly with
the actual character sheet and world state. Requires a running LLM.
Usage:
    python3 tools/test_llm_validation.py
"""
import sys
import os
import json
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from engine_lib.validation import validate_action
# Running tallies of checks whose verdict matched / did not match expectation.
PASS = 0
FAIL = 0


def check(label: str, valid: bool, reason: str, expected_valid: bool) -> None:
    """Record one validation outcome and print a one-line report.

    Args:
        label: Short human-readable name of the scenario.
        valid: Verdict received for the action.
        reason: Explanation accompanying the verdict; printed verbatim.
        expected_valid: Verdict the scenario is expected to produce.

    Increments the module-level ``PASS`` counter when ``valid`` equals
    ``expected_valid`` and ``FAIL`` otherwise.
    """
    global PASS, FAIL
    if valid == expected_valid:
        PASS += 1
        status = "PASS"
    else:
        FAIL += 1
        # The expected verdict is not printed, so the marker is the only
        # thing on the line that reveals a mismatch; it must differ from
        # the success marker.
        status = "FAIL"
    print(f" {status} {label}: valid={valid}, reason=\"{reason}\"")
def section(name: str):
    """Print a banner: a blank line, a 60-char rule, the title, another rule."""
    rule = "=" * 60
    # One print call; sep="\n" puts each piece on its own line.
    print(f"\n{rule}", f" {name}", rule, sep="\n")
def main():
    """Run every validation scenario and print a summary.

    Each scenario's action text is passed to ``validate_action``; the
    unpacked result is handed to ``check`` together with the expected
    verdict. Scenarios run in the order listed, grouped under a banner
    printed by ``section``.

    Returns:
        0 when no check failed, 1 otherwise.
    """
    # (section title, [(label, action text, expected verdict), ...])
    scenarios = [
        ("Valid actions — should pass", [
            ("Buy a drink",
             "I buy a mug of weak ale at the Splintered Tankard", True),
            ("Use healing salve",
             "I use my healing salve to restore 1 HP", True),
            ("Talk to Otta",
             "I ask Mistress Otta about recent news in the Keep", True),
            ("Visit the market",
             "I head to the Market Square to browse stalls", True),
        ]),
        ("Invalid actions — should fail", [
            ("Use non-existent item",
             "I drink a potion of invisibility", False),
            ("Cast a spell (not a weaver)",
             "I cast a fireball spell at the tavern", False),
            ("Buy impossible item",
             "I buy a horse for a broken copper coin", False),
            ("Assert false state",
             "I fly to the moon", False),
        ]),
        ("Edge cases", [
            # NOTE(review): an empty action is expected to validate as True;
            # confirm this matches the validator's intended contract.
            ("Empty action", "", True),
            ("Garbled nonsense", "qwxz jabberwocky flargle bargle", False),
        ]),
    ]

    for title, cases in scenarios:
        section(title)
        for label, action, expected in cases:
            check(label, *validate_action(action), expected_valid=expected)

    rule = "=" * 60
    print(f"\n{rule}")
    print(f" Results: {PASS} passed, {FAIL} failed")
    print(rule)
    return 1 if FAIL else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())