#!/usr/bin/env python3
"""End-to-end validation tests using the real configured LLM.

Tests that validate_action handles real LLM responses correctly with
the actual character sheet and world state. Requires a running LLM.

Usage:
    python3 tools/test_llm_validation.py
"""

import sys
import os
import json

# Prepend this file's own directory to the module search path ahead of the
# engine_lib import below (presumably engine_lib sits next to this script;
# confirm against the repository layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from engine_lib.validation import validate_action

# Running tallies of checks that matched / did not match their expected
# verdict; incremented by check() and read by main() for the summary.
PASS = 0
FAIL = 0


def check(label: str, valid: bool, reason: str, expected_valid: bool) -> None:
    """Record one validation outcome and print a one-line report for it.

    The outcome counts as a pass when ``valid`` equals ``expected_valid``;
    the module-level ``PASS`` counter is incremented in that case and
    ``FAIL`` otherwise.

    Args:
        label: Short human-readable name of the case, echoed in the report.
        valid: Verdict obtained for the action.
        reason: Explanation that accompanied the verdict, echoed in the report.
        expected_valid: Verdict the case is expected to produce.
    """
    global PASS, FAIL

    # Compare once and reuse the result for both the tally and the marker.
    matched = valid == expected_valid
    if matched:
        PASS += 1
    else:
        FAIL += 1

    status = "✓" if matched else "✗"
    print(f" {status} {label}: valid={valid}, reason=\"{reason}\"")


def section(name: str) -> None:
    """Print ``name`` as a banner between two 60-character rules of ``=``.

    A blank line is emitted before the first rule so consecutive sections
    are visually separated.

    Args:
        name: Title shown between the rules.
    """
    rule = "=" * 60  # built once, used above and below the title
    print(f"\n{rule}")
    print(f" {name}")
    print(rule)


def main() -> int:
    """Run every scripted action through validate_action and report results.

    Each case is passed to ``validate_action``; its result is unpacked into
    ``check`` together with the verdict the case expects. Cases run in the
    order listed, grouped under a printed section banner, and a summary
    banner with the pass/fail tallies is printed at the end.

    Returns:
        0 if the ``FAIL`` counter is zero after all cases ran, otherwise 1.
    """
    # (section title, [(label, action text, expected verdict), ...]),
    # listed in execution order.
    suites = [
        ("Valid actions — should pass", [
            ("Buy a drink",
             "I buy a mug of weak ale at the Splintered Tankard", True),
            ("Use healing salve",
             "I use my healing salve to restore 1 HP", True),
            ("Talk to Otta",
             "I ask Mistress Otta about recent news in the Keep", True),
            ("Visit the market",
             "I head to the Market Square to browse stalls", True),
        ]),
        ("Invalid actions — should fail", [
            ("Use non-existent item",
             "I drink a potion of invisibility", False),
            ("Cast a spell (not a weaver)",
             "I cast a fireball spell at the tavern", False),
            ("Buy impossible item",
             "I buy a horse for a broken copper coin", False),
            ("Assert false state",
             "I fly to the moon", False),
        ]),
        ("Edge cases", [
            # NOTE(review): an empty action is expected to be accepted here;
            # confirm this is the intended validator behavior.
            ("Empty action", "", True),
            ("Garbled nonsense", "qwxz jabberwocky flargle bargle", False),
        ]),
    ]

    for title, cases in suites:
        section(title)
        for label, action, expected in cases:
            check(label, *validate_action(action), expected_valid=expected)

    # The summary uses the same banner layout as the section headers.
    section(f"Results: {PASS} passed, {FAIL} failed")
    return 0 if FAIL == 0 else 1


if __name__ == "__main__":
    # Use main()'s return value (0 when nothing failed, 1 otherwise) as the
    # process exit status.
    sys.exit(main())