splinter-keep/tools/engine_lib/validation.py

#!/usr/bin/env python3
"""
validation.py — Player-action and narrative-quality validation for The Chaos engine.

Standalone functions — no dependency on GameEngine.
"""

from __future__ import annotations

import json
import re
from collections import Counter
from collections.abc import Callable

from .llm import call_llm
from .paths import CHAR_PATH, WORLD_PATH
from . import state


# Prompt template sent to the LLM by validate_action().
# It is filled with str.format(), which substitutes the {character}, {world}
# and {action} fields. The braces of the example JSON replies are doubled
# ("{{" / "}}") so that str.format() emits them as literal "{" / "}".
VALIDATION_PROMPT = """You are a strict RPG game master validating whether a player's action is possible given the game state. Be thorough — check inventory, stats, location, NPCs, and story logic.

Respond with JSON only:
{{"valid": true, "reason": "ok"}}
or
{{"valid": false, "reason": "brief explanation of why the action is impossible"}}

## Character
{character}

## World
{world}


## Player Action
{action}

## Instructions
- Is the player trying to use an item they don't have? -> invalid
- Are they asserting something that contradicts the state? -> invalid
- Is the action nonsensical given the situation? -> invalid
- Does the action make sense given the character's abilities and resources? -> valid
- If valid, also check: if they're using a consumable item, note that it must be removed from inventory.

Reply with ONLY the JSON object."""


def _parse_validation_reply(text: str) -> tuple[bool, str] | None:
    """Extract ``(valid, reason)`` from the model's reply.

    Returns ``None`` when no JSON object can be recovered from *text*.
    """
    candidate = text.strip()
    try:
        data = json.loads(candidate)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Models frequently wrap the object in a Markdown fence or add a
        # sentence around it; retry on the outermost "{...}" span.
        start, end = candidate.find("{"), candidate.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            data = json.loads(candidate[start:end + 1])
        except ValueError:
            return None

    # Valid JSON that is not an object (list, string, number) has no fields.
    if not isinstance(data, dict):
        return None

    valid = data.get("valid", True)
    if isinstance(valid, str):
        # A quoted "false" would otherwise be truthy.
        valid = valid.strip().lower() not in {"false", "no", "0", ""}
    reason = data.get("reason")
    return bool(valid), "" if reason is None else str(reason)


def validate_action(
    player_action: str,
    *,
    model: str | None = None,
    timeout: int | None = None,
    on_debug: Callable[..., object] | None = None,
) -> tuple[bool, str]:
    """Ask the LLM whether a player action is valid given the game state.

    The prompt is VALIDATION_PROMPT filled with the character sheet read from
    CHAR_PATH, the truncated world state read from WORLD_PATH, and the action.

    The check fails open: an empty or whitespace-only action, an empty LLM
    reply, and a reply that cannot be parsed as a JSON object are all
    reported as valid.

    Args:
        player_action: The action text entered by the player.
        model: Forwarded to ``call_llm``.
        timeout: Forwarded to ``call_llm``.
        on_debug: Optional callback. It is forwarded to ``call_llm`` and is
            also called here as ``on_debug("action_validation", payload)``
            with the parsed verdict, or with ``reason="parse_failed"`` and
            the first 200 characters of the raw reply.

    Returns:
        ``(valid, reason)`` where ``valid`` is a bool and ``reason`` a str.
    """
    if not player_action or not player_action.strip():
        return True, ""

    char = state.read_file(CHAR_PATH) or "*No character sheet.*"
    world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"

    prompt = VALIDATION_PROMPT.format(character=char, world=world, action=player_action)

    text = call_llm(
        [{"role": "user", "content": prompt}],
        model=model,
        timeout=timeout,
        max_tokens=256,
        temperature=0.2,
        label="Action validation",
        on_debug=on_debug,
    )

    if not text:
        return True, ""

    verdict = _parse_validation_reply(text)
    if verdict is None:
        if on_debug:
            on_debug("action_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]})
        return True, ""

    valid, reason = verdict
    if on_debug:
        on_debug("action_validation", {"valid": valid, "reason": reason, "action": player_action})
    return valid, reason


def auto_prompt(book_log: str = "") -> str:
    """Return the fallback player prompt.

    ``book_log`` is accepted but not consulted; the same Markdown-bold
    question is returned for every input.
    """
    fallback = "**What do you do?**"
    return fallback


# Matches a line that opens with a bold game-mechanics label such as
# "**Roll", "**STR" or a capitalised "**Name:" / "**Name (tag):" prefix.
_MECHANICS_LINE_RE = re.compile(
    r'^\*\*(?:Roll|Damage|Success|Failure|Check|Save|Hit|Miss|'
    r'Strength|Dexterity|Willpower|STR|DEX|WIL|'
    r'(?:[A-Z][a-z]+(?: \(\w+\))?:))'
)


def validate_narrative(
    book_log: str,
    *,
    model: str | None = None,
    on_debug: Callable[..., object] | None = None,
) -> tuple[bool, str]:
    """Check whether *book_log* is acceptable narrative.

    Cheap local heuristics run first, in this order: empty text, a line
    repeated five or more times, mechanics-style lines making up more than
    30% of all lines, leftover ```tool / ```json fences, and fewer than 50
    characters once Markdown punctuation and digits are removed. Only text
    that passes all of them is sent to the LLM for a 1-5 rating; a rating
    below 3 rejects it. A missing or non-numeric rating accepts the text.

    Args:
        book_log: The narrative text to check.
        model: Forwarded to ``call_llm``.
        on_debug: Optional callback forwarded to ``call_llm``.

    Returns:
        ``(ok, reason)``; ``reason`` is ``""`` when ``ok`` is True.
    """
    narrative = (book_log or "").strip()
    if not narrative:
        return False, "Empty narrative"

    lines = narrative.split("\n")

    # Blank lines are paragraph separators, not repeated content, so they
    # are excluded from the repetition count.
    content_lines = [line for line in lines if line.strip()]
    repeated, count = Counter(content_lines).most_common(1)[0]
    if count >= 5:
        return False, f"Repetition: '{repeated[:60]}' ×{count}"

    mech_count = sum(1 for line in lines if _MECHANICS_LINE_RE.match(line))
    if mech_count / len(lines) > 0.3:
        return False, f"Game mechanics dominate ({mech_count}/{len(lines)} lines)"

    if re.search(r'```(?:tool|json)', book_log):
        return False, "Contains unprocessed tool blocks"

    prose = re.sub(r'[*_#>`~\-\d]', '', book_log).strip()
    if len(prose) < 50:
        return False, "Too short to be meaningful"

    rating_prompt = (
        "Rate this RPG narrative quality 1-5.\n"
        "1 = unreadable (spam, repetition, pure mechanics, garbled)\n"
        "2 = poor (mostly mechanics, little story)\n"
        "3 = acceptable (some narrative but rough)\n"
        "4 = good (solid prose, minor issues)\n"
        "5 = excellent (vivid, engaging)\n"
        "Reply with ONLY a single digit 1-5.\n\n"
        f"{book_log[:600]}"
    )
    text = call_llm(
        [{"role": "user", "content": rating_prompt}],
        model=model,
        max_tokens=2,
        temperature=0.2,
        label="Narrative validation",
        on_debug=on_debug,
    )

    # Take the first run of ASCII digits so replies like "4/5" or "3." still
    # count; str.isdigit() would also accept characters such as "²" that
    # int() cannot parse.
    found = re.search(r"[0-9]+", text) if text else None
    if found:
        score = int(found.group())
        if score < 3:
            return False, f"Quality score: {score}/5"

    return True, ""