Improve llm calling

2026-06-30 20:40:40 +02:00 · 2026-06-30 20:40:40 +02:00 · 66da60225a
commit 66da60225a
parent 545d3bcac0
9 changed files with 163 additions and 46 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -65,10 +65,11 @@ the-chaos/
 2. **Scene**: Call `engine.generate()` → receive narrative + choices.
 3. **Display**: Show narrative in main pane, render choice buttons.
 4. **Input**: Player clicks a choice or types free text, presses Enter.
-5. **Resolve**: Call `engine.generate(player_action)` → receive outcome + state changes.
-6. **Archive**: Append the full turn (scene + action + outcome) to `book.md`.
-7. **Apply**: Write state changes to `character.md`, `world.md`, `log/`, `ambience.md`, `journal.md`.
-8. **Loop**: Display the next scene → go to step 3.
+5. **Validate**: A separate lightweight LLM call checks whether the player's action is possible given the game state. If the action is invalid, the turn is rejected with a failure narrative.
+6. **Resolve**: Call `engine.generate(player_action)` → receive outcome + state changes.
+7. **Archive**: Append the full turn (scene + action + outcome) to `book.md`.
+8. **Apply**: Write state changes to `character.md`, `world.md`, `log/`, `ambience.md`, `journal.md`.
+9. **Loop**: Display the next scene → go to step 3.

 ### The Engine (engine.py)

@ -198,6 +199,16 @@ Default is "tools" for faster single-call generation.
 - The engine extracts both `content` and `reasoning_content` fields from responses (for OpenAI-compatible servers)
 - The `generate_with_tools_single()` method handles single-call tool-based generation

+## Action Validation
+
+Before a turn that includes a player action is resolved, a separate lightweight LLM call (`validate_action` in `tools/engine_lib/validation.py`) checks whether that action is possible given the character sheet and world state. This catches impossible actions such as using items not in inventory, asserting false facts, or attempting something nonsensical. Turns with no player action skip the check.
+
+- Uses `VALIDATION_PROMPT` template with character + world state
+- Low temperature (0.2), low max tokens (256)
+- Expects JSON response: `{"valid": true/false, "reason": "..."}`
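+- If invalid, the turn is rejected and the narrative is `You can't do that — <reason>.`
+- If the validation call returns nothing or its reply is not parseable JSON, the action is treated as valid (the check fails open)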
+- Operates in both "tools" and "conversational" strategies
+
 ## LLM Logging

 The engine logs detailed information to `llm.log`:
--- a/tools/engine.py
+++ b/tools/engine.py
@ -66,8 +66,6 @@ class GameEngine:
            temperature=self.temperature,
            timeout=self.timeout,
            max_tokens=self.max_tokens,
-            api_key=self.api_key,
-            api_base=self.api_base,
        )

    def generate_stream(self, player_action=None, last_narrative=None):
@ -78,8 +76,6 @@ class GameEngine:
            temperature=self.temperature,
            timeout=self.timeout,
            max_tokens=self.max_tokens,
-            api_key=self.api_key,
-            api_base=self.api_base,
        )

    def generate_with_tools(
@ -102,8 +98,6 @@ class GameEngine:
            temperature=self.temperature,
            timeout=self.timeout,
            max_tokens=self.max_tokens,
-            api_key=self.api_key,
-            api_base=self.api_base,
        )

    def generate_with_tools_single(
@ -126,8 +120,6 @@ class GameEngine:
            temperature=self.temperature,
            timeout=self.timeout,
            max_tokens=self.max_tokens,
-            api_key=self.api_key,
-            api_base=self.api_base,
        )


--- a/tools/engine_lib/llm.py
+++ b/tools/engine_lib/llm.py
@ -25,14 +25,23 @@ def set_llm_env(model: str, api_key: str | None, api_base: str | None) -> None:
 def call_llm(
    messages: list[dict],
    *,
-    model: str,
-    temperature: float,
-    timeout: int,
-    max_tokens: int,
+    model: str | None = None,
+    temperature: float | None = None,
+    timeout: int | None = None,
+    max_tokens: int | None = None,
    label: str = "",
    on_debug: callable = None,
 ) -> str | None:
-    """Make a single LLM call. Returns content text or None on error."""
+    """Make a single LLM call. Loads config automatically. Returns content text or None on error."""
+    from .config import load_config
+    cfg = load_config().get("llm", {})
+    model = model or cfg.get("model", "ollama/llama3.1")
+    temperature = temperature if temperature is not None else cfg.get("temperature", 0.8)
+    timeout = timeout if timeout is not None else cfg.get("timeout", 120)
+    max_tokens = max_tokens if max_tokens is not None else cfg.get("max_tokens", 4096)
+    api_key = cfg.get("api_key")
+    api_base = cfg.get("api_base")
+    set_llm_env(model, api_key, api_base)
    try:
        import litellm
    except ImportError:
--- a/tools/engine_lib/paths.py
+++ b/tools/engine_lib/paths.py
@ -9,7 +9,7 @@ from datetime import date
 from pathlib import Path


-BASE_DIR = Path(__file__).resolve().parent.parent
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
 SESSION_DIR = BASE_DIR / 'session'
 CONFIG_PATH = SESSION_DIR / 'config.json'
 CHAR_PATH = SESSION_DIR / 'character.md'
--- a/tools/engine_lib/prompts.py
+++ b/tools/engine_lib/prompts.py
@ -29,7 +29,11 @@ Wrap in ```tool to perform an action:
 - **world_update** — content: "full world" (if NPCs/locations/threads change)
 - **journal_update** — add: [...], done: [...]

-You have the full state above — no need to look anything up. Just write the story and use tools when the player's action changes something. If a player action is impossible (e.g. they try to use an item they don't have), narrate the failure and DO NOT use any state-changing tools.
+You have the full state above — no need to look anything up. Just write the story and use tools when the player's action changes something.
+
+You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats/abilities they don't have, or asserting events that didn't happen), narrate the failure and DO NOT use any state-changing tools. The character sheet is the single source of truth.
+
+**Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have.

 ## State

@ -69,6 +73,10 @@ End your response with a `### Changes` block listing what changed:
 - Journal done: Defeat the demon
 - Journal add: Investigate the mine

+You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have), do NOT include any change lines and instead narrate the failure.
+
+**Inventory rule**: If the player wants to use an item, verify it's on the character sheet. If it is, include `- Removed from inventory: <item>` and any other relevant change lines (e.g. `- Current Health: <new HP>`). If it's not on the sheet, reject the action — no change lines.
+
 Only include lines for things that actually changed. Omit unused lines entirely.

 ## State
--- a/tools/engine_lib/strategies.py
+++ b/tools/engine_lib/strategies.py
@ -17,13 +17,13 @@ from typing import Iterator

 from .models import GenerationResult, TurnResult
 from .prompts import PROSE_PROMPT
-from .llm import set_llm_env, call_llm
+from .llm import call_llm
 from .tools_handler import (
    execute_tool, describe_tool_action, describe_change,
    parse_changes_block, extract_tool_calls,
 )
 from .context import build_system_prompt, build_user_message, build_prose_prompt
-from .validation import auto_prompt, validate_narrative
+from .validation import auto_prompt, validate_narrative, validate_action
 from .parsing import parse_response, log_turn_details
 from . import state

@ -38,8 +38,6 @@ def generate(
    temperature: float,
    timeout: int,
    max_tokens: int,
-    api_key: str | None = None,
-    api_base: str | None = None,
 ) -> GenerationResult:
    """
    Synchronous generation. Calls the LLM, parses the response,
@ -63,8 +61,6 @@ def generate(
            error="litellm is not installed. Run: pip install litellm",
        )

-    set_llm_env(model, api_key, api_base)
-
    try:
        response = litellm.completion(
            model=model,
@ -93,8 +89,6 @@ def generate_stream(
    temperature: float,
    timeout: int,
    max_tokens: int,
-    api_key: str | None = None,
-    api_base: str | None = None,
 ) -> Iterator[str]:
    """
    Streaming generator. Yields text chunks as they arrive from the LLM.
@ -118,8 +112,6 @@ def generate_stream(
        })
        return

-    set_llm_env(model, api_key, api_base)
-
    try:
        response = litellm.completion(
            model=model,
@ -153,8 +145,6 @@ def generate_with_tools(
    temperature: float,
    timeout: int,
    max_tokens: int,
-    api_key: str | None = None,
-    api_base: str | None = None,
 ) -> TurnResult:
    """
    Three-phase generation:
@ -163,7 +153,6 @@ def generate_with_tools(
    2. **Summarize** — LLM condenses the book_log into one log line.
    3. **Extract** — LLM reads the book_log and outputs tool calls for state changes.
    """
-    set_llm_env(model, api_key, api_base)
    datetime_now = datetime.now()
    state.append_llm_log(f"\n{'='*60}")
    state.append_llm_log(f"=== Turn — {datetime_now.strftime('%Y-%m-%d %H:%M:%S')} ===")
@ -176,6 +165,23 @@ def generate_with_tools(
    die_roll = random.randint(1, 6)
    state.append_llm_log(f"Dice: {die_roll} (1d6)")

+    # ── Pre-generation validation ────────────────────────────────────
+    if player_action:
+        valid, reason = validate_action(
+            player_action,
+            model=model,
+            timeout=timeout,
+            on_debug=on_debug,
+        )
+        if not valid:
+            state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}")
+            fail_narrative = f"You can't do that — {reason}."
+            return TurnResult(
+                book_log=fail_narrative,
+                log_entry=fail_narrative,
+                user_prompt=auto_prompt(""),
+            )
+
    book_log = None
    changes_block = ""
    log_entry = None
@ -251,7 +257,8 @@ def generate_with_tools(
                    f"## Session Log\n{log_context}\n\n"
                    f"## New Story\n{context}"}
            ], model=model, temperature=temperature, timeout=timeout,
-               max_tokens=max_tokens, label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
+               max_tokens=max_tokens,
+               label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug)
            if text and text.strip():
                log_entry = text.strip().split("\n")[0][:300]
                if on_debug:
@ -339,7 +346,8 @@ def generate_with_tools(
                    f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n"
                    f"Only output tools for things that actually changed. Omit unchanged fields."}
            ], model=model, temperature=temperature, timeout=timeout,
-               max_tokens=max_tokens, label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)
+               max_tokens=max_tokens,
+               label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug)

            if not text or not text.strip():
                if on_debug:
@ -460,8 +468,6 @@ def generate_with_tools_single(
    temperature: float,
    timeout: int,
    max_tokens: int,
-    api_key: str | None = None,
-    api_base: str | None = None,
 ) -> TurnResult:
    """
    Single-call generation using tools.
@ -489,6 +495,10 @@ def generate_with_tools_single(

    system = """You are an RPG dungeon master. The player just took an action.

+You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure with the narrative tool and do NOT call any state-changing tools.
+
+**Inventory rule**: If the player wants to use an item, verify it's on the character sheet first. If it is, you MUST call `remove_from_inventory` for that item AND apply effects (e.g. `modify_vitals`). If it's not on the sheet, narrate the failure — do not let them use items they don't have.
+
 Output ONLY ```tool blocks — no prose, no reasoning, no explanation outside tool blocks. Every piece of output must be in a tool block.

 Use these tools to perform every action. Wrap each in its own ```tool block:
@ -534,8 +544,24 @@ Use these tools to perform every action. Wrap each in its own ```tool block:
    )
    user += f"\n\n*A die is cast: **{die_roll}** (1d6).*"

+    # ── Pre-generation validation ────────────────────────────────────
+    if player_action:
+        valid, reason = validate_action(
+            player_action,
+            model=model,
+            timeout=timeout,
+            on_debug=on_debug,
+        )
+        if not valid:
+            state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}")
+            fail_narrative = f"You can't do that — {reason}."
+            return TurnResult(
+                book_log=fail_narrative,
+                log_entry=fail_narrative,
+                user_prompt=auto_prompt(""),
+            )
+
    start_time = datetime.now()
-    set_llm_env(model, api_key, api_base)
    state.append_llm_log(f"\n[TOOL] Single call — {len(system)} chars system, {len(user)} chars user")
    state.append_llm_log(f"System preview: {system.split(chr(10))[0][:80]}...")
    state.append_llm_log(f"User preview: {user.split(chr(10))[0][:80]}...")
--- a/tools/engine_lib/validation.py
+++ b/tools/engine_lib/validation.py
@ -7,10 +7,82 @@ Standalone functions — no dependency on GameEngine.

 from __future__ import annotations

+import json
 import re
 from collections import Counter

 from .llm import call_llm
+from .paths import CHAR_PATH, WORLD_PATH
+from . import state
+
+
+VALIDATION_PROMPT = """You are a strict RPG game master validating whether a player's action is possible given the game state. Be thorough — check inventory, stats, location, NPCs, and story logic.
+
+Respond with JSON only:
+{{"valid": true, "reason": "ok"}}
+or
+{{"valid": false, "reason": "brief explanation of why the action is impossible"}}
+
+## Character
+{character}
+
+## World
+{world}
+
+
+## Player Action
+{action}
+
+## Instructions
+- Is the player trying to use an item they don't have? -> invalid
+- Are they asserting something that contradicts the state? -> invalid
+- Is the action nonsensical given the situation? -> invalid
+- Does the action make sense given the character's abilities and resources? -> valid
+- If valid, also check: if they're using a consumable item, note that it must be removed from inventory.
+
+Reply with ONLY the JSON object."""
+
+
+def validate_action(
+    player_action: str,
+    *,
+    model: str | None = None,
+    timeout: int | None = None,
+    on_debug: callable = None,
+) -> tuple[bool, str]:
+    """Ask the LLM whether a player action is valid given the game state. Returns (valid, reason)."""
+    if not player_action:
+        return True, ""
+
+    char = state.read_file(CHAR_PATH) or "*No character sheet.*"
+    world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"
+
+    prompt = VALIDATION_PROMPT.format(character=char, world=world, action=player_action)
+
+    text = call_llm(
+        [{"role": "user", "content": prompt}],
+        model=model,
+        timeout=timeout,
+        max_tokens=256,
+        temperature=0.2,
+        label="Action validation",
+        on_debug=on_debug,
+    )
+
+    if not text:
+        return True, ""
+
+    try:
+        data = json.loads(text.strip())
+        valid = data.get("valid", True)
+        reason = data.get("reason", "")
+        if on_debug:
+            on_debug("action_validation", {"valid": valid, "reason": reason, "action": player_action})
+        return valid, reason
+    except (json.JSONDecodeError, ValueError):
+        if on_debug:
+            on_debug("action_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]})
+        return True, ""


 def auto_prompt(book_log: str = "") -> str:
@ -21,9 +93,7 @@ def auto_prompt(book_log: str = "") -> str:
 def validate_narrative(
    book_log: str,
    *,
-    model: str,
-    temperature: float,
-    timeout: int,
+    model: str | None = None,
    on_debug: callable = None,
 ) -> tuple[bool, str]:
    """Check if book_log is acceptable narrative. Returns (ok, reason)."""
@ -63,8 +133,8 @@ def validate_narrative(
            f"5 = excellent (vivid, engaging)\n"
            f"Reply with ONLY a single digit 1-5.\n\n"
            f"{book_log[:600]}"}
-    ], model=model, temperature=temperature, timeout=timeout,
-       max_tokens=2, label="Narrative validation", on_debug=on_debug)
+    ], model=model, max_tokens=2, temperature=0.2,
+       label="Narrative validation", on_debug=on_debug)

    if text and text.strip().isdigit():
        score = int(text.strip())
--- a/tools/run.py
+++ b/tools/run.py
@ -476,11 +476,11 @@ class ChaosTUI(App):
        self._set_narrative("\n\n".join(parts) if parts else "")
        self._enable_input()

-    def _enable_input(self) -> None:
+    def _enable_input(self, value: str = "") -> None:
        inp = self.query_one("#play-input", Input)
        inp.disabled = False
        inp.placeholder = "Type your action and press Enter..."
-        inp.value = ""
+        inp.value = value
        inp.focus()

    def _set_narrative(self, text: str) -> None:
@ -490,7 +490,7 @@ class ChaosTUI(App):
    def _show_error(self, error: str, debug_info: str = "") -> None:
        t = f"**Error:** {error}\n\n" + (f"**Debug Info:**\n\n{debug_info}\n\n" if debug_info else "")
        self._set_narrative(t + "Check your session/config.json and ensure your LLM provider is running.")
-        self._enable_input()
+        self._enable_input(value=self._last_player_action if hasattr(self, '_last_player_action') else "")

    def on_input_submitted(self, event: Input.Submitted) -> None:
        action = event.value.strip()
@ -498,6 +498,7 @@ class ChaosTUI(App):
            event.stop()
            return
        event.stop()
+        self._last_player_action = action
        self._call_llm(player_action=action)

    def _init_book(self):
--- a/tools/test_runtime.py
+++ b/tools/test_runtime.py
@ -30,7 +30,7 @@ def test_engine_import():
        ('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log']),
        ('engine_lib.tools_handler', ['execute_tool', 'extract_tool_calls', 'TOOL_REGISTRY']),
        ('engine_lib.llm', ['call_llm', 'set_llm_env']),
-        ('engine_lib.validation', ['validate_narrative', 'auto_prompt']),
+        ('engine_lib.validation', ['validate_narrative', 'auto_prompt', 'validate_action']),
        ('engine_lib.parsing', ['parse_response', 'log_turn_details']),
        ('engine_lib.strategies', ['generate_with_tools', 'generate_with_tools_single']),
        ('engine', ['GameEngine']),