diff --git a/AGENTS.md b/AGENTS.md index a380e78..9b69270 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -65,10 +65,11 @@ the-chaos/ 2. **Scene**: Call `engine.generate()` → receive narrative + choices. 3. **Display**: Show narrative in main pane, render choice buttons. 4. **Input**: Player clicks a choice or types free text, presses Enter. -5. **Resolve**: Call `engine.generate(player_action)` → receive outcome + state changes. -6. **Archive**: Append the full turn (scene + action + outcome) to `book.md`. -7. **Apply**: Write state changes to `character.md`, `world.md`, `log/`, `ambience.md`, `journal.md`. -8. **Loop**: Display the next scene → go to step 3. +5. **Validate**: A separate lightweight LLM call checks if the action is possible given the game state. If invalid, the turn is rejected with a failure narrative. +6. **Resolve**: Call `engine.generate(player_action)` → receive outcome + state changes. +7. **Archive**: Append the full turn (scene + action + outcome) to `book.md`. +8. **Apply**: Write state changes to `character.md`, `world.md`, `log/`, `ambience.md`, `journal.md`. +9. **Loop**: Display the next scene → go to step 3. ### The Engine (engine.py) @@ -198,6 +199,16 @@ Default is "tools" for faster single-call generation. - The engine extracts both `content` and `reasoning_content` fields from responses (for OpenAI-compatible servers) - The `generate_with_tools_single()` method handles single-call tool-based generation +## Action Validation + +Before every turn that carries a player action, a separate lightweight LLM call (`validate_action` in `tools/engine_lib/validation.py`) checks whether that action is possible given the character sheet and world state. This catches impossible actions such as using items not in inventory, asserting false facts, or attempting something nonsensical. If the validator returns no response, or a reply that cannot be parsed as JSON, the action is allowed through. 
+ +- Uses `VALIDATION_PROMPT` template with character + world state +- Low temperature (0.2), low max tokens (256) +- Expects JSON response: `{"valid": true/false, "reason": "..."}` +- If invalid, the turn is rejected with the reason as the narrative +- Operates in both "tools" and "conversational" strategies + ## LLM Logging The engine logs detailed information to `llm.log`: diff --git a/tools/engine.py b/tools/engine.py index f00e9c7..f0ef1f6 100644 --- a/tools/engine.py +++ b/tools/engine.py @@ -66,8 +66,6 @@ class GameEngine: temperature=self.temperature, timeout=self.timeout, max_tokens=self.max_tokens, - api_key=self.api_key, - api_base=self.api_base, ) def generate_stream(self, player_action=None, last_narrative=None): @@ -78,8 +76,6 @@ class GameEngine: temperature=self.temperature, timeout=self.timeout, max_tokens=self.max_tokens, - api_key=self.api_key, - api_base=self.api_base, ) def generate_with_tools( @@ -102,8 +98,6 @@ class GameEngine: temperature=self.temperature, timeout=self.timeout, max_tokens=self.max_tokens, - api_key=self.api_key, - api_base=self.api_base, ) def generate_with_tools_single( @@ -126,8 +120,6 @@ class GameEngine: temperature=self.temperature, timeout=self.timeout, max_tokens=self.max_tokens, - api_key=self.api_key, - api_base=self.api_base, ) diff --git a/tools/engine_lib/llm.py b/tools/engine_lib/llm.py index c05cecf..67b4edb 100644 --- a/tools/engine_lib/llm.py +++ b/tools/engine_lib/llm.py @@ -25,14 +25,23 @@ def set_llm_env(model: str, api_key: str | None, api_base: str | None) -> None: def call_llm( messages: list[dict], *, - model: str, - temperature: float, - timeout: int, - max_tokens: int, + model: str | None = None, + temperature: float | None = None, + timeout: int | None = None, + max_tokens: int | None = None, label: str = "", on_debug: callable = None, ) -> str | None: - """Make a single LLM call. Returns content text or None on error.""" + """Make a single LLM call. Loads config automatically. 
Returns content text or None on error.""" + from .config import load_config + cfg = load_config().get("llm", {}) + model = model or cfg.get("model", "ollama/llama3.1") + temperature = temperature if temperature is not None else cfg.get("temperature", 0.8) + timeout = timeout if timeout is not None else cfg.get("timeout", 120) + max_tokens = max_tokens if max_tokens is not None else cfg.get("max_tokens", 4096) + api_key = cfg.get("api_key") + api_base = cfg.get("api_base") + set_llm_env(model, api_key, api_base) try: import litellm except ImportError: diff --git a/tools/engine_lib/paths.py b/tools/engine_lib/paths.py index 6f611a8..e9bb48c 100644 --- a/tools/engine_lib/paths.py +++ b/tools/engine_lib/paths.py @@ -9,7 +9,7 @@ from datetime import date from pathlib import Path -BASE_DIR = Path(__file__).resolve().parent.parent +BASE_DIR = Path(__file__).resolve().parent.parent.parent SESSION_DIR = BASE_DIR / 'session' CONFIG_PATH = SESSION_DIR / 'config.json' CHAR_PATH = SESSION_DIR / 'character.md' diff --git a/tools/engine_lib/prompts.py b/tools/engine_lib/prompts.py index 59940ec..d946fee 100644 --- a/tools/engine_lib/prompts.py +++ b/tools/engine_lib/prompts.py @@ -29,7 +29,11 @@ Wrap in ```tool to perform an action: - **world_update** — content: "full world" (if NPCs/locations/threads change) - **journal_update** — add: [...], done: [...] -You have the full state above — no need to look anything up. Just write the story and use tools when the player's action changes something. If a player action is impossible (e.g. they try to use an item they don't have), narrate the failure and DO NOT use any state-changing tools. +You have the full state above — no need to look anything up. Just write the story and use tools when the player's action changes something. + +You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. 
using an item they don't have, spending cash they don't have, claiming stats/abilities they don't have, or asserting events that didn't happen), narrate the failure and DO NOT use any state-changing tools. The character sheet is the single source of truth. + +**Inventory rule**: If the player wants to use an item, you must first verify it's on their character sheet. If it is, you MUST call `remove_from_inventory` for that item AND apply the effects (e.g. `modify_vitals` for HP potions). If it's not on the sheet, reject the action — do not let them use items they don't have. ## State @@ -69,6 +73,10 @@ End your response with a `### Changes` block listing what changed: - Journal done: Defeat the demon - Journal add: Investigate the mine +You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have), do NOT include any change lines and instead narrate the failure. + +**Inventory rule**: If the player wants to use an item, verify it's on the character sheet. If it is, include `- Removed from inventory: ` and any other relevant change lines (e.g. `- Current Health: `). If it's not on the sheet, reject the action — no change lines. + Only include lines for things that actually changed. Omit unused lines entirely. 
## State diff --git a/tools/engine_lib/strategies.py b/tools/engine_lib/strategies.py index 0462f1a..f983bf8 100644 --- a/tools/engine_lib/strategies.py +++ b/tools/engine_lib/strategies.py @@ -17,13 +17,13 @@ from typing import Iterator from .models import GenerationResult, TurnResult from .prompts import PROSE_PROMPT -from .llm import set_llm_env, call_llm +from .llm import call_llm from .tools_handler import ( execute_tool, describe_tool_action, describe_change, parse_changes_block, extract_tool_calls, ) from .context import build_system_prompt, build_user_message, build_prose_prompt -from .validation import auto_prompt, validate_narrative +from .validation import auto_prompt, validate_narrative, validate_action from .parsing import parse_response, log_turn_details from . import state @@ -38,8 +38,6 @@ def generate( temperature: float, timeout: int, max_tokens: int, - api_key: str | None = None, - api_base: str | None = None, ) -> GenerationResult: """ Synchronous generation. Calls the LLM, parses the response, @@ -63,8 +61,6 @@ def generate( error="litellm is not installed. Run: pip install litellm", ) - set_llm_env(model, api_key, api_base) - try: response = litellm.completion( model=model, @@ -93,8 +89,6 @@ def generate_stream( temperature: float, timeout: int, max_tokens: int, - api_key: str | None = None, - api_base: str | None = None, ) -> Iterator[str]: """ Streaming generator. Yields text chunks as they arrive from the LLM. @@ -118,8 +112,6 @@ def generate_stream( }) return - set_llm_env(model, api_key, api_base) - try: response = litellm.completion( model=model, @@ -153,8 +145,6 @@ def generate_with_tools( temperature: float, timeout: int, max_tokens: int, - api_key: str | None = None, - api_base: str | None = None, ) -> TurnResult: """ Three-phase generation: @@ -163,7 +153,6 @@ def generate_with_tools( 2. **Summarize** — LLM condenses the book_log into one log line. 3. **Extract** — LLM reads the book_log and outputs tool calls for state changes. 
""" - set_llm_env(model, api_key, api_base) datetime_now = datetime.now() state.append_llm_log(f"\n{'='*60}") state.append_llm_log(f"=== Turn — {datetime_now.strftime('%Y-%m-%d %H:%M:%S')} ===") @@ -176,6 +165,23 @@ def generate_with_tools( die_roll = random.randint(1, 6) state.append_llm_log(f"Dice: {die_roll} (1d6)") + # ── Pre-generation validation ──────────────────────────────────── + if player_action: + valid, reason = validate_action( + player_action, + model=model, + timeout=timeout, + on_debug=on_debug, + ) + if not valid: + state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}") + fail_narrative = f"You can't do that — {reason}." + return TurnResult( + book_log=fail_narrative, + log_entry=fail_narrative, + user_prompt=auto_prompt(""), + ) + book_log = None changes_block = "" log_entry = None @@ -251,7 +257,8 @@ def generate_with_tools( f"## Session Log\n{log_context}\n\n" f"## New Story\n{context}"} ], model=model, temperature=temperature, timeout=timeout, - max_tokens=max_tokens, label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug) + max_tokens=max_tokens, + label=f"Summarize attempt {p2_attempt + 1}", on_debug=on_debug) if text and text.strip(): log_entry = text.strip().split("\n")[0][:300] if on_debug: @@ -339,7 +346,8 @@ def generate_with_tools( f"```tool\n{{\"tool\": \"finalize_turn\", \"args\": {{\"user_prompt\": \"What do you do?\", \"ambience\": \"dungeon\"}}}}\n```\n\n" f"Only output tools for things that actually changed. 
Omit unchanged fields."} ], model=model, temperature=temperature, timeout=timeout, - max_tokens=max_tokens, label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug) + max_tokens=max_tokens, + label=f"Extract attempt {p3_attempt + 1}", on_debug=on_debug) if not text or not text.strip(): if on_debug: @@ -460,8 +468,6 @@ def generate_with_tools_single( temperature: float, timeout: int, max_tokens: int, - api_key: str | None = None, - api_base: str | None = None, ) -> TurnResult: """ Single-call generation using tools. @@ -489,6 +495,10 @@ def generate_with_tools_single( system = """You are an RPG dungeon master. The player just took an action. +You are the sole authority over the game state. The player's action is a **proposal**, not a fact. If their action contradicts the character sheet (e.g. using an item they don't have, spending cash they don't have, claiming stats they don't have), narrate the failure with the narrative tool and do NOT call any state-changing tools. + +**Inventory rule**: If the player wants to use an item, verify it's on the character sheet first. If it is, you MUST call `remove_from_inventory` for that item AND apply effects (e.g. `modify_vitals`). If it's not on the sheet, narrate the failure — do not let them use items they don't have. + Output ONLY ```tool blocks — no prose, no reasoning, no explanation outside tool blocks. Every piece of output must be in a tool block. Use these tools to perform every action. Wrap each in its own ```tool block: @@ -534,8 +544,24 @@ Use these tools to perform every action. Wrap each in its own ```tool block: ) user += f"\n\n*A die is cast: **{die_roll}** (1d6).*" + # ── Pre-generation validation ──────────────────────────────────── + if player_action: + valid, reason = validate_action( + player_action, + model=model, + timeout=timeout, + on_debug=on_debug, + ) + if not valid: + state.append_llm_log(f"\n[VALIDATION REJECTED] {reason}") + fail_narrative = f"You can't do that — {reason}." 
+ return TurnResult( + book_log=fail_narrative, + log_entry=fail_narrative, + user_prompt=auto_prompt(""), + ) + start_time = datetime.now() - set_llm_env(model, api_key, api_base) state.append_llm_log(f"\n[TOOL] Single call — {len(system)} chars system, {len(user)} chars user") state.append_llm_log(f"System preview: {system.split(chr(10))[0][:80]}...") state.append_llm_log(f"User preview: {user.split(chr(10))[0][:80]}...") diff --git a/tools/engine_lib/validation.py b/tools/engine_lib/validation.py index 605f2c2..1ec4cd2 100644 --- a/tools/engine_lib/validation.py +++ b/tools/engine_lib/validation.py @@ -7,10 +7,114 @@ Standalone functions — no dependency on GameEngine. from __future__ import annotations +import json import re from collections import Counter from .llm import call_llm +from .paths import CHAR_PATH, WORLD_PATH +from . import state + + +VALIDATION_PROMPT = """You are a strict RPG game master validating whether a player's action is possible given the game state. Be thorough — check inventory, stats, location, NPCs, and story logic. + +Respond with JSON only: +{{"valid": true, "reason": "ok"}} +or +{{"valid": false, "reason": "brief explanation of why the action is impossible"}} + +## Character +{character} + +## World +{world} + + +## Player Action +{action} + +## Instructions +- Is the player trying to use an item they don't have? -> invalid +- Are they asserting something that contradicts the state? -> invalid +- Is the action nonsensical given the situation? -> invalid +- Does the action make sense given the character's abilities and resources? -> valid +- If valid, also check: if they're using a consumable item, note that it must be removed from inventory. + +Reply with ONLY the JSON object.""" + +
+def _parse_validation_reply(text: str) -> tuple[bool, str] | None:
+    """Extract ``(valid, reason)`` from the validator's reply.
+
+    Returns None when the reply holds no decodable JSON object.
+    """
+    # Models often wrap the JSON in ``` fences or add chatter around it, so
+    # decode the object starting at the first brace instead of the whole text.
+    start = text.find("{")
+    if start == -1:
+        return None
+    try:
+        data, _ = json.JSONDecoder().raw_decode(text, start)
+    except ValueError:  # json.JSONDecodeError subclasses ValueError
+        return None
+
+    verdict = data.get("valid")
+    if verdict is None:
+        verdict = True  # missing or null verdict: let the action through
+    elif isinstance(verdict, str):
+        # bool("false") is True, so string verdicts must be read explicitly.
+        verdict = verdict.strip().lower() not in ("false", "no", "0")
+
+    # Callers append their own full stop, so drop any trailing one here.
+    reason = str(data.get("reason") or "").strip().rstrip(". ")
+    if not verdict and not reason:
+        reason = "that is not possible right now"
+    return bool(verdict), reason
+
+
+def validate_action(
+    player_action: str,
+    *,
+    model: str | None = None,
+    timeout: int | None = None,
+    on_debug: callable = None,
+) -> tuple[bool, str]:
+    """Ask the LLM whether a player action is valid given the game state.
+
+    Returns ``(valid, reason)``. The check fails open: an empty action, an
+    empty LLM response, or an undecodable reply all yield ``(True, "")``.
+    """
+    if not player_action:
+        return True, ""
+
+    char = state.read_file(CHAR_PATH) or "*No character sheet.*"
+    world = state.truncate_world(state.read_file(WORLD_PATH) or "") or "*No world state.*"
+
+    prompt = VALIDATION_PROMPT.format(character=char, world=world, action=player_action)
+
+    text = call_llm(
+        [{"role": "user", "content": prompt}],
+        model=model,
+        timeout=timeout,
+        max_tokens=256,
+        temperature=0.2,
+        label="Action validation",
+        on_debug=on_debug,
+    )
+
+    if not text:
+        return True, ""
+
+    verdict = _parse_validation_reply(text)
+    if verdict is None:
+        if on_debug:
+            on_debug("action_validation", {"valid": True, "reason": "parse_failed", "raw": text[:200]})
+        return True, ""
+
+    valid, reason = verdict
+    if on_debug:
+        on_debug("action_validation", {"valid": valid, "reason": reason, "action": player_action})
+    return valid, reason
 def auto_prompt(book_log: str = "") -> str: @@ -21,9 +125,9 @@ def auto_prompt(book_log: str = "") -> str: def validate_narrative( book_log: str, *, - model: str, - temperature: float, - timeout: int, + model: str | None = None, + temperature: float | None = None, + timeout: int | None = None, on_debug: callable = None, ) -> tuple[bool, str]: """Check if book_log is acceptable narrative. 
Returns (ok, reason).""" @@ -63,8 +133,8 @@ def validate_narrative( f"5 = excellent (vivid, engaging)\n" f"Reply with ONLY a single digit 1-5.\n\n" f"{book_log[:600]}"} - ], model=model, temperature=temperature, timeout=timeout, - max_tokens=2, label="Narrative validation", on_debug=on_debug) + ], model=model, max_tokens=2, temperature=0.2, + label="Narrative validation", on_debug=on_debug) if text and text.strip().isdigit(): score = int(text.strip()) diff --git a/tools/run.py b/tools/run.py index 70df999..6d0c9bd 100755 --- a/tools/run.py +++ b/tools/run.py @@ -476,11 +476,11 @@ class ChaosTUI(App): self._set_narrative("\n\n".join(parts) if parts else "") self._enable_input() - def _enable_input(self) -> None: + def _enable_input(self, value: str = "") -> None: inp = self.query_one("#play-input", Input) inp.disabled = False inp.placeholder = "Type your action and press Enter..." - inp.value = "" + inp.value = value inp.focus() def _set_narrative(self, text: str) -> None: @@ -490,7 +490,7 @@ class ChaosTUI(App): def _show_error(self, error: str, debug_info: str = "") -> None: t = f"**Error:** {error}\n\n" + (f"**Debug Info:**\n\n{debug_info}\n\n" if debug_info else "") self._set_narrative(t + "Check your session/config.json and ensure your LLM provider is running.") - self._enable_input() + self._enable_input(value=self._last_player_action if hasattr(self, '_last_player_action') else "") def on_input_submitted(self, event: Input.Submitted) -> None: action = event.value.strip() @@ -498,6 +498,7 @@ class ChaosTUI(App): event.stop() return event.stop() + self._last_player_action = action self._call_llm(player_action=action) def _init_book(self): diff --git a/tools/test_runtime.py b/tools/test_runtime.py index 792a1b0..15e8aed 100755 --- a/tools/test_runtime.py +++ b/tools/test_runtime.py @@ -30,7 +30,7 @@ def test_engine_import(): ('engine_lib.state', ['read_file', 'apply_state', 'append_log', 'append_llm_log']), ('engine_lib.tools_handler', ['execute_tool', 
'extract_tool_calls', 'TOOL_REGISTRY']), ('engine_lib.llm', ['call_llm', 'set_llm_env']), - ('engine_lib.validation', ['validate_narrative', 'auto_prompt']), + ('engine_lib.validation', ['validate_narrative', 'auto_prompt', 'validate_action']), ('engine_lib.parsing', ['parse_response', 'log_turn_details']), ('engine_lib.strategies', ['generate_with_tools', 'generate_with_tools_single']), ('engine', ['GameEngine']),