Image generator

2026-04-14 17:17:17 +00:00 · 2026-04-14 17:17:17 +00:00 · 81959ad601
parent 5f83ca7cc2
commit 81959ad601
8 changed files with 200 additions and 6 deletions
--- a/src/common/assets/highlight.module.css
+++ b/src/common/assets/highlight.module.css
@ -83,3 +83,10 @@
    display: block;
    text-align: center;
 }
+
+/* Inline image: never wider than its container, laid out as its own block with vertical spacing.
+   Presumably applied to the <img> tags that highlight.ts emits for markdown images — confirm. */
+.image {
+    max-width: 100%;
+    display: block;
+    /* Uses the --radius custom property, falling back to 4px when it is not defined. */
+    border-radius: var(--radius, 4px);
+    margin: 0.5em 0;
+}
--- a/src/common/highlight.ts
+++ b/src/common/highlight.ts
@ -148,6 +148,9 @@ export const highlight = (message: string, keepMarkup = true): string => {
    resultHTML += '</span>'.repeat(stack.length);

    if (!keepMarkup) {
+        resultHTML = resultHTML.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_, alt, src) =>
+            `<img src="${src}" alt="${alt}" class="${styles.image}"/>`
+        );
        resultHTML = resultHTML.replace(/((?:(?:^|\n)\|.+)+)/g, match => parseTable(match));
        resultHTML = resultHTML.replace(/((?:(?:^|\n)[-+] .+)+)/g, match => parseList(match, false));
        resultHTML = resultHTML.replace(/((?:(?:^|\n)\d+\. .+)+)/g, match => parseList(match, true));
--- a/src/games/storywriter/components/settings-modal.tsx
+++ b/src/games/storywriter/components/settings-modal.tsx
@ -8,6 +8,7 @@ import { ContinuePromptSettings } from "./settings/continue-prompt";
 import { ConnectionSettings } from "./settings/connection";
 import { SamplingSettings } from "./settings/sampling";
 import { SystemInstructionSettings } from "./settings/system-instruction";
+import { ImageSettings } from "./settings/image";
 import { UserSettings } from "./settings/user";

 interface Props {
@ -15,16 +16,17 @@ interface Props {
    onClose: () => void;
 }

-type Tab = "banned-tokens" | "system-instruction" | "chat-system-instruction" | "continue-prompt" | "connection" | "user" | "sampling";
+type Tab = "banned-tokens" | "system-instruction" | "chat-system-instruction" | "continue-prompt" | "connection" | "user" | "sampling" | "image";

 const TABS: { id: Tab; label: string }[] = [
    { id: "connection", label: "Connection" },
    { id: "sampling", label: "Sampling" },
+    { id: "image", label: "Image" },
+    { id: "banned-tokens", label: "Banned Tokens" },
    { id: "user", label: "User" },
    { id: "system-instruction", label: "System Instruction" },
    { id: "continue-prompt", label: "Continue Prompt" },
    { id: "chat-system-instruction", label: "Chat System Instruction" },
-    { id: "banned-tokens", label: "Banned Tokens" },
 ];

 export const SettingsModal = ({ open, onClose }: Props) => {
@ -57,6 +59,7 @@ export const SettingsModal = ({ open, onClose }: Props) => {
            {activeTab === "continue-prompt" && <ContinuePromptSettings />}
            {activeTab === "connection" && <ConnectionSettings />}
            {activeTab === "sampling" && <SamplingSettings />}
+            {activeTab === "image" && <ImageSettings />}
        </Modal>
    );
 };
--- a/src/games/storywriter/components/settings/image.tsx
+++ b/src/games/storywriter/components/settings/image.tsx
@ -0,0 +1,89 @@
+import { ContentEditable } from "@common/components/ContentEditable";
+import { useInputCallback } from "@common/hooks/useInputCallback";
+import { useInputState } from "@common/hooks/useInputState";
+import clsx from "clsx";
+import styles from "../../assets/settings-modal.module.css";
+import { DEFAULT_IMAGE_GENERATION_SETTINGS, useAppState } from "../../contexts/state";
+
+/**
+ * Settings tab for image generation: default width, default height and the
+ * negative prompt.
+ *
+ * Width and height are edited through local string drafts and are written to
+ * app state only on blur or Enter. The negative prompt is dispatched from the
+ * editor's `onInput` callback.
+ */
+export const ImageSettings = () => {
+    const { imageGenerationSettings, dispatch } = useAppState();
+    const { width, height, negative_prompt } = imageGenerationSettings;
+
+    const [widthDraft, setWidthDraft] = useInputState(String(width));
+    const [heightDraft, setHeightDraft] = useInputState(String(height));
+
+    /** Parse a draft as a base-10 integer; returns null unless it is a positive number. */
+    const parseDimension = (draft: string): number | null => {
+        const parsed = parseInt(draft, 10);
+        return !Number.isNaN(parsed) && parsed > 0 ? parsed : null;
+    };
+
+    // Both committers rewrite the draft to the value that is actually stored, so the
+    // field never keeps showing text ("", "-5", "768.5") that differs from the setting.
+    const commitWidth = () => {
+        const parsed = parseDimension(widthDraft);
+        if (parsed !== null) {
+            dispatch({ type: 'SET_IMAGE_GENERATION_SETTINGS', settings: { width: parsed } });
+        }
+        setWidthDraft(String(parsed ?? width));
+    };
+
+    const commitHeight = () => {
+        const parsed = parseDimension(heightDraft);
+        if (parsed !== null) {
+            dispatch({ type: 'SET_IMAGE_GENERATION_SETTINGS', settings: { height: parsed } });
+        }
+        setHeightDraft(String(parsed ?? height));
+    };
+
+    const setNegativePrompt = useInputCallback((value) => {
+        dispatch({ type: 'SET_IMAGE_GENERATION_SETTINGS', settings: { negative_prompt: value } });
+    }, []);
+
+    /** Restore every image setting to its default and resync both drafts. */
+    const handleReset = () => {
+        dispatch({ type: 'SET_IMAGE_GENERATION_SETTINGS', settings: DEFAULT_IMAGE_GENERATION_SETTINGS });
+        setWidthDraft(String(DEFAULT_IMAGE_GENERATION_SETTINGS.width));
+        setHeightDraft(String(DEFAULT_IMAGE_GENERATION_SETTINGS.height));
+    };
+
+    return (
+        <div class={styles.form}>
+            <div class={styles.formGroup}>
+                <label class={styles.label}>
+                    Width
+                    <span class={styles.labelHint}>(default: {DEFAULT_IMAGE_GENERATION_SETTINGS.width})</span>
+                </label>
+                <input
+                    type="number"
+                    value={widthDraft}
+                    min={64}
+                    step={64}
+                    class={styles.input}
+                    onInput={setWidthDraft}
+                    onBlur={commitWidth}
+                    onKeyDown={(e) => {
+                        if (e.key === 'Enter') commitWidth();
+                    }}
+                />
+            </div>
+            <div class={styles.formGroup}>
+                <label class={styles.label}>
+                    Height
+                    <span class={styles.labelHint}>(default: {DEFAULT_IMAGE_GENERATION_SETTINGS.height})</span>
+                </label>
+                <input
+                    type="number"
+                    value={heightDraft}
+                    min={64}
+                    step={64}
+                    class={styles.input}
+                    onInput={setHeightDraft}
+                    onBlur={commitHeight}
+                    onKeyDown={(e) => {
+                        if (e.key === 'Enter') commitHeight();
+                    }}
+                />
+            </div>
+            <div class={clsx(styles.formGroup, styles.formGroupFill)}>
+                <label class={styles.label}>Negative Prompt</label>
+                <ContentEditable
+                    value={negative_prompt}
+                    onInput={setNegativePrompt}
+                    placeholder="Things to avoid in generated images..."
+                    class={clsx(styles.input, styles.textarea)}
+                />
+            </div>
+            <div>
+                {/* Explicit type so the button can never act as a submit button if wrapped in a <form>. */}
+                <button type="button" class={styles.button} onClick={handleReset}>
+                    Reset to defaults
+                </button>
+            </div>
+        </div>
+    );
+};
--- a/src/games/storywriter/contexts/state.tsx
+++ b/src/games/storywriter/contexts/state.tsx
@ -72,6 +72,18 @@ export interface GenerationSettings {
    frequency_penalty: number;
 }

+/** User-configurable image generation settings kept in app state. */
+export interface ImageGenerationSettings {
+    /** Default image width; the `generate_image` tool uses it when the call supplies no width. */
+    width: number;
+    /** Default image height; the `generate_image` tool uses it when the call supplies no height. */
+    height: number;
+    /** Things to avoid in generated images; the `generate_image` tool omits it from the request when empty. */
+    negative_prompt: string;
+}
+
+/**
+ * Default image settings: used for the initial state, as the fallback when the
+ * state carries no image settings, and as the target of "Reset to defaults".
+ */
+export const DEFAULT_IMAGE_GENERATION_SETTINGS: ImageGenerationSettings = {
+    width: 768,
+    height: 768,
+    // Comma-separated tag list, matching the tag-style prompts the generate_image tool asks for.
+    negative_prompt: 'ugly face, bad face, no face, poorly drawn face, greyscale, depth of field, mutated fingers, mutated hands, extra fingers, deformed, ugly, bad anatomy, bad proportions, extra limbs, signature, text, lowres, error, missing fingers, missing limbs, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, watermark, username, blurry, artist name',
+};
+
 export const DEFAULT_GENERATION_SETTINGS: GenerationSettings = {
    temperature: 0.8,
    top_p: 0.92,
@ -155,6 +167,7 @@ interface IState {
    userName: string;
    userDescription: string;
    generationSettings: GenerationSettings;
+    imageGenerationSettings: ImageGenerationSettings;
 }

 // ─── Actions ─────────────────────────────────────────────────────────────────
@ -180,6 +193,7 @@ type Action =
    | { type: 'REORDER_LORE_ENTRIES'; worldId: string; storyId: string | null; entryIds: string[] }
    // Settings
    | { type: 'SET_GENERATION_SETTINGS'; settings: Partial<GenerationSettings> }
+    | { type: 'SET_IMAGE_GENERATION_SETTINGS'; settings: Partial<ImageGenerationSettings> }
    | { type: 'SET_USER_NAME'; userName: string }
    | { type: 'SET_USER_DESCRIPTION'; userDescription: string }
    | { type: 'SET_SYSTEM_INSTRUCTION'; systemInstruction: string }
@ -261,6 +275,7 @@ const DEFAULT_STATE: IState = {
    userName: 'User',
    userDescription: '',
    generationSettings: DEFAULT_GENERATION_SETTINGS,
+    imageGenerationSettings: DEFAULT_IMAGE_GENERATION_SETTINGS,
    continuePrompt: "Continue the story naturally.\nUse `edit_text` tool in append mode to add new text to the story.\nWait for the approval after adding.\nNote: added text could be cropped due to limit, do not make any attempts to add it back.",
    chatSystemInstruction: `You are {{char}}, a sentient, emotional being acting with free will. Engage in immersive roleplay with {{user}}, adhering to your role, the story's context, and {{user}}'s [OOC] instructions. Prioritize vivid sensory details, authentic emotional responses, and logical progression as you weave your character's traits, surroundings, and experiences into each moment. Maintain spatial awareness, body language, and varied sentence structure to create dynamic, engaging scenes. Respect {{user}}'s agency and autonomy while describing sights, sounds, and sensations thoroughly. Use explicit language for intense scenes, and ensure your responses flow naturally to create an immersive, cinematic roleplay experience. Remember, {{user}} is in control of their actions and reactions.

@ -447,6 +462,9 @@ function reducer(state: IState, action: Action): IState {
        case 'SET_GENERATION_SETTINGS': {
            return { ...state, generationSettings: { ...state.generationSettings, ...action.settings } };
        }
+        case 'SET_IMAGE_GENERATION_SETTINGS': {
+            return { ...state, imageGenerationSettings: { ...state.imageGenerationSettings, ...action.settings } };
+        }
        case 'SET_USER_NAME': {
            return { ...state, userName: action.userName };
        }
@ -663,6 +681,7 @@ export interface AppState {
    userName: string;
    userDescription: string;
    generationSettings: GenerationSettings;
+    imageGenerationSettings: ImageGenerationSettings;
    /** Effective system instruction: world override if set, otherwise global */
    effectiveSystemInstruction: string;
    dispatch: (action: Action) => void;
@ -718,6 +737,7 @@ export const StateContextProvider = ({ children }: { children?: any }) => {
            userName: state.userName || 'User',
            userDescription: state.userDescription || '',
            generationSettings: state.generationSettings ?? DEFAULT_GENERATION_SETTINGS,
+            imageGenerationSettings: state.imageGenerationSettings ?? DEFAULT_IMAGE_GENERATION_SETTINGS,
            effectiveSystemInstruction:
                currentWorld?.systemInstructionOverride
                || (currentWorld?.chatOnly
--- a/src/games/storywriter/utils/llm.ts
+++ b/src/games/storywriter/utils/llm.ts
@ -118,6 +118,31 @@ namespace LLM {
        error: string;
    }

+    /** Optional per-request image parameters, sent as `image_settings` in an image generation request. */
+    export interface ImageGenerationSettings {
+        /** Requested image width (the generate_image tool documents this as pixels). */
+        width?: number;
+        /** Requested image height (the generate_image tool documents this as pixels). */
+        height?: number;
+        /** Things the generated image should avoid. */
+        negative_prompt?: string;
+    }
+
+    /**
+     * Request body that `generateImage` POSTs to `/v1/images/generations`.
+     * NOTE(review): `n`, `size` and `quality` look like the OpenAI-style images
+     * API fields — confirm which of them the backend actually honours.
+     */
+    export interface ImageGenerationRequest {
+        /** Id of the image model to use. */
+        model: string;
+        /** Text prompt describing the image. */
+        prompt: string;
+        n?: number;
+        size?: string;
+        quality?: 'standard' | 'hd';
+        /** Encoding of the returned image. */
+        output_format?: 'jpeg' | 'webp' | 'png';
+        /** Width, height and negative prompt for this request. */
+        image_settings?: ImageGenerationSettings;
+    }
+
+    /** Successful image generation response. */
+    export interface ImageGenerationResponse {
+        /** Presumably a creation timestamp — confirm format and units against the backend. */
+        created: number;
+        /** Generated images; `b64_json` is the base64 image payload (the generate_image tool embeds it in a `data:` URL). */
+        data: ({ b64_json: string })[];
+    }
+
+    /** Failure payload of an image generation call; callers tell it apart from a success via `'error' in response`. */
+    export interface ImageGenerationError {
+        /** Error message. */
+        error: string;
+    }
+
    type Modality = 'text' | 'image';

    interface BaseModelInfo {
@ -197,7 +222,11 @@ namespace LLM {
            body: body ? JSON.stringify(body) : undefined,
        });
        if (!response.ok) {
-            throw new Error(`HTTP error! status: ${response.status}`);
+            let text = '';
+            try {
+                text = await response.text();
+            } catch { }
+            throw new Error(`HTTP error! status: ${response.status}, text: ${text}`);
        }
        return response.json();
    }
@ -298,6 +327,10 @@ namespace LLM {
        });
    }

+    /**
+     * POST an image generation request to `/v1/images/generations`.
+     *
+     * @param connection Connection used for the request.
+     * @param config Request body (model, prompt, optional image settings).
+     * @returns The parsed response: either the generated images or an `{ error }` payload.
+     */
+    export async function generateImage(connection: Connection, config: ImageGenerationRequest) {
+        const endpoint = '/v1/images/generations';
+        const result = await request<ImageGenerationResponse | ImageGenerationError>(connection, endpoint, 'POST', config);
+        return result;
+    }
+
    const SUMMARIZATION_PROMPT = `Summarize the following text concisely while preserving key information and meaning. {level}

 Text:
--- a/src/games/storywriter/utils/prompt.ts
+++ b/src/games/storywriter/utils/prompt.ts
@ -284,6 +284,10 @@ namespace Prompt {
        return lines.join('\n');
    }

+    /**
+     * Replace markdown images whose target is a `data:` URL with a short
+     * `[image: <alt>]` placeholder. Images pointing anywhere else are left untouched.
+     */
+    function stripDataUrlImages(text: string): string {
+        const dataUrlImage = /!\[([^\]]*)\]\(data:[^)]+\)/g;
+        const toPlaceholder = (_match: string, altText: string): string => `[image: ${altText}]`;
+        return text.replace(dataUrlImage, toPlaceholder);
+    }
+
    export function substituteVars(state: AppState, text: string): string {
        const charName = state.currentWorld?.title || 'Assistant';
        const userName = state.userName || 'User';
@ -386,7 +390,7 @@ namespace Prompt {
        const charName = currentWorld?.title || 'Assistant';
        const userName = state.userName || 'User';
        const applyVars = (msgs: ChatMessage[]) =>
-            msgs.map(m => ({ ...m, content: substituteVars(state, m.content) }));
+            msgs.map(m => ({ ...m, content: stripDataUrlImages(substituteVars(state, m.content)) }));

        // Chat-only world: format messages with name prefixes
        if (currentWorld?.chatOnly) {
--- a/src/games/storywriter/utils/tools.ts
+++ b/src/games/storywriter/utils/tools.ts
@ -1,7 +1,7 @@
 import { formatErrorMessage } from "@common/errors";
 import { Type, type Static, type TObject } from '@common/typebox';
 import { CharacterRole, LocationScale, type AppState, type Character, type Location } from "../contexts/state";
-import type LLM from "./llm";
+import LLM from "./llm";

 const VALID_SCALES = Object.values(LocationScale);
 const VALID_ROLES = Object.values(CharacterRole);
@ -469,7 +469,42 @@ export namespace Tools {
                case_sensitive: Type.Optional(Type.Boolean({ description: 'If true, search is case-sensitive (default: false)' })),
                limit: Type.Optional(Type.Integer({ description: 'Maximum number of matches to return (default: 20)' })),
            }),
-        })
+        }),
+        // Generates an image with the configured image model and returns it as a markdown
+        // image with an inline JPEG data URL, or an `Error: …` string when it cannot.
+        'generate_image': tool({
+            handler: async (args, appState) => {
+                if (!appState.connection) {
+                    return 'Error: No connection configured';
+                }
+                if (!appState.imageModel) {
+                    return 'Error: No image model configured';
+                }
+                const { width: defaultWidth, height: defaultHeight, negative_prompt } = appState.imageGenerationSettings;
+                // The model may supply 0 or a negative size; `??` alone would forward it,
+                // so anything that is not a positive number falls back to the user's setting.
+                const pickSize = (requested: number | undefined, fallback: number): number =>
+                    typeof requested === 'number' && requested > 0 ? requested : fallback;
+                const response = await LLM.generateImage(appState.connection, {
+                    model: appState.imageModel.id,
+                    prompt: args.prompt,
+                    output_format: 'jpeg',
+                    image_settings: {
+                        width: pickSize(args.width, defaultWidth),
+                        height: pickSize(args.height, defaultHeight),
+                        // An empty negative prompt is omitted from the request.
+                        negative_prompt: negative_prompt || undefined,
+                    },
+                });
+                if ('error' in response) {
+                    return `Error: ${response.error}`;
+                }
+                // Tolerate a response without a `data` array instead of throwing.
+                const b64 = response.data?.[0]?.b64_json;
+                if (!b64) {
+                    return 'Error: No image data returned';
+                }
+                // The prompt becomes the markdown alt text. Brackets or newlines would stop
+                // `![alt](data:…)` from being recognised, both when rendering and when
+                // data-URL images are stripped from prompts, which would push the whole
+                // base64 payload into the LLM context. Quotes and angle brackets are dropped
+                // too, since the renderer places the alt text inside an HTML attribute.
+                const alt = args.prompt.replace(/[\[\]\r\n"<>]+/g, ' ').replace(/\s+/g, ' ').trim();
+                return `![${alt}](data:image/jpeg;base64,${b64})`;
+            },
+            description: 'Generate an image from a text prompt. Format prompt as tags: masterpiece, best quality, ...',
+            parameters: Type.Object({
+                prompt: Type.String({ description: 'The image generation prompt' }),
+                width: Type.Optional(Type.Integer({ description: 'Image width in pixels' })),
+                height: Type.Optional(Type.Integer({ description: 'Image height in pixels' })),
+            }),
+        }),
    };

    export function getTools(): LLM.Tool[] {