
Compare commits


No commits in common. "277b3157953be21ca1dcdf0c760292e18965cc96" and "ece1621e73d99a8292b5a0e51510c38e6a8112e6" have entirely different histories.

16 changed files with 298 additions and 738 deletions

bun.lockb (binary file not shown)

View File

@@ -14,7 +14,6 @@
"@inquirer/select": "2.3.10",
"ace-builds": "1.36.3",
"classnames": "2.5.1",
"delay": "6.0.0",
"preact": "10.22.0"
},
"devDependencies": {

View File

@@ -1,4 +0,0 @@
import { useEffect } from "preact/hooks";
export const useAsyncEffect = (fx: () => any, deps: any[]) =>
useEffect(() => void fx(), deps);
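For context on the removed hook: wrapping the callback in void fx() discards the returned Promise, so useEffect never receives it as a cleanup function. A minimal usage sketch (the component and endpoint are hypothetical):

import { useState } from "preact/hooks";
import { useAsyncEffect } from "@common/hooks/useAsyncEffect";

export const Example = () => {
  const [data, setData] = useState("");
  // The async body runs on mount; its Promise is swallowed by `void`,
  // so useEffect never mistakes it for a cleanup function.
  useAsyncEffect(async () => {
    const res = await fetch("/api/example"); // hypothetical endpoint
    setData(await res.text());
  }, []);
  return <div>{data}</div>;
};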

View File

@@ -1,16 +0,0 @@
import { useCallback } from "preact/hooks";
export function useInputCallback<T>(callback: (value: string) => T, deps: any[]): ((value: string | Event) => T) {
return useCallback((e: Event | string) => {
if (typeof e === 'string') {
return callback(e);
} else {
const { target } = e;
if (target && 'value' in target && typeof target.value === 'string') {
return callback(target.value);
}
}
return callback('');
}, deps);
}
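The removed hook above lets one handler serve both programmatic calls and DOM events: given a string it passes it through, given an Event it reads target.value. A minimal usage sketch (the component is hypothetical):

import { useInputCallback } from "@common/hooks/useInputCallback";

export const NameField = ({ onName }: { onName: (name: string) => void }) => {
  // The same callback works for <input> events and for direct string calls.
  const handleInput = useInputCallback((value) => onName(value), [onName]);
  return <input onInput={handleInput} />;
};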

View File

@@ -1,3 +1,4 @@
export const delay = async (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
export const nextFrame = async (): Promise<number> => new Promise((resolve) => requestAnimationFrame(resolve));
export const randInt = (min: number, max: number) => Math.round(min + (max - min - 1) * Math.random());
@@ -48,33 +49,4 @@ export const intHash = (seed: number, ...parts: number[]) => {
h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909);
return h1;
};
export const sinHash = (...data: number[]) => data.reduce((hash, n) => Math.sin((hash * 123.12 + n) * 756.12), 0) / 2 + 0.5;
export const throttle = function <T, A extends unknown[], R, F extends (this: T, ...args: A) => R>(func: F, ms: number, trailing = false): F {
let isThrottled = false;
let savedResult: R;
let savedThis: T;
let savedArgs: A | undefined;
const wrapper: F = function (...args: A) {
if (isThrottled) {
savedThis = this;
savedArgs = args;
} else {
savedResult = func.apply(this, args);
savedArgs = undefined;
isThrottled = true;
setTimeout(function () {
isThrottled = false;
if (trailing && savedArgs) {
savedResult = wrapper.apply(savedThis, savedArgs);
}
}, ms);
}
return savedResult;
} as F;
return wrapper;
}
export const sinHash = (...data: number[]) => data.reduce((hash, n) => Math.sin((hash * 123.12 + n) * 756.12), 0) / 2 + 0.5;
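For reference, the throttle helper above runs the first call immediately, returns the cached result for calls landing inside the window, and, with trailing = true, replays the most recent suppressed call once the window closes. A small hypothetical usage:

const log = (msg: string) => console.log(Date.now(), msg);
const throttledLog = throttle(log, 1000, true);

throttledLog('a'); // runs immediately (leading edge)
throttledLog('b'); // suppressed now, replayed after ~1s because trailing = true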

View File

@@ -32,10 +32,6 @@ select {
outline: none;
}
option, optgroup {
background-color: var(--backgroundColor);
}
textarea {
resize: vertical;
width: 100%;

View File

@@ -15,7 +15,7 @@ export const Chat = () => {
const lastAssistantId = messages.findLastIndex(m => m.role === 'assistant');
useEffect(() => {
setTimeout(() => DOMTools.scrollDown(chatRef.current, false), 100);
DOMTools.scrollDown(chatRef.current);
}, [messages.length, lastMessageContent]);
return (

View File

@@ -1,146 +0,0 @@
import { useCallback, useContext, useEffect, useMemo, useState } from 'preact/hooks';
import styles from './header.module.css';
import { Connection, HORDE_ANON_KEY, isHordeConnection, isKoboldConnection, type IConnection, type IHordeModel } from '../../connection';
import { Instruct, StateContext } from '../../contexts/state';
import { useInputState } from '@common/hooks/useInputState';
import { useInputCallback } from '@common/hooks/useInputCallback';
import { Huggingface } from '../../huggingface';
interface IProps {
connection: IConnection;
setConnection: (c: IConnection) => void;
}
export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
const [connectionUrl, setConnectionUrl] = useInputState('');
const [apiKey, setApiKey] = useInputState(HORDE_ANON_KEY);
const [modelName, setModelName] = useInputState('');
const [modelTemplate, setModelTemplate] = useInputState(Instruct.CHATML);
const [hordeModels, setHordeModels] = useState<IHordeModel[]>([]);
const [contextLength, setContextLength] = useState<number>(0);
const backendType = useMemo(() => {
if (isKoboldConnection(connection)) return 'kobold';
if (isHordeConnection(connection)) return 'horde';
return 'unknown';
}, [connection]);
const urlValid = useMemo(() => contextLength > 0, [contextLength]);
useEffect(() => {
if (isKoboldConnection(connection)) {
setConnectionUrl(connection.url);
} else if (isHordeConnection(connection)) {
setModelName(connection.model);
setApiKey(connection.apiKey || HORDE_ANON_KEY);
Connection.getHordeModels()
.then(m => setHordeModels(Array.from(m.values()).sort((a, b) => a.name.localeCompare(b.name))));
}
Connection.getContextLength(connection).then(setContextLength);
Connection.getModelName(connection).then(setModelName);
}, [connection]);
useEffect(() => {
if (modelName) {
Huggingface.findModelTemplate(modelName)
.then(template => {
if (template) {
setModelTemplate(template);
}
});
}
}, [modelName]);
const setInstruct = useInputCallback((instruct) => {
setConnection({ ...connection, instruct });
}, [connection, setConnection]);
const setBackendType = useInputCallback((type) => {
if (type === 'kobold') {
setConnection({
instruct: connection.instruct,
url: connectionUrl,
});
} else if (type === 'horde') {
setConnection({
instruct: connection.instruct,
apiKey,
model: modelName,
});
}
}, [connection, setConnection, connectionUrl, apiKey, modelName]);
const handleBlurUrl = useCallback(() => {
const regex = /^(?:http(s?):\/\/)?(.*?)\/?$/i;
const url = connectionUrl.replace(regex, 'http$1://$2');
setConnection({
instruct: connection.instruct,
url,
});
}, [connection, connectionUrl, setConnection]);
const handleBlurHorde = useCallback(() => {
setConnection({
instruct: connection.instruct,
apiKey,
model: modelName,
});
}, [connection, apiKey, modelName, setConnection]);
return (
<div class={styles.connectionEditor}>
<select value={backendType} onChange={setBackendType}>
<option value='kobold'>Kobold CPP</option>
<option value='horde'>Horde</option>
</select>
<select value={connection.instruct} onChange={setInstruct} title='Instruct template'>
{modelName && modelTemplate && <optgroup label='Native model template'>
<option value={modelTemplate} title='Native for model'>{modelName}</option>
</optgroup>}
<optgroup label='Manual templates'>
{Object.entries(Instruct).map(([label, value]) => (
<option value={value} key={value}>
{label.toLowerCase()}
</option>
))}
</optgroup>
<optgroup label='Custom'>
<option value={connection.instruct}>Custom</option>
</optgroup>
</select>
{isKoboldConnection(connection) && <input
value={connectionUrl}
onInput={setConnectionUrl}
onBlur={handleBlurUrl}
class={urlValid ? styles.valid : styles.invalid}
/>}
{isHordeConnection(connection) && <>
<input
placeholder='Horde API key'
title='Horde API key'
value={apiKey}
onInput={setApiKey}
onBlur={handleBlurHorde}
/>
<select
value={modelName}
onChange={setModelName}
onBlur={handleBlurHorde}
title='Horde model'
>
{hordeModels.map((m) => (
<option value={m.name} key={m.name}>
{m.name} ({m.maxLength}/{m.maxContext})
</option>
))}
</select>
</>}
</div>
);
};
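As a reading aid for handleBlurUrl above: the regex prefixes a scheme when the user omitted one, keeps the "s" of https when present, and drops a single trailing slash. A quick sketch of its behavior (sample inputs are hypothetical):

const regex = /^(?:http(s?):\/\/)?(.*?)\/?$/i;
const normalizeUrl = (raw: string) => raw.replace(regex, 'http$1://$2');

normalizeUrl('localhost:5001/');     // 'http://localhost:5001'
normalizeUrl('https://example.com'); // 'https://example.com' (unchanged)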

View File

@@ -44,11 +44,4 @@
textarea {
overflow: hidden;
}
}
.connectionEditor {
display: flex;
flex-direction: row;
gap: 8px;
flex-wrap: wrap;
}

View File

@@ -2,29 +2,35 @@ import { useCallback, useContext, useEffect, useMemo, useState } from "preact/ho
import { useBool } from "@common/hooks/useBool";
import { Modal } from "@common/components/modal/modal";
import { StateContext } from "../../contexts/state";
import { Instruct, StateContext } from "../../contexts/state";
import { LLMContext } from "../../contexts/llm";
import { MiniChat } from "../minichat/minichat";
import { AutoTextarea } from "../autoTextarea";
import { Ace } from "../ace";
import { ConnectionEditor } from "./connectionEditor";
import styles from './header.module.css';
import { Ace } from "../ace";
export const Header = () => {
const { contextLength, promptTokens, modelName } = useContext(LLMContext);
const { modelName, modelTemplate, contextLength, promptTokens, blockConnection } = useContext(LLMContext);
const {
messages, connection, systemPrompt, lore, userPrompt, bannedWords, summarizePrompt, summaryEnabled,
setSystemPrompt, setLore, setUserPrompt, addSwipe, setBannedWords, setInstruct, setSummarizePrompt, setSummaryEnabled, setConnection,
messages, connectionUrl, systemPrompt, lore, userPrompt, bannedWords, instruct, summarizePrompt, summaryEnabled,
setConnectionUrl, setSystemPrompt, setLore, setUserPrompt, addSwipe, setBannedWords, setInstruct, setSummarizePrompt, setSummaryEnabled,
} = useContext(StateContext);
const connectionsOpen = useBool();
const loreOpen = useBool();
const promptsOpen = useBool();
const genparamsOpen = useBool();
const assistantOpen = useBool();
const bannedWordsInput = useMemo(() => bannedWords.join('\n'), [bannedWords]);
const urlValid = useMemo(() => contextLength > 0, [contextLength]);
const handleBlurUrl = useCallback(() => {
const regex = /^(?:http(s?):\/\/)?(.*?)\/?$/i
const normalizedConnectionUrl = connectionUrl.replace(regex, 'http$1://$2');
setConnectionUrl(normalizedConnectionUrl);
blockConnection.setFalse();
}, [connectionUrl, setConnectionUrl, blockConnection]);
const handleAssistantAddSwipe = useCallback((answer: string) => {
const index = messages.findLastIndex(m => m.role === 'assistant');
@@ -55,13 +61,29 @@ export const Header = () => {
return (
<div class={styles.header}>
<div class={styles.inputs}>
<div class={styles.buttons}>
<button class='icon' onClick={connectionsOpen.setTrue} title='Connection settings'>
🔌
</button>
</div>
<input value={connectionUrl}
onInput={setConnectionUrl}
onFocus={blockConnection.setTrue}
onBlur={handleBlurUrl}
class={blockConnection.value ? '' : urlValid ? styles.valid : styles.invalid}
/>
<select value={instruct} onChange={setInstruct} title='Instruct template'>
{modelName && modelTemplate && <optgroup label='Native model template'>
<option value={modelTemplate} title='Native for model'>{modelName}</option>
</optgroup>}
<optgroup label='Manual templates'>
{Object.entries(Instruct).map(([label, value]) => (
<option value={value} key={value}>
{label.toLowerCase()}
</option>
))}
</optgroup>
<optgroup label='Custom'>
<option value={instruct}>Custom</option>
</optgroup>
</select>
<div class={styles.info}>
{modelName} - {promptTokens} / {contextLength}
{promptTokens} / {contextLength}
</div>
</div>
<div class={styles.buttons}>
@@ -80,10 +102,6 @@ export const Header = () => {
</button>
</div>
<Modal open={connectionsOpen.value} onClose={connectionsOpen.setFalse}>
<h3 class={styles.modalTitle}>Connection settings</h3>
<ConnectionEditor connection={connection} setConnection={setConnection} />
</Modal>
<Modal open={loreOpen.value} onClose={loreOpen.setFalse}>
<h3 class={styles.modalTitle}>Lore Editor</h3>
<AutoTextarea
@@ -117,12 +135,12 @@ export const Header = () => {
<h4 class={styles.modalTitle}>Summary template</h4>
<Ace value={summarizePrompt} onInput={setSummarizePrompt} />
<label>
<input type='checkbox' checked={summaryEnabled} onChange={handleSetSummaryEnabled} />
<input type='checkbox' checked={summaryEnabled} onChange={handleSetSummaryEnabled}/>
&nbsp;Enable summarization
</label>
<hr />
<h4 class={styles.modalTitle}>Instruct template</h4>
<Ace value={connection.instruct} onInput={setInstruct} />
<Ace value={instruct} onInput={setInstruct} />
</div>
</Modal>
<MiniChat

View File

@@ -5,7 +5,7 @@ import { AutoTextarea } from "./autoTextarea";
export const Input = () => {
const { input, setInput, addMessage, continueMessage } = useContext(StateContext);
const { generating, stopGeneration } = useContext(LLMContext);
const { generating } = useContext(LLMContext);
const handleSend = useCallback(async () => {
if (!generating) {
@@ -29,10 +29,7 @@
return (
<div class="chat-input">
<AutoTextarea onInput={setInput} onKeyDown={handleKeyDown} value={input} />
{generating
? <button onClick={stopGeneration}>Stop</button>
: <button onClick={handleSend}>{input ? 'Send' : 'Continue'}</button>
}
<button onClick={handleSend} class={`${generating ? 'disabled' : ''}`}>{input ? 'Send' : 'Continue'}</button>
</div>
);
}

View File

@@ -16,7 +16,7 @@ interface IProps {
}
export const MiniChat = ({ history = [], buttons = {}, open, onClose }: IProps) => {
const { generating, stopGeneration, generate, compilePrompt } = useContext(LLMContext);
const { generating, generate, compilePrompt } = useContext(LLMContext);
const [messages, setMessages] = useState<IMessage[]>([]);
const ref = useRef<HTMLDivElement>(null);
@@ -105,10 +105,9 @@ export const MiniChat = ({ history = [], buttons = {}, open, onClose }: IProps)
</div>
</div>
<div class={styles.buttons}>
{generating
? <button onClick={stopGeneration}>Stop</button>
: <button onClick={handleGenerate}>Generate</button>
}
<button onClick={handleGenerate} class={`${generating ? 'disabled' : ''}`}>
Generate
</button>
<button onClick={() => handleInit()} class={`${generating ? 'disabled' : ''}`}>
Clear
</button>

View File

@@ -1,376 +0,0 @@
import Lock from "@common/lock";
import SSE from "@common/sse";
import { throttle } from "@common/utils";
import delay, { clearDelay } from "delay";
interface IBaseConnection {
instruct: string;
}
interface IKoboldConnection extends IBaseConnection {
url: string;
}
interface IHordeConnection extends IBaseConnection {
apiKey?: string;
model: string;
}
export const isKoboldConnection = (obj: unknown): obj is IKoboldConnection => (
obj != null && typeof obj === 'object' && 'url' in obj && typeof obj.url === 'string'
);
export const isHordeConnection = (obj: unknown): obj is IHordeConnection => (
obj != null && typeof obj === 'object' && 'model' in obj && typeof obj.model === 'string'
);
export type IConnection = IKoboldConnection | IHordeConnection;
interface IHordeWorker {
id: string;
models: string[];
flagged: boolean;
online: boolean;
maintenance_mode: boolean;
max_context_length: number;
max_length: number;
performance: string;
}
export interface IHordeModel {
name: string;
hordeNames: string[];
maxLength: number;
maxContext: number;
workers: string[];
}
interface IHordeResult {
faulted: boolean;
done: boolean;
finished: number;
generations?: {
text: string;
}[];
}
const DEFAULT_GENERATION_SETTINGS = {
temperature: 0.8,
min_p: 0.1,
rep_pen: 1.08,
rep_pen_range: -1,
rep_pen_slope: 0.7,
top_k: 100,
top_p: 0.92,
banned_tokens: ['anticipat'],
max_length: 300,
trim_stop: true,
stop_sequence: ['[INST]', '[/INST]', '</s>', '<|'],
dry_allowed_length: 5,
dry_multiplier: 0.8,
dry_base: 1,
dry_sequence_breakers: ["\n", ":", "\"", "*"],
dry_penalty_last_n: 0
}
const MIN_PERFORMANCE = 2.0;
const MIN_WORKER_CONTEXT = 8192;
const MAX_HORDE_LENGTH = 512;
const MAX_HORDE_CONTEXT = 32000;
export const HORDE_ANON_KEY = '0000000000';
export const normalizeModel = (model: string) => {
let currentModel = model.split(/[\\\/]/).at(-1);
currentModel = currentModel.split('::').at(0);
let normalizedModel: string;
do {
normalizedModel = currentModel;
currentModel = currentModel
.replace(/[ ._-]\d+(k$|-context)/i, '') // remove context length, e.g. -32k
.replace(/[ ._-](gptq|awq|exl2?|imat|i\d|h\d)/i, '') // remove quant name
.replace(/([ ._-]?gg(uf|ml)[ ._-]?(v[ ._-]?\d)?)/i, '') // remove gguf-v3/ggml/etc
.replace(/[ ._-]i?q([ ._-]?\d[ ._-]?(k?[ ._-]?x*[ ._-]?[lms]?)?)+/i, '') // remove quant size
.replace(/[ ._-]\d+(\.\d+)?bpw/i, '') // remove bpw
.replace(/[ ._-]f(p|loat)?(8|16|32)/i, '')
.replace(/^(debug-?)+/i, '')
.trim();
} while (normalizedModel !== currentModel);
return normalizedModel
.replace(/[ _-]+/ig, '-')
.replace(/\.{2,}/, '-')
.replace(/[ ._-]+$/ig, '')
.trim();
}
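A sketch of what normalizeModel yields for typical worker model strings (the sample inputs are hypothetical; callers in this file lowercase the name before calling it):

normalizeModel('TheBloke/MythoMax-L2-13B-GPTQ'); // 'MythoMax-L2-13B' (org prefix and quant name stripped)
normalizeModel('mythomax-l2-13b.Q4_K_M.gguf');   // 'mythomax-l2-13b' (quant size and gguf suffix stripped)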
export const approximateTokens = (prompt: string): number => prompt.split(/[^a-z0-9]+/i).length;
export type IGenerationSettings = Partial<typeof DEFAULT_GENERATION_SETTINGS>;
export namespace Connection {
const AIHORDE = 'https://aihorde.net';
let abortController = new AbortController();
async function* generateKobold(url: string, prompt: string, extraSettings: IGenerationSettings = {}): AsyncGenerator<string> {
const sse = new SSE(`${url}/api/extra/generate/stream`, {
payload: JSON.stringify({
...DEFAULT_GENERATION_SETTINGS,
...extraSettings,
prompt,
}),
});
const messages: string[] = [];
const messageLock = new Lock();
let end = false;
sse.addEventListener('message', (e) => {
if (e.data) {
const { token, finish_reason } = JSON.parse(e.data);
messages.push(token);
if (finish_reason && finish_reason !== 'null') {
end = true;
}
}
messageLock.release();
});
const handleEnd = () => {
end = true;
messageLock.release();
};
abortController.signal.addEventListener('abort', handleEnd);
sse.addEventListener('error', handleEnd);
sse.addEventListener('abort', handleEnd);
sse.addEventListener('readystatechange', (e) => {
if (e.readyState === SSE.CLOSED) handleEnd();
});
while (!end || messages.length) {
while (messages.length > 0) {
const message = messages.shift();
if (message != null) {
try {
yield message;
} catch { }
}
}
if (!end) {
await messageLock.wait();
}
}
sse.close();
}
async function generateHorde(connection: Omit<IHordeConnection, keyof IBaseConnection>, prompt: string, extraSettings: IGenerationSettings = {}): Promise<string> {
const models = await getHordeModels();
const model = models.get(connection.model);
if (model) {
let maxLength = Math.min(model.maxLength, DEFAULT_GENERATION_SETTINGS.max_length);
if (extraSettings.max_length && extraSettings.max_length < maxLength) {
maxLength = extraSettings.max_length;
}
const requestData = {
prompt,
params: {
...DEFAULT_GENERATION_SETTINGS,
...extraSettings,
n: 1,
max_context_length: model.maxContext,
max_length: maxLength,
rep_pen_range: Math.min(model.maxContext, 4096),
},
models: model.hordeNames,
workers: model.workers,
};
const { signal } = abortController;
const generateResponse = await fetch(`${AIHORDE}/api/v2/generate/text/async`, {
method: 'POST',
body: JSON.stringify(requestData),
headers: {
'Content-Type': 'application/json',
apikey: connection.apiKey || HORDE_ANON_KEY,
},
signal,
});
if (!generateResponse.ok || generateResponse.status >= 400) {
throw new Error(`Error starting generation: ${generateResponse.statusText}: ${await generateResponse.text()}`);
}
const { id } = await generateResponse.json() as { id: string };
const request = async (method = 'GET'): Promise<string | null> => {
const response = await fetch(`${AIHORDE}/api/v2/generate/text/status/${id}`, { method });
if (response.ok && response.status < 400) {
const result: IHordeResult = await response.json();
if (result.generations?.length === 1) {
const { text } = result.generations[0];
return text;
}
} else {
throw new Error(await response.text());
}
return null;
};
const deleteRequest = async () => (await request('DELETE')) ?? '';
while (true) {
try {
await delay(2500, { signal });
const text = await request();
if (text) {
return text;
}
} catch (e) {
console.error('Error in horde generation:', e);
return deleteRequest();
}
}
}
throw new Error(`Model ${connection.model} is offline`);
}
export async function* generate(connection: IConnection, prompt: string, extraSettings: IGenerationSettings = {}) {
if (isKoboldConnection(connection)) {
yield* generateKobold(connection.url, prompt, extraSettings);
} else if (isHordeConnection(connection)) {
yield await generateHorde(connection, prompt, extraSettings);
}
}
export function stopGeneration() {
abortController.abort();
abortController = new AbortController(); // refresh
}
async function requestHordeModels(): Promise<Map<string, IHordeModel>> {
try {
const response = await fetch(`${AIHORDE}/api/v2/workers?type=text`);
if (response.ok) {
const workers: IHordeWorker[] = await response.json();
const goodWorkers = workers.filter(w =>
w.online
&& !w.maintenance_mode
&& !w.flagged
&& w.max_context_length >= MIN_WORKER_CONTEXT
&& parseFloat(w.performance) >= MIN_PERFORMANCE
);
const models = new Map<string, IHordeModel>();
for (const worker of goodWorkers) {
for (const modelName of worker.models) {
const normName = normalizeModel(modelName.toLowerCase());
let model = models.get(normName);
if (!model) {
model = {
hordeNames: [],
maxContext: MAX_HORDE_CONTEXT,
maxLength: MAX_HORDE_LENGTH,
name: normName,
workers: []
}
}
if (!model.hordeNames.includes(modelName)) {
model.hordeNames.push(modelName);
}
if (!model.workers.includes(worker.id)) {
model.workers.push(worker.id);
}
model.maxContext = Math.min(model.maxContext, worker.max_context_length);
model.maxLength = Math.min(model.maxLength, worker.max_length);
models.set(normName, model);
}
}
return models;
}
} catch (e) {
console.error(e);
}
return new Map();
};
export const getHordeModels = throttle(requestHordeModels, 10000);
export async function getModelName(connection: IConnection): Promise<string> {
if (isKoboldConnection(connection)) {
try {
const response = await fetch(`${connection.url}/api/v1/model`);
if (response.ok) {
const { result } = await response.json();
return result;
}
} catch (e) {
console.error('Error getting model name', e);
}
} else if (isHordeConnection(connection)) {
return connection.model;
}
return '';
}
export async function getContextLength(connection: IConnection): Promise<number> {
if (isKoboldConnection(connection)) {
try {
const response = await fetch(`${connection.url}/api/extra/true_max_context_length`);
if (response.ok) {
const { value } = await response.json();
return value;
}
} catch (e) {
console.error('Error getting max tokens', e);
}
} else if (isHordeConnection(connection)) {
const models = await getHordeModels();
const model = models.get(connection.model);
if (model) {
return model.maxContext;
}
}
return 0;
}
export async function countTokens(connection: IConnection, prompt: string) {
if (isKoboldConnection(connection)) {
try {
const response = await fetch(`${connection.url}/api/extra/tokencount`, {
body: JSON.stringify({ prompt }),
headers: { 'Content-Type': 'application/json' },
method: 'POST',
});
if (response.ok) {
const { value } = await response.json();
return value;
}
} catch (e) {
console.error('Error counting tokens', e);
}
}
return approximateTokens(prompt);
}
}
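A hypothetical consumer of the Connection namespace above (as it existed before this change; the import path is assumed), showing the intended stream-and-abort pattern:

import { Connection, type IConnection } from './connection';

const run = async (connection: IConnection, prompt: string) => {
  let text = '';
  // Kobold yields token-by-token over SSE; Horde yields one final string.
  for await (const token of Connection.generate(connection, prompt)) {
    text += token;
  }
  return text;
};

// A Stop button would call Connection.stopGeneration(), which aborts the
// shared AbortController, ending both the SSE stream and the Horde polling loop.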

View File

@@ -1,13 +1,12 @@
import Lock from "@common/lock";
import SSE from "@common/sse";
import { createContext } from "preact";
import { useCallback, useContext, useEffect, useMemo, useState } from "preact/hooks";
import { MessageTools, type IMessage } from "../messages";
import { StateContext } from "./state";
import { Instruct, StateContext } from "./state";
import { useBool } from "@common/hooks/useBool";
import { Template } from "@huggingface/jinja";
import { Huggingface } from "../huggingface";
import { approximateTokens, Connection, normalizeModel, type IGenerationSettings } from "../connection";
import { throttle } from "@common/utils";
import { useAsyncEffect } from "@common/hooks/useAsyncEffect";
interface ICompileArgs {
keepUsers?: number;
@@ -22,23 +21,71 @@ interface ICompiledPrompt {
interface IContext {
generating: boolean;
blockConnection: ReturnType<typeof useBool>;
modelName: string;
modelTemplate: string;
hasToolCalls: boolean;
promptTokens: number;
contextLength: number;
}
const DEFAULT_GENERATION_SETTINGS = {
temperature: 0.8,
min_p: 0.1,
rep_pen: 1.08,
rep_pen_range: -1,
rep_pen_slope: 0.7,
top_k: 100,
top_p: 0.92,
banned_tokens: [],
max_length: 300,
trim_stop: true,
stop_sequence: ['[INST]', '[/INST]', '</s>', '<|'],
dry_allowed_length: 5,
dry_multiplier: 0.8,
dry_base: 1,
dry_sequence_breakers: ["\n", ":", "\"", "*"],
dry_penalty_last_n: 0
}
const MESSAGES_TO_KEEP = 10;
type IGenerationSettings = Partial<typeof DEFAULT_GENERATION_SETTINGS>;
interface IActions {
compilePrompt: (messages: IMessage[], args?: ICompileArgs) => Promise<ICompiledPrompt>;
generate: (prompt: string, extraSettings?: IGenerationSettings) => AsyncGenerator<string>;
stopGeneration: () => void;
summarize: (content: string) => Promise<string>;
countTokens: (prompt: string) => Promise<number>;
}
export type ILLMContext = IContext & IActions;
export const normalizeModel = (model: string) => {
let currentModel = model.split(/[\\\/]/).at(-1);
currentModel = currentModel.split('::').at(0);
let normalizedModel: string;
do {
normalizedModel = currentModel;
currentModel = currentModel
.replace(/[ ._-]\d+(k$|-context)/i, '') // remove context length, e.g. -32k
.replace(/[ ._-](gptq|awq|exl2?|imat|i\d)/i, '') // remove quant name
.replace(/([ ._-]?gg(uf|ml)[ ._-]?(v[ ._-]?\d)?)/i, '') // remove gguf-v3/ggml/etc
.replace(/[ ._-]i?q([ ._-]?\d[ ._-]?(k?[ ._-]?x*[ ._-]?[lms]?)?)+/i, '') // remove quant size
.replace(/[ ._-]\d+(\.\d+)?bpw/i, '') // remove bpw
.replace(/[ ._-]f(p|loat)?(8|16|32)/i, '')
.replace(/^(debug-?)+/i, '')
.trim();
} while (normalizedModel !== currentModel);
return normalizedModel
.replace(/[ _-]+/ig, '-')
.replace(/\.{2,}/, '-')
.replace(/[ ._-]+$/ig, '')
.trim();
}
export const LLMContext = createContext<ILLMContext>({} as ILLMContext);
const processing = {
@@ -48,14 +95,16 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
export const LLMContextProvider = ({ children }: { children?: any }) => {
const {
connection, messages, triggerNext, lore, userPrompt, systemPrompt, bannedWords, summarizePrompt, summaryEnabled,
setTriggerNext, addMessage, editMessage, editSummary,
connectionUrl, messages, triggerNext, lore, userPrompt, systemPrompt, bannedWords, instruct, summarizePrompt, summaryEnabled,
setTriggerNext, addMessage, editMessage, editSummary, setInstruct,
} = useContext(StateContext);
const generating = useBool(false);
const blockConnection = useBool(false);
const [promptTokens, setPromptTokens] = useState(0);
const [contextLength, setContextLength] = useState(0);
const [modelName, setModelName] = useState('');
const [modelTemplate, setModelTemplate] = useState('');
const [hasToolCalls, setHasToolCalls] = useState(false);
const userPromptTemplate = useMemo(() => {
@@ -68,6 +117,40 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
}
}, [userPrompt]);
const getContextLength = useCallback(async () => {
if (!connectionUrl || blockConnection.value) {
return 0;
}
try {
const response = await fetch(`${connectionUrl}/api/extra/true_max_context_length`);
if (response.ok) {
const { value } = await response.json();
return value;
}
} catch (e) {
console.log('Error getting max tokens', e);
}
return 0;
}, [connectionUrl, blockConnection.value]);
const getModelName = useCallback(async () => {
if (!connectionUrl || blockConnection.value) {
return '';
}
try {
const response = await fetch(`${connectionUrl}/api/v1/model`);
if (response.ok) {
const { result } = await response.json();
return result;
}
} catch (e) {
console.log('Error getting model name', e);
}
return '';
}, [connectionUrl, blockConnection.value]);
const actions: IActions = useMemo(() => ({
compilePrompt: async (messages, { keepUsers } = {}) => {
const promptMessages = messages.slice();
@@ -153,7 +236,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
templateMessages[1].content = `${lore}\n\n${templateMessages[1].content}`;
const prompt = Huggingface.applyChatTemplate(connection.instruct, templateMessages);
const prompt = Huggingface.applyChatTemplate(instruct, templateMessages);
return {
prompt,
isContinue,
@@ -161,44 +244,102 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
};
},
generate: async function* (prompt, extraSettings = {}) {
if (!connectionUrl) {
return;
}
try {
generating.setTrue();
console.log('[LLM.generate]', prompt);
yield* Connection.generate(connection, prompt, {
...extraSettings,
banned_tokens: bannedWords.filter(w => w.trim()),
const sse = new SSE(`${connectionUrl}/api/extra/generate/stream`, {
payload: JSON.stringify({
...DEFAULT_GENERATION_SETTINGS,
banned_tokens: bannedWords.filter(w => w.trim()),
...extraSettings,
prompt,
}),
});
} catch (e) {
if (e instanceof Error && e.name !== 'AbortError') {
alert(e.message);
} else {
console.error('[LLM.generate]', e);
const messages: string[] = [];
const messageLock = new Lock();
let end = false;
sse.addEventListener('message', (e) => {
if (e.data) {
const { token, finish_reason } = JSON.parse(e.data);
messages.push(token);
if (finish_reason && finish_reason !== 'null') {
end = true;
}
}
messageLock.release();
});
const handleEnd = () => {
end = true;
messageLock.release();
};
sse.addEventListener('error', handleEnd);
sse.addEventListener('abort', handleEnd);
sse.addEventListener('readystatechange', (e) => {
if (e.readyState === SSE.CLOSED) handleEnd();
});
while (!end || messages.length) {
while (messages.length > 0) {
const message = messages.shift();
if (message != null) {
try {
yield message;
} catch { }
}
}
if (!end) {
await messageLock.wait();
}
}
sse.close();
} finally {
generating.setFalse();
}
},
summarize: async (message) => {
try {
const content = Huggingface.applyTemplate(summarizePrompt, { message });
const prompt = Huggingface.applyChatTemplate(connection.instruct, [{ role: 'user', content }]);
console.log('[LLM.summarize]', prompt);
const content = Huggingface.applyTemplate(summarizePrompt, { message });
const prompt = Huggingface.applyChatTemplate(instruct, [{ role: 'user', content }]);
const tokens = await Array.fromAsync(Connection.generate(connection, prompt, {}));
const tokens = await Array.fromAsync(actions.generate(prompt));
return MessageTools.trimSentence(tokens.join(''));
} catch (e) {
console.error('Error summarizing:', e);
return '';
}
return MessageTools.trimSentence(tokens.join(''));
},
countTokens: async (prompt) => {
return await Connection.countTokens(connection, prompt);
},
stopGeneration: () => {
Connection.stopGeneration();
},
}), [connection, lore, userPromptTemplate, systemPrompt, bannedWords, summarizePrompt]);
if (!connectionUrl) {
return 0;
}
try {
const response = await fetch(`${connectionUrl}/api/extra/tokencount`, {
body: JSON.stringify({ prompt }),
headers: { 'Content-Type': 'application/json' },
method: 'POST',
});
if (response.ok) {
const { value } = await response.json();
return value;
}
} catch (e) {
console.log('Error counting tokens', e);
}
useAsyncEffect(async () => {
return 0;
},
}), [connectionUrl, lore, userPromptTemplate, systemPrompt, bannedWords, instruct, summarizePrompt]);
useEffect(() => void (async () => {
if (triggerNext && !generating.value) {
setTriggerNext(false);
@@ -212,14 +353,12 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
messageId++;
}
generating.setTrue();
editSummary(messageId, 'Generating...');
for await (const chunk of actions.generate(prompt)) {
text += chunk;
setPromptTokens(promptTokens + approximateTokens(text));
setPromptTokens(promptTokens + Math.round(text.length * 0.25));
editMessage(messageId, text.trim());
}
generating.setFalse();
text = MessageTools.trimSentence(text);
editMessage(messageId, text);
@@ -227,10 +366,10 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
MessageTools.playReady();
}
}, [triggerNext]);
})(), [triggerNext]);
useAsyncEffect(async () => {
if (summaryEnabled && !processing.summarizing) {
useEffect(() => void (async () => {
if (summaryEnabled && !generating.value && !processing.summarizing) {
try {
processing.summarizing = true;
for (let id = 0; id < messages.length; id++) {
@@ -247,15 +386,36 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
processing.summarizing = false;
}
}
}, [messages, summaryEnabled]);
})(), [messages]);
useEffect(throttle(() => {
Connection.getContextLength(connection).then(setContextLength);
Connection.getModelName(connection).then(normalizeModel).then(setModelName);
}, 1000, true), [connection]);
useEffect(() => {
if (!blockConnection.value) {
setPromptTokens(0);
setContextLength(0);
setModelName('');
const calculateTokens = useCallback(throttle(async () => {
if (!processing.tokenizing && !generating.value) {
getContextLength().then(setContextLength);
getModelName().then(normalizeModel).then(setModelName);
}
}, [connectionUrl, blockConnection.value]);
useEffect(() => {
setModelTemplate('');
if (modelName) {
Huggingface.findModelTemplate(modelName)
.then((template) => {
if (template) {
setModelTemplate(template);
setInstruct(template);
} else {
setInstruct(Instruct.CHATML);
}
});
}
}, [modelName]);
const calculateTokens = useCallback(async () => {
if (!processing.tokenizing && !blockConnection.value && !generating.value) {
try {
processing.tokenizing = true;
const { prompt } = await actions.compilePrompt(messages);
@@ -267,24 +427,26 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
processing.tokenizing = false;
}
}
}, 1000, true), [actions, messages]);
}, [actions, messages, blockConnection.value]);
useEffect(() => {
calculateTokens();
}, [messages, connection, systemPrompt, lore, userPrompt]);
}, [messages, connectionUrl, blockConnection.value, instruct, /* systemPrompt, lore, userPrompt TODO debounce*/]);
useEffect(() => {
try {
const hasTools = Huggingface.testToolCalls(connection.instruct);
const hasTools = Huggingface.testToolCalls(instruct);
setHasToolCalls(hasTools);
} catch {
setHasToolCalls(false);
}
}, [connection.instruct]);
}, [instruct]);
const rawContext: IContext = {
generating: generating.value,
blockConnection,
modelName,
modelTemplate,
hasToolCalls,
promptTokens,
contextLength,

View File

@@ -1,13 +1,12 @@
import { createContext } from "preact";
import { useCallback, useEffect, useMemo, useState } from "preact/hooks";
import { useEffect, useMemo, useState } from "preact/hooks";
import { MessageTools, type IMessage } from "../messages";
import { useInputState } from "@common/hooks/useInputState";
import { type IConnection } from "../connection";
interface IContext {
currentConnection: number;
availableConnections: IConnection[];
connectionUrl: string;
input: string;
instruct: string;
systemPrompt: string;
lore: string;
userPrompt: string;
@@ -18,14 +17,8 @@ interface IContext {
triggerNext: boolean;
}
interface IComputableContext {
connection: IConnection;
}
interface IActions {
setConnection: (connection: IConnection) => void;
setAvailableConnections: (connections: IConnection[]) => void;
setCurrentConnection: (connection: number) => void;
setConnectionUrl: (url: string | Event) => void;
setInput: (url: string | Event) => void;
setInstruct: (template: string | Event) => void;
setLore: (lore: string | Event) => void;
@@ -56,40 +49,11 @@ export enum Instruct {
MISTRAL = `{%- if messages[0]['role'] == 'system' %}{%- set system_message = messages[0]['content'] %}{%- set loop_messages = messages[1:] %}{%- else %}{%- set loop_messages = messages %}{%- endif %}{%- for message in loop_messages %}{%- if message['role'] == 'user' %}{%- if loop.first and system_message is defined %}{{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}{%- else %}{{- ' [INST] ' + message['content'] + ' [/INST]' }}{%- endif %}{%- elif message['role'] == 'assistant' %}{{- ' ' + message['content'] + '</s>'}}{%- endif %}{%- endfor %}`,
METHARME = `{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>' + message['content'] }}{% elif message['role'] == 'user' %}{{'<|user|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{'<|model|>' + message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|model|>' }}{% endif %}`,
GEMMA = `{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}`,
ALPACA = `{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{ message['content'] + '\\n\\n'}}{% elif message['role'] == 'user' %}{{'### Instruction:\\n\\n' + message['content'] + '\\n\\n'}}{% elif message['role'] == 'assistant' %}{{'### Response:\\n\\n' + message['content'] + '\\n\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response:\\n\\n' }}{% endif %}`,
};
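These enum values are raw Jinja chat templates; rendering one is roughly the following sketch, assuming the Template class from @huggingface/jinja used elsewhere in this diff:

import { Template } from '@huggingface/jinja';

const template = new Template(Instruct.CHATML);
const prompt = template.render({
  messages: [{ role: 'user', content: 'Hello!' }],
  add_generation_prompt: true,
});
// prompt now holds the ChatML-formatted string to send to the backend.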
const DEFAULT_CONTEXT: IContext = {
currentConnection: 0,
availableConnections: [{
url: 'http://localhost:5001',
instruct: Instruct.CHATML,
}],
input: '',
systemPrompt: 'You are a creative writer. Write a story based on the world description below. Story should be adult and mature; and could include swearing, violence and unfairness. Portray characters realistically and stay in the lore.',
lore: '',
userPrompt: `{% if isStart -%}
Write a novel using information above as a reference.
{%- else -%}
Continue the story forward.
{%- endif %}
{% if prompt -%}
What should happen next in your answer: {{ prompt | trim }}
{% endif %}
Remember that this story should be infinite and go forever.
Make sure to follow the world description and rules exactly. Avoid cliffhangers and pauses, be creative.`,
summarizePrompt: 'Shrink following text down to one paragraph, keeping all important details:\n\n{{ message }}\n\nAnswer with shortened text only.',
summaryEnabled: false,
bannedWords: [],
messages: [],
triggerNext: false,
};
export const saveContext = (context: IContext) => {
const contextToSave: Partial<IContext> = { ...context };
delete contextToSave.triggerNext;
@@ -98,6 +62,30 @@ export const saveContext = (context: IContext) => {
}
export const loadContext = (): IContext => {
const defaultContext: IContext = {
connectionUrl: 'http://localhost:5001',
input: '',
instruct: Instruct.CHATML,
systemPrompt: 'You are a creative writer. Write a story based on the world description below. Story should be adult and mature; and could include swearing, violence and unfairness. Portray characters realistically and stay in the lore.',
lore: '',
userPrompt: `{% if isStart -%}
Write a novel using information above as a reference.
{%- else -%}
Continue the story forward.
{%- endif %}
{% if prompt -%}
This is the description of what I want to happen next: {{ prompt | trim }}
{% endif %}
Remember that this story should be infinite and go forever.
Make sure to follow the world description and rules exactly. Avoid cliffhangers and pauses, be creative.`,
summarizePrompt: 'Shrink following text down, keeping all important details:\n\n{{ message }}\n\nAnswer with shortened text only.',
summaryEnabled: false,
bannedWords: [],
messages: [],
triggerNext: false,
};
let loadedContext: Partial<IContext> = {};
try {
@@ -107,18 +95,18 @@ export const loadContext = (): IContext => {
}
} catch { }
return { ...DEFAULT_CONTEXT, ...loadedContext };
return { ...defaultContext, ...loadedContext };
}
export type IStateContext = IContext & IActions & IComputableContext;
export type IStateContext = IContext & IActions;
export const StateContext = createContext<IStateContext>({} as IStateContext);
export const StateContextProvider = ({ children }: { children?: any }) => {
const loadedContext = useMemo(() => loadContext(), []);
const [currentConnection, setCurrentConnection] = useState<number>(loadedContext.currentConnection);
const [availableConnections, setAvailableConnections] = useState<IConnection[]>(loadedContext.availableConnections);
const [connectionUrl, setConnectionUrl] = useInputState(loadedContext.connectionUrl);
const [input, setInput] = useInputState(loadedContext.input);
const [instruct, setInstruct] = useInputState(loadedContext.instruct);
const [lore, setLore] = useInputState(loadedContext.lore);
const [systemPrompt, setSystemPrompt] = useInputState(loadedContext.systemPrompt);
const [userPrompt, setUserPrompt] = useInputState(loadedContext.userPrompt);
@@ -127,26 +115,10 @@ export const StateContextProvider = ({ children }: { children?: any }) => {
const [messages, setMessages] = useState(loadedContext.messages);
const [summaryEnabled, setSummaryEnabled] = useState(loadedContext.summaryEnabled);
const connection = availableConnections[currentConnection] ?? DEFAULT_CONTEXT.availableConnections[0];
const [triggerNext, setTriggerNext] = useState(false);
const [instruct, setInstruct] = useInputState(connection.instruct);
const setConnection = useCallback((c: IConnection) => {
setAvailableConnections(availableConnections.map((ac, ai) => {
if (ai === currentConnection) {
return c;
} else {
return ac;
}
}));
}, [availableConnections, currentConnection]);
useEffect(() => setConnection({ ...connection, instruct }), [instruct]);
const actions: IActions = useMemo(() => ({
setConnection,
setCurrentConnection,
setConnectionUrl,
setInput,
setInstruct,
setSystemPrompt,
@@ -155,8 +127,7 @@ export const StateContextProvider = ({ children }: { children?: any }) => {
setLore,
setTriggerNext,
setSummaryEnabled,
setBannedWords: (words) => setBannedWords(words.slice()),
setAvailableConnections: (connections) => setAvailableConnections(connections.slice()),
setBannedWords: (words) => setBannedWords([...words]),
setMessages: (newMessages) => setMessages(newMessages.slice()),
addMessage: (content, role, triggerNext = false) => {
@@ -227,11 +198,10 @@ export const StateContextProvider = ({ children }: { children?: any }) => {
continueMessage: () => setTriggerNext(true),
}), []);
const rawContext: IContext & IComputableContext = {
connection,
currentConnection,
availableConnections,
const rawContext: IContext = {
connectionUrl,
input,
instruct,
systemPrompt,
lore,
userPrompt,

View File

@@ -1,7 +1,6 @@
import { gguf } from '@huggingface/gguf';
import * as hub from '@huggingface/hub';
import { Template } from '@huggingface/jinja';
import { normalizeModel } from './connection';
export namespace Huggingface {
export interface ITemplateMessage {
@@ -93,12 +92,11 @@
const loadHuggingfaceTokenizerConfig = async (modelName: string): Promise<TokenizerConfig | null> => {
console.log(`[huggingface] searching config for '${modelName}'`);
const searchModel = normalizeModel(modelName);
const hubModels = await Array.fromAsync(hub.listModels({ search: { query: searchModel }, additionalFields: ['config'] }));
const hubModels = await Array.fromAsync(hub.listModels({ search: { query: modelName }, additionalFields: ['config'] }));
const models = hubModels.filter(m => {
if (m.gated) return false;
if (!normalizeModel(m.name).includes(searchModel)) return false;
if (!m.name.toLowerCase().includes(modelName.toLowerCase())) return false;
return true;
}).sort((a, b) => b.downloads - a.downloads);
@@ -232,9 +230,7 @@
}
export const findModelTemplate = async (modelName: string): Promise<string | null> => {
const modelKey = modelName.toLowerCase().trim();
if (!modelKey) return '';
const modelKey = modelName.toLowerCase();
let template = templateCache[modelKey] ?? null;
if (template) {