1
0
Fork 0

AIStory: Add HF tokenizers

This commit is contained in:
Pabloader 2024-11-14 16:51:05 +00:00
parent a213e0407c
commit c480f5a7d1
18 changed files with 234 additions and 149 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -11,6 +11,7 @@
"@huggingface/gguf": "0.1.12",
"@huggingface/hub": "0.19.0",
"@huggingface/jinja": "0.3.1",
"@huggingface/transformers": "3.0.2",
"@inquirer/select": "2.3.10",
"ace-builds": "1.36.3",
"classnames": "2.5.1",

View File

@ -7,6 +7,8 @@
--green: #AFAFAF;
--red: #7F0000;
--green: #007F00;
--brightRed: #DD0000;
--brightGreen: #00DD00;
--shadeColor: rgba(0, 128, 128, 0.3);
--border: 1px solid var(--color);

View File

@ -2,7 +2,7 @@ import { useEffect, useRef } from "preact/hooks";
import type { JSX } from "preact/jsx-runtime"
import { useIsVisible } from '@common/hooks/useIsVisible';
import { DOMTools } from "../dom";
import { DOMTools } from "../tools/dom";
export const AutoTextarea = (props: JSX.HTMLAttributes<HTMLTextAreaElement>) => {
const { value } = props;

View File

@ -1,8 +1,8 @@
import { useCallback, useContext, useEffect, useRef } from "preact/hooks";
import { StateContext } from "../contexts/state";
import { Message } from "./message/message";
import { MessageTools } from "../messages";
import { DOMTools } from "../dom";
import { MessageTools } from "../tools/messages";
import { DOMTools } from "../tools/dom";
export const Chat = () => {
const { messages } = useContext(StateContext);

View File

@ -1,11 +1,10 @@
import { useCallback, useEffect, useMemo, useState } from 'preact/hooks';
import styles from './header.module.css';
import { Connection, HORDE_ANON_KEY, isHordeConnection, isKoboldConnection, type IConnection, type IHordeModel } from '../../connection';
import { Connection, HORDE_ANON_KEY, isHordeConnection, isKoboldConnection, type IConnection, type IHordeModel } from '../../tools/connection';
import { Instruct } from '../../contexts/state';
import { useInputState } from '@common/hooks/useInputState';
import { useInputCallback } from '@common/hooks/useInputCallback';
import { Huggingface } from '../../huggingface';
import { Huggingface } from '../../tools/huggingface';
interface IProps {
connection: IConnection;
@ -13,10 +12,13 @@ interface IProps {
}
export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
// kobold
const [connectionUrl, setConnectionUrl] = useInputState('');
// horde
const [apiKey, setApiKey] = useInputState(HORDE_ANON_KEY);
const [modelName, setModelName] = useInputState('');
const [instruct, setInstruct] = useInputState('');
const [modelTemplate, setModelTemplate] = useInputState('');
const [hordeModels, setHordeModels] = useState<IHordeModel[]>([]);
const [contextLength, setContextLength] = useState<number>(0);
@ -27,11 +29,14 @@ export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
return 'unknown';
}, [connection]);
const urlValid = useMemo(() => contextLength > 0, [contextLength]);
const isOnline = useMemo(() => contextLength > 0, [contextLength]);
useEffect(() => {
setInstruct(connection.instruct);
if (isKoboldConnection(connection)) {
setConnectionUrl(connection.url);
Connection.getContextLength(connection).then(setContextLength);
} else if (isHordeConnection(connection)) {
setModelName(connection.model);
setApiKey(connection.apiKey || HORDE_ANON_KEY);
@ -39,9 +44,6 @@ export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
Connection.getHordeModels()
.then(m => setHordeModels(Array.from(m.values()).sort((a, b) => a.name.localeCompare(b.name))));
}
Connection.getContextLength(connection).then(setContextLength);
Connection.getModelName(connection).then(setModelName);
}, [connection]);
useEffect(() => {
@ -50,47 +52,44 @@ export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
.then(template => {
if (template) {
setModelTemplate(template);
setInstruct(template);
}
});
}
}, [modelName]);
const setInstruct = useInputCallback((instruct) => {
setConnection({ ...connection, instruct });
}, [connection, setConnection]);
const setBackendType = useInputCallback((type) => {
if (type === 'kobold') {
setConnection({
instruct: connection.instruct,
instruct,
url: connectionUrl,
});
} else if (type === 'horde') {
setConnection({
instruct: connection.instruct,
instruct,
apiKey,
model: modelName,
});
}
}, [connection, setConnection, connectionUrl, apiKey, modelName]);
}, [setConnection, connectionUrl, apiKey, modelName, instruct]);
const handleBlurUrl = useCallback(() => {
const regex = /^(?:http(s?):\/\/)?(.*?)\/?$/i;
const url = connectionUrl.replace(regex, 'http$1://$2');
setConnection({
instruct: connection.instruct,
instruct,
url,
});
}, [connection, connectionUrl, setConnection]);
}, [connectionUrl, instruct, setConnection]);
const handleBlurHorde = useCallback(() => {
setConnection({
instruct: connection.instruct,
instruct,
apiKey,
model: modelName,
});
}, [connection, apiKey, modelName, setConnection]);
}, [apiKey, modelName, instruct, setConnection]);
return (
<div class={styles.connectionEditor}>
@ -98,7 +97,7 @@ export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
<option value='kobold'>Kobold CPP</option>
<option value='horde'>Horde</option>
</select>
<select value={connection.instruct} onChange={setInstruct} title='Instruct template'>
<select value={instruct} onChange={setInstruct} title='Instruct template'>
{modelName && modelTemplate && <optgroup label='Native model template'>
<option value={modelTemplate} title='Native for model'>{modelName}</option>
</optgroup>}
@ -109,15 +108,15 @@ export const ConnectionEditor = ({ connection, setConnection }: IProps) => {
</option>
))}
</optgroup>
<optgroup label='Custom'>
{instruct !== modelTemplate && <optgroup label='Custom'>
<option value={connection.instruct}>Custom</option>
</optgroup>
</optgroup>}
</select>
{isKoboldConnection(connection) && <input
value={connectionUrl}
onInput={setConnectionUrl}
onBlur={handleBlurUrl}
class={urlValid ? styles.valid : styles.invalid}
class={isOnline ? styles.valid : styles.invalid}
/>}
{isHordeConnection(connection) && <>
<input

View File

@ -29,6 +29,14 @@
flex-direction: row;
gap: 8px;
padding: 0 8px;
.online {
color: var(--brightGreen);
}
.offline {
color: var(--brightRed);
}
}
}

View File

@ -23,6 +23,7 @@ export const Header = () => {
const promptsOpen = useBool();
const genparamsOpen = useBool();
const assistantOpen = useBool();
const isOnline = useMemo(() => contextLength > 0, [contextLength]);
const bannedWordsInput = useMemo(() => bannedWords.join('\n'), [bannedWords]);
@ -56,7 +57,7 @@ export const Header = () => {
<div class={styles.header}>
<div class={styles.inputs}>
<div class={styles.buttons}>
<button class='icon' onClick={connectionsOpen.setTrue} title='Connection settings'>
<button class={`icon ${isOnline ? styles.online: styles.offline}`} onClick={connectionsOpen.setTrue} title='Connection settings'>
🔌
</button>
</div>

View File

@ -1,5 +1,5 @@
import { useMemo } from "preact/hooks";
import { MessageTools } from "../../messages";
import { MessageTools } from "../../tools/messages";
import styles from './message.module.css';

View File

@ -1,7 +1,7 @@
import { useCallback, useContext, useEffect, useMemo, useRef, useState } from "preact/hooks";
import { MessageTools, type IMessage } from "../../messages";
import { MessageTools, type IMessage } from "../../tools/messages";
import { StateContext } from "../../contexts/state";
import { DOMTools } from "../../dom";
import { DOMTools } from "../../tools/dom";
import styles from './message.module.css';
import { AutoTextarea } from "../autoTextarea";

View File

@ -1,7 +1,7 @@
import { MessageTools, type IMessage } from "../../messages"
import { MessageTools, type IMessage } from "../../tools/messages"
import { useCallback, useContext, useEffect, useMemo, useRef, useState } from "preact/hooks";
import { Modal } from "@common/components/modal/modal";
import { DOMTools } from "../../dom";
import { DOMTools } from "../../tools/dom";
import styles from './minichat.module.css';
import { LLMContext } from "../../contexts/llm";

View File

@ -1,13 +1,13 @@
import { createContext } from "preact";
import { useCallback, useContext, useEffect, useMemo, useState } from "preact/hooks";
import { MessageTools, type IMessage } from "../messages";
import { MessageTools, type IMessage } from "../tools/messages";
import { StateContext } from "./state";
import { useBool } from "@common/hooks/useBool";
import { Template } from "@huggingface/jinja";
import { Huggingface } from "../huggingface";
import { approximateTokens, Connection, normalizeModel, type IGenerationSettings } from "../connection";
import { Huggingface } from "../tools/huggingface";
import { Connection, type IGenerationSettings } from "../tools/connection";
import { throttle } from "@common/utils";
import { useAsyncEffect } from "@common/hooks/useAsyncEffect";
import { approximateTokens, normalizeModel } from "../tools/model";
interface ICompileArgs {
keepUsers?: number;
@ -58,15 +58,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
const [modelName, setModelName] = useState('');
const [hasToolCalls, setHasToolCalls] = useState(false);
const userPromptTemplate = useMemo(() => {
try {
return new Template(userPrompt)
} catch {
return {
render: () => userPrompt,
}
}
}, [userPrompt]);
const isOnline = useMemo(() => contextLength > 0, [contextLength]);
const actions: IActions = useMemo(() => ({
compilePrompt: async (messages, { keepUsers, continueLast = false } = {}) => {
@ -86,7 +78,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
const promptMessages = continueLast ? messages.slice(0, -1) : messages.slice();
if (isContinue) {
promptMessages.push(MessageTools.create(userPromptTemplate.render({})));
promptMessages.push(MessageTools.create(Huggingface.applyTemplate(userPrompt, {})));
}
const userMessages = promptMessages.filter(m => m.role === 'user');
@ -113,7 +105,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
} else if (role === 'user' && !message.technical) {
templateMessages.push({
role: message.role,
content: userPromptTemplate.render({ prompt: content, isStart: !wasStory }),
content: Huggingface.applyTemplate(userPrompt, { prompt: content, isStart: !wasStory }),
});
} else {
if (role === 'assistant') {
@ -137,17 +129,17 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
if (story.length > 0) {
const prompt = MessageTools.getSwipe(firstUserMessage)?.content;
templateMessages.push({ role: 'user', content: userPromptTemplate.render({ prompt, isStart: true }) });
templateMessages.push({ role: 'user', content: Huggingface.applyTemplate(userPrompt, { prompt, isStart: true }) });
templateMessages.push({ role: 'assistant', content: story });
}
let userPrompt = MessageTools.getSwipe(lastUserMessage)?.content;
if (!lastUserMessage?.technical && !isContinue && userPrompt) {
userPrompt = userPromptTemplate.render({ prompt: userPrompt, isStart: story.length === 0 });
let userMessage = MessageTools.getSwipe(lastUserMessage)?.content;
if (!lastUserMessage?.technical && !isContinue && userMessage) {
userMessage = Huggingface.applyTemplate(userPrompt, { prompt: userMessage, isStart: story.length === 0 });
}
if (userPrompt) {
templateMessages.push({ role: 'user', content: userPrompt });
if (userMessage) {
templateMessages.push({ role: 'user', content: userMessage });
}
}
@ -156,7 +148,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
templateMessages.splice(1, 0, {
role: 'user',
content: userPromptTemplate.render({ prompt, isStart: true }),
content: Huggingface.applyTemplate(userPrompt, { prompt, isStart: true }),
});
}
@ -210,10 +202,10 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
stopGeneration: () => {
Connection.stopGeneration();
},
}), [connection, lore, userPromptTemplate, systemPrompt, bannedWords, summarizePrompt]);
}), [connection, lore, userPrompt, systemPrompt, bannedWords, summarizePrompt]);
useAsyncEffect(async () => {
if (triggerNext && !generating.value) {
if (isOnline && triggerNext && !generating.value) {
setTriggerNext(false);
setContinueLast(false);
@ -244,10 +236,10 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
MessageTools.playReady();
}
}, [triggerNext]);
}, [triggerNext, isOnline]);
useAsyncEffect(async () => {
if (summaryEnabled && !processing.summarizing) {
if (isOnline && summaryEnabled && !processing.summarizing) {
try {
processing.summarizing = true;
for (let id = 0; id < messages.length; id++) {
@ -264,7 +256,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
processing.summarizing = false;
}
}
}, [messages, summaryEnabled]);
}, [messages, summaryEnabled, isOnline]);
useEffect(throttle(() => {
Connection.getContextLength(connection).then(setContextLength);
@ -272,7 +264,7 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
}, 1000, true), [connection]);
const calculateTokens = useCallback(throttle(async () => {
if (!processing.tokenizing && !generating.value) {
if (isOnline && !processing.tokenizing && !generating.value) {
try {
processing.tokenizing = true;
const { prompt } = await actions.compilePrompt(messages);
@ -284,11 +276,11 @@ export const LLMContextProvider = ({ children }: { children?: any }) => {
processing.tokenizing = false;
}
}
}, 1000, true), [actions, messages]);
}, 1000, true), [actions, messages, isOnline]);
useEffect(() => {
calculateTokens();
}, [messages, connection, systemPrompt, lore, userPrompt]);
}, [messages, connection, systemPrompt, lore, userPrompt, isOnline]);
useEffect(() => {
try {

View File

@ -1,8 +1,8 @@
import { createContext } from "preact";
import { useCallback, useEffect, useMemo, useState } from "preact/hooks";
import { MessageTools, type IMessage } from "../messages";
import { MessageTools, type IMessage } from "../tools/messages";
import { useInputState } from "@common/hooks/useInputState";
import { type IConnection } from "../connection";
import { type IConnection } from "../tools/connection";
interface IContext {
currentConnection: number;
@ -83,7 +83,7 @@ Continue the story forward.
{%- endif %}
{% if prompt -%}
This is the description of What should happen next in your answer: {{ prompt | trim }}
This is the description of what should happen next in your answer: {{ prompt | trim }}
{% endif %}
Remember that this story should be infinite and go forever.
Make sure to follow the world description and rules exactly. Avoid cliffhangers and pauses, be creative.`,

View File

@ -2,6 +2,8 @@ import Lock from "@common/lock";
import SSE from "@common/sse";
import { throttle } from "@common/utils";
import delay from "delay";
import { Huggingface } from "./huggingface";
import { approximateTokens, normalizeModel } from "./model";
interface IBaseConnection {
instruct: string;
@ -79,34 +81,6 @@ const MAX_HORDE_LENGTH = 512;
const MAX_HORDE_CONTEXT = 32000;
export const HORDE_ANON_KEY = '0000000000';
export const normalizeModel = (model: string) => {
let currentModel = model.split(/[\\\/]/).at(-1);
currentModel = currentModel.split('::').at(0);
let normalizedModel: string;
do {
normalizedModel = currentModel;
currentModel = currentModel
.replace(/[ ._-]\d+(k$|-context)/i, '') // remove context length, i.e. -32k
.replace(/[ ._-](gptq|awq|exl2?|imat|i\d|h\d)/i, '') // remove quant name
.replace(/([ ._-]?gg(uf|ml)[ ._-]?(v[ ._-]?\d)?)/i, '') // remove gguf-v3/ggml/etc
.replace(/[ ._-]i?q([ ._-]?\d[ ._-]?(k?[ ._-]?x*[ ._-]?[lms]?)?)+/i, '') // remove quant size
.replace(/[ ._-]\d+(\.\d+)?bpw/i, '') // remove bpw
.replace(/[ ._-]f(p|loat)?(8|16|32)/i, '')
.replace(/^(debug-?)+/i, '')
.trim();
} while (normalizedModel !== currentModel);
return normalizedModel
.replace(/[ _-]+/ig, '-')
.replace(/\.{2,}/, '-')
.replace(/[ ._-]+$/ig, '')
.trim();
}
export const approximateTokens = (prompt: string): number => Math.round(prompt.length / 4);
export type IGenerationSettings = Partial<typeof DEFAULT_GENERATION_SETTINGS>;
export namespace Connection {
@ -171,7 +145,11 @@ export namespace Connection {
sse.close();
}
async function generateHorde(connection: Omit<IHordeConnection, keyof IBaseConnection>, prompt: string, extraSettings: IGenerationSettings = {}): Promise<string> {
async function* generateHorde(connection: IHordeConnection, prompt: string, extraSettings: IGenerationSettings = {}): AsyncGenerator<string> {
if (!connection.model) {
throw new Error('Horde not connected');
}
const models = await getHordeModels();
const model = models.get(connection.model);
if (model) {
@ -192,9 +170,11 @@ export namespace Connection {
models: model.hordeNames,
workers: model.workers,
};
const bannedTokens = requestData.params.banned_tokens ?? [];
const { signal } = abortController;
while (true) {
const generateResponse = await fetch(`${AIHORDE}/api/v2/generate/text/async`, {
method: 'POST',
body: JSON.stringify(requestData),
@ -227,19 +207,41 @@ export namespace Connection {
};
const deleteRequest = async () => (await request('DELETE')) ?? '';
let text: string | null = null;
while (true) {
while (!text) {
try {
await delay(2500, { signal });
const text = await request();
text = await request();
if (text) {
return text;
const locaseText = text.toLowerCase();
let unsloppedText = text;
for (const ban of bannedTokens) {
const slopIdx = locaseText.indexOf(ban.toLowerCase());
if (slopIdx >= 0) {
console.log(`[horde] slop '${ban}' detected at ${slopIdx}`);
unsloppedText = unsloppedText.slice(0, slopIdx);
}
}
yield unsloppedText;
requestData.prompt += unsloppedText;
if (unsloppedText === text) {
return; // we are finished
}
if (unsloppedText.length === 0) {
requestData.params.temperature += 0.05;
}
}
} catch (e) {
console.error('Error in horde generation:', e);
return deleteRequest();
return yield deleteRequest();
}
}
}
}
@ -251,7 +253,7 @@ export namespace Connection {
if (isKoboldConnection(connection)) {
yield* generateKobold(connection.url, prompt, extraSettings);
} else if (isHordeConnection(connection)) {
yield await generateHorde(connection, prompt, extraSettings);
yield* generateHorde(connection, prompt, extraSettings);
}
}
@ -277,7 +279,7 @@ export namespace Connection {
for (const worker of goodWorkers) {
for (const modelName of worker.models) {
const normName = normalizeModel(modelName.toLowerCase());
const normName = normalizeModel(modelName);
let model = models.get(normName);
if (!model) {
model = {
@ -343,7 +345,7 @@ export namespace Connection {
} catch (e) {
console.error('Error getting max tokens', e);
}
} else if (isHordeConnection(connection)) {
} else if (isHordeConnection(connection) && connection.model) {
const models = await getHordeModels();
const model = models.get(connection.model);
if (model) {
@ -367,7 +369,18 @@ export namespace Connection {
return value;
}
} catch (e) {
console.error('Error counting tokens', e);
console.error('Error counting tokens:', e);
}
} else {
const model = await getModelName(connection);
const tokenizer = await Huggingface.findTokenizer(model);
if (tokenizer) {
try {
const { input_ids } = await tokenizer(prompt);
return input_ids.data.length;
} catch (e) {
console.error('Error counting tokens with tokenizer:', e);
}
}
}

View File

@ -1,7 +1,8 @@
import { gguf } from '@huggingface/gguf';
import * as hub from '@huggingface/hub';
import { Template } from '@huggingface/jinja';
import { normalizeModel } from './connection';
import { AutoTokenizer, PreTrainedTokenizer } from '@huggingface/transformers';
import { normalizeModel } from './model';
export namespace Huggingface {
export interface ITemplateMessage {
@ -81,6 +82,7 @@ export namespace Huggingface {
const templateCache: Record<string, string> = loadCache();
const compiledTemplates = new Map<string, Template>();
const tokenizerCache = new Map<string, PreTrainedTokenizer | null>();
const hasField = <T extends string>(obj: unknown, field: T): obj is Record<T, unknown> => (
obj != null && typeof obj === 'object' && (field in obj)
@ -92,13 +94,13 @@ export namespace Huggingface {
);
const loadHuggingfaceTokenizerConfig = async (modelName: string): Promise<TokenizerConfig | null> => {
modelName = normalizeModel(modelName);
console.log(`[huggingface] searching config for '${modelName}'`);
const searchModel = normalizeModel(modelName);
const hubModels = await Array.fromAsync(hub.listModels({ search: { query: searchModel }, additionalFields: ['config'] }));
const hubModels = await Array.fromAsync(hub.listModels({ search: { query: modelName }, additionalFields: ['config'] }));
const models = hubModels.filter(m => {
if (m.gated) return false;
if (!normalizeModel(m.name).includes(searchModel)) return false;
if (!normalizeModel(m.name).includes(modelName)) return false;
return true;
}).sort((a, b) => b.downloads - a.downloads);
@ -116,8 +118,8 @@ export namespace Huggingface {
}
try {
console.log(`[huggingface] searching config in '${model.name}/tokenizer_config.json'`);
const fileResponse = await hub.downloadFile({ repo: model.name, path: 'tokenizer_config.json' });
console.log(`[huggingface] searching config in '${name}/tokenizer_config.json'`);
const fileResponse = await hub.downloadFile({ repo: name, path: 'tokenizer_config.json' });
if (fileResponse?.ok) {
const maybeConfig = await fileResponse.json();
if (isTokenizerConfig(maybeConfig)) {
@ -232,10 +234,10 @@ export namespace Huggingface {
}
export const findModelTemplate = async (modelName: string): Promise<string | null> => {
const modelKey = modelName.toLowerCase().trim();
if (!modelKey) return '';
modelName = normalizeModel(modelName);
if (!modelName) return '';
let template = templateCache[modelKey] ?? null;
let template = templateCache[modelName] ?? null;
if (template) {
console.log(`[huggingface] found cached template for '${modelName}'`);
@ -254,12 +256,53 @@ export namespace Huggingface {
}
}
templateCache[modelKey] = template;
templateCache[modelName] = template;
saveCache(templateCache);
return template;
}
// Finds a Hugging Face tokenizer for the given model name, searching the hub
// at most once per normalized name: both successful lookups and misses are
// remembered in `tokenizerCache` (a cached `null` marks a known miss).
export const findTokenizer = async (modelName: string): Promise<PreTrainedTokenizer | null> => {
    modelName = normalizeModel(modelName);
    // `?? null` folds together "never searched" and "searched, found nothing";
    // the `has()` check below distinguishes the two cases.
    let tokenizer = tokenizerCache.get(modelName) ?? null;
    if (tokenizer) {
        return tokenizer;
    } else if (!tokenizerCache.has(modelName)) {
        console.log(`[huggingface] searching tokenizer for '${modelName}'`);
        const hubModels = await Array.fromAsync(hub.listModels({ search: { query: modelName } }));
        const models = hubModels.filter(m => {
            if (m.gated) return false; // gated repos cannot be downloaded anonymously
            if (m.name.toLowerCase().includes('gguf')) return false; // GGUF repos ship no HF tokenizer files
            if (!normalizeModel(m.name).includes(modelName)) return false; // drop unrelated search hits
            return true;
        });
        for (const model of models) {
            const { name } = model;
            try {
                console.log(`[huggingface] searching tokenizer in '${name}'`);
                tokenizer = await AutoTokenizer.from_pretrained(name);
                break;
            } catch { } // best effort: repo may lack tokenizer files, try the next candidate
        }
    }
    // Record the result (including null) so repeated calls skip the hub search.
    tokenizerCache.set(modelName, tokenizer);
    if (tokenizer) {
        console.log(`[huggingface] found tokenizer for '${modelName}'`);
    } else {
        console.log(`[huggingface] not found tokenizer for '${modelName}'`);
    }
    return tokenizer;
}
export const applyChatTemplate = (templateString: string, messages: ITemplateMessage[], functions?: IFunction[]) => (
applyTemplate(templateString, {
messages,

View File

@ -1,5 +1,4 @@
import { Template } from "@huggingface/jinja";
import messageSound from './assets/message.mp3';
import messageSound from '../assets/message.mp3';
export interface ISwipe {
content: string;

View File

@ -0,0 +1,27 @@
// Reduces a raw model identifier (file path, hub repo name, quantized build
// name) to a canonical lowercase base name, so different builds of the same
// model compare equal, e.g. "TheBloke/MythoMax-L2-13B-GPTQ" -> "mythomax-l2-13b".
export const normalizeModel = (model: string) => {
    // Drop any directory prefix and any "::"-suffixed qualifier, then lowercase.
    const pathParts = model.split(/[\\\/]/);
    let candidate = pathParts[pathParts.length - 1].split('::')[0].toLowerCase();
    // Strip quantization/packaging decorations repeatedly until a fixpoint is
    // reached, since several suffixes can be stacked (e.g. ".Q4_K_M.gguf").
    while (true) {
        const stripped = candidate
            .replace(/[ ._-]\d+(k$|-context)/i, '') // remove context length, i.e. -32k
            .replace(/[ ._-](gptq|awq|exl2?|imat|i\d|h\d)/i, '') // remove quant name
            .replace(/([ ._-]?gg(uf|ml)[ ._-]?(v[ ._-]?\d)?)/i, '') // remove gguf-v3/ggml/etc
            .replace(/[ ._-]i?q([ ._-]?\d[ ._-]?(k?[ ._-]?x*[ ._-]?[lms]?)?)+/i, '') // remove quant size
            .replace(/[ ._-]\d+(\.\d+)?bpw/i, '') // remove bpw
            .replace(/[ ._-]f(p|loat)?(8|16|32)/i, '') // remove float precision markers
            .replace(/^(debug-?)+/i, '') // remove debug- prefixes
            .trim();
        if (stripped === candidate) break;
        candidate = stripped;
    }
    // Final cleanup: collapse separator runs into single dashes, trim the tail.
    return candidate
        .replace(/[ _-]+/ig, '-')
        .replace(/\.{2,}/, '-')
        .replace(/[ ._-]+$/ig, '')
        .trim();
}
// Cheap token-count estimate: assumes roughly four characters per token.
export const approximateTokens = (prompt: string): number => {
    return Math.round(prompt.length / 4);
};