Refactor the API to be compatible with OpenRouter
parent 36099bc078
commit 602abb1175
@@ -128,14 +128,14 @@ export const ChatPanel = ({ visible }: { visible: boolean }) => {
         model: model.id,
         input: chatRequest?.messages ?? [],
         tools: chatRequest?.tools,
-        enable_thinking: chatRequest?.enable_thinking,
+        reasoning: chatRequest?.reasoning,
       };

       const response = await LLM.countTokens(connection, countRequest);

       setTokenCount({
         taken: response.input_tokens,
-        total: model.max_context ?? response.input_tokens,
+        total: model.context_length ?? response.input_tokens,
       });
     } catch {
       setTokenCount(null);
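Note: `total` now falls back to the counted input when no `context_length` is available. A minimal sketch of the fallback, with assumed values:

    // Illustrative values; real data comes from the models catalog.
    const model = { id: 'deepseek/deepseek-r1', context_length: 65536 as number | undefined };
    const response = { input_tokens: 1200 };
    const total = model.context_length ?? response.input_tokens; // 65536, or 1200 if undefined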
@@ -41,15 +41,13 @@ export const ConnectionSettings = () => {

   const groupedModels = useMemo(() => {
     const sorted = (modelsData ?? []).sort((a, b) => {
-      const aWeight = Number(a.support_tools) * 2 + Number(a.support_thinking);
-      const bWeight = Number(b.support_tools) * 2 + Number(b.support_thinking);
+      const aWeight = Number(a.supported_parameters.includes('tools')) * 2 + Number(a.supported_parameters.includes('reasoning'));
+      const bWeight = Number(b.supported_parameters.includes('tools')) * 2 + Number(b.supported_parameters.includes('reasoning'));
       if (aWeight !== bWeight) return bWeight - aWeight;
-      const aContext = a.max_context ?? 0;
-      const bContext = b.max_context ?? 0;
-      if (aContext !== bContext) return bContext - aContext;
+      if (a.context_length !== b.context_length) return b.context_length - a.context_length;
       return a.id.localeCompare(b.id);
     });
-    const groups = Map.groupBy(sorted, m => m.max_context ?? 0);
+    const groups = Map.groupBy(sorted, m => m.context_length);
     return Array.from(groups.entries())
       .sort((a, b) => b[0] - a[0])
       .map(([context, models]) => ({ context, models }));
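For intuition, the new sort key ranks tools support above reasoning support, then wider context, then id; a sketch with made-up entries:

    // Weight: +2 if 'tools' is in supported_parameters, +1 if 'reasoning' is.
    const models = [
      { id: 'b', supported_parameters: ['reasoning'], context_length: 131072 },
      { id: 'a', supported_parameters: ['tools', 'reasoning'], context_length: 32768 },
      { id: 'c', supported_parameters: ['tools'], context_length: 32768 },
    ];
    // Sorted order: 'a' (weight 3), 'c' (weight 2), 'b' (weight 1);
    // weight ties fall back to larger context_length, then localeCompare on id.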
@@ -161,7 +159,7 @@ export const ConnectionSettings = () => {
             <optgroup key={context} label={`${context} context`}>
               {models.map(m => (
                 <option key={m.id} value={m.id}>
-                  {m.support_tools ? '🔨' : ''}{m.support_thinking ? '🧠' : ''}{m.id} {m.max_length ? `(len: ${m.max_length})` : ''}
+                  {m.supported_parameters.includes('tools') ? '🔨' : ''}{m.supported_parameters.includes('reasoning') ? '🧠' : ''}{m.id} {m.top_provider.max_completion_tokens ? `(len: ${m.top_provider.max_completion_tokens})` : ''}
                 </option>
               ))}
             </optgroup>
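So a hypothetical entry advertising both capabilities would render as, for example:

    // supported_parameters includes 'tools' and 'reasoning',
    // top_provider.max_completion_tokens = 16384:
    // "🔨🧠vendor/model-name (len: 16384)"
    // Models without a completion cap omit the "(len: …)" suffix.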
@@ -54,14 +54,23 @@ namespace LLM {
     model: string;
     messages: ChatMessage[];
     tools?: Tool[];
+    tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } };
+    parallel_tool_calls?: boolean;
     temperature?: number;
     max_tokens?: number;
+    max_completion_tokens?: number;
     stop?: string | string[];
     banned_tokens?: string[];
     top_p?: number;
+    top_k?: number;
+    min_p?: number;
     frequency_penalty?: number;
-    presence_penalty?: number;
-    enable_thinking?: boolean;
+    repetition_penalty?: number;
+    reasoning?: {
+      effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
+      exclude?: boolean;
+      max_tokens?: number;
+    };
     add_generation_prompt?: boolean;
   }

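A request built against the widened interface might look like the following sketch (the model id, message shape, and values are assumptions; `reasoning` mirrors OpenRouter's unified reasoning parameter, where `effort` and `reasoning.max_tokens` are alternative ways to size the thinking budget):

    const request: LLM.ChatRequest = {
      model: 'deepseek/deepseek-r1',                 // hypothetical id
      messages: [{ role: 'user', content: 'Hello!' }],
      tool_choice: 'auto',
      parallel_tool_calls: false,
      temperature: 0.7,
      top_k: 40,                                     // sampler knobs forwarded to
      min_p: 0.05,                                   // providers that support them
      repetition_penalty: 1.1,
      reasoning: { effort: 'high', exclude: true },  // think hard, hide the trace
      max_tokens: 2048,
    };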
@@ -109,11 +118,13 @@ namespace LLM {
     object: 'model';
     created: number;
     owned_by: string;
-    support_tools: boolean;
-    support_infill: boolean;
-    support_thinking: boolean;
-    max_context?: number;
-    max_length?: number;
+    context_length: number;
+    supported_parameters: string[];
+    top_provider: {
+      context_length: number;
+      max_completion_tokens: number;
+      is_moderated: boolean;
+    };
   }

   export interface ModelsResponse {
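The reshaped Model tracks the per-model entries OpenRouter's models endpoint returns; a trimmed, illustrative record (all values assumed):

    const m: LLM.Model = {
      id: 'anthropic/claude-3.5-sonnet',   // hypothetical
      object: 'model',
      created: 1718841600,
      owned_by: 'anthropic',
      context_length: 200000,
      supported_parameters: ['tools', 'reasoning', 'temperature', 'top_p'],
      top_provider: {
        context_length: 200000,            // may differ from the model-level figure
        max_completion_tokens: 8192,
        is_moderated: true,
      },
    };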
@@ -132,7 +143,11 @@ namespace LLM {
     input: LLM.ChatMessage[];
     tools?: LLM.Tool[];
     add_generation_prompt?: boolean;
-    enable_thinking?: boolean;
+    reasoning?: {
+      effort?: string;
+      exclude?: boolean;
+      max_tokens?: number;
+    };
   }

   export type CountTokensRequest = CountTokensRequestString | CountTokensRequestMessages;
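Token counting now accepts the same reasoning block as chat, so counts can reflect the thinking configuration; a sketch (model id and content are assumptions):

    const countReq: LLM.CountTokensRequestMessages = {
      model: 'deepseek/deepseek-r1',       // hypothetical
      input: [{ role: 'user', content: 'How many tokens is this?' }],
      add_generation_prompt: true,
      reasoning: { effort: 'low' },
    };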
@@ -190,7 +205,7 @@ namespace LLM {
       });

       let closed = false;
-      const handleEnd = (e?: unknown) => {
+      const handleEnd = (_e?: unknown) => {
         if (closed) return;
         closed = true;
         controller.close();
@@ -395,21 +395,19 @@ namespace Prompt {
       return {
         model: model.id,
         messages: applyVars(formattedMessages),
-        enable_thinking: false,
-        max_tokens: model.max_length ? model.max_length : 2048,
-        add_generation_prompt: true,
+        max_tokens: model.top_provider.max_completion_tokens || 2048,
         banned_tokens: state.bannedTokens,
       };
     }

     // Estimate token budget for story text
     let storyTokenBudget = 0;
-    if (model.max_context) {
+    if (model.context_length) {
       const nonStorySystem = formatSystemPrompt(state, 0);
       const chatText = messages.map(m => m.content).join('\n');
-      const maxOutput = model.max_length ?? 2048;
+      const maxOutput = model.top_provider.max_completion_tokens || 2048;
       const otherTokens = approxTokens(nonStorySystem) + approxTokens(chatText) + maxOutput;
-      storyTokenBudget = model.max_context - otherTokens;
+      storyTokenBudget = model.context_length - otherTokens;
     }

     messages.unshift({
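For a sense of scale, the budget arithmetic under assumed numbers:

    // Hypothetical figures for a 128k-context model with an 8k completion cap.
    const contextLength = 131072;   // model.context_length
    const maxOutput = 8192;         // model.top_provider.max_completion_tokens
    const systemTokens = 900;       // approxTokens(nonStorySystem), assumed
    const chatTokens = 6100;        // approxTokens(chatText), assumed
    const otherTokens = systemTokens + chatTokens + maxOutput;   // 15192
    const storyTokenBudget = contextLength - otherTokens;        // 115880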
@@ -423,8 +421,8 @@ namespace Prompt {
       messages: applyVars(messages),
       tools: Tools.getTools(),
       banned_tokens: state.bannedTokens,
-      enable_thinking: enableThinking,
-      max_tokens: model.max_length ? model.max_length : 2048,
+      reasoning: { effort: enableThinking ? 'high' : 'none' },
+      max_tokens: model.top_provider.max_completion_tokens || 2048,
     };
   }
 }
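The old boolean toggle now collapses onto the two ends of the effort scale declared in ChatRequest; a sketch of the mapping, with the unused intermediate levels noted:

    // Current mapping: enableThinking true → 'high', false → 'none'.
    const reasoning = { effort: enableThinking ? 'high' : 'none' };
    // The type also admits 'minimal' | 'low' | 'medium' | 'xhigh',
    // which a later UI could expose as a slider.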