feat: add multimodal image marker support with Ollama vision

This commit is contained in:
Chummy 2026-02-19 20:24:56 +08:00
parent 63aacb09ff
commit dcd0bf641d
21 changed files with 1152 additions and 78 deletions

View file

@ -9,11 +9,11 @@ pub use schema::{
DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, EmbeddingRouteConfig, GatewayConfig,
HardwareConfig, HardwareTransport, HeartbeatConfig, HttpRequestConfig, IMessageConfig,
IdentityConfig, LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig,
PeripheralBoardConfig, PeripheralsConfig, ProxyConfig, ProxyScope, QueryClassificationConfig,
ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig,
SchedulerConfig, SecretsConfig, SecurityConfig, SlackConfig, StorageConfig,
StorageProviderConfig, StorageProviderSection, StreamMode, TelegramConfig, TunnelConfig,
WebSearchConfig, WebhookConfig,
MultimodalConfig, ObservabilityConfig, PeripheralBoardConfig, PeripheralsConfig, ProxyConfig,
ProxyScope, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig,
SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SlackConfig,
StorageConfig, StorageProviderConfig, StorageProviderSection, StreamMode, TelegramConfig,
TunnelConfig, WebSearchConfig, WebhookConfig,
};
#[cfg(test)]

View file

@ -124,6 +124,9 @@ pub struct Config {
#[serde(default)]
pub http_request: HttpRequestConfig,
#[serde(default)]
pub multimodal: MultimodalConfig,
#[serde(default)]
pub web_search: WebSearchConfig,
@ -284,6 +287,46 @@ impl Default for AgentConfig {
}
}
// Settings for image attachments (multimodal input).
//
// Every field has a serde default, so any key may be left out of the config:
// `max_images` falls back to `default_multimodal_max_images()`,
// `max_image_size_mb` to `default_multimodal_max_image_size_mb()`, and
// `allow_remote_fetch` to `bool::default()` (false).
//
// The two numeric fields are stored exactly as configured; range limiting is
// done by `effective_limits()`, not at deserialization time.
//
// NOTE(review): the notes in this block are plain `//` comments on purpose —
// presumably the `JsonSchema` derive copies `///` text into the generated
// schema descriptions, so the existing `///` lines are left untouched; confirm.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct MultimodalConfig {
/// Maximum number of image attachments accepted per request.
// Raw configured value; `effective_limits()` clamps it to 1..=16.
#[serde(default = "default_multimodal_max_images")]
pub max_images: usize,
/// Maximum image payload size in MiB before base64 encoding.
// Raw configured value; `effective_limits()` clamps it to 1..=20.
#[serde(default = "default_multimodal_max_image_size_mb")]
pub max_image_size_mb: usize,
/// Allow fetching remote image URLs (http/https). Disabled by default.
// Bare `#[serde(default)]` yields `false` when the key is absent.
#[serde(default)]
pub allow_remote_fetch: bool,
}
/// Fallback for `MultimodalConfig::max_images` when the key is absent from the
/// deserialized config; also used by `MultimodalConfig::default()`.
fn default_multimodal_max_images() -> usize {
    const DEFAULT_MAX_IMAGES: usize = 4;
    DEFAULT_MAX_IMAGES
}
/// Fallback for `MultimodalConfig::max_image_size_mb` (in MiB) when the key is
/// absent from the deserialized config; also used by `MultimodalConfig::default()`.
fn default_multimodal_max_image_size_mb() -> usize {
    const DEFAULT_MAX_IMAGE_SIZE_MB: usize = 5;
    DEFAULT_MAX_IMAGE_SIZE_MB
}
impl MultimodalConfig {
    /// Returns `(max_images, max_image_size_mb)` with each configured value
    /// pulled into its supported range: 1..=16 images and 1..=20 MiB.
    ///
    /// Out-of-range settings are moved to the nearest bound rather than
    /// rejected; the stored configuration is left unchanged.
    pub fn effective_limits(&self) -> (usize, usize) {
        const MIN_IMAGES: usize = 1;
        const MAX_IMAGES: usize = 16;
        const MIN_IMAGE_SIZE_MB: usize = 1;
        const MAX_IMAGE_SIZE_MB: usize = 20;

        // Copy the two numeric fields out of the borrowed config.
        let &Self {
            max_images: configured_images,
            max_image_size_mb: configured_size_mb,
            ..
        } = self;

        (
            configured_images.clamp(MIN_IMAGES, MAX_IMAGES),
            configured_size_mb.clamp(MIN_IMAGE_SIZE_MB, MAX_IMAGE_SIZE_MB),
        )
    }
}
impl Default for MultimodalConfig {
fn default() -> Self {
Self {
max_images: default_multimodal_max_images(),
max_image_size_mb: default_multimodal_max_image_size_mb(),
allow_remote_fetch: false,
}
}
}
// ── Identity (AIEOS / OpenClaw format) ──────────────────────────
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
@ -2534,6 +2577,7 @@ impl Default for Config {
secrets: SecretsConfig::default(),
browser: BrowserConfig::default(),
http_request: HttpRequestConfig::default(),
multimodal: MultimodalConfig::default(),
web_search: WebSearchConfig::default(),
proxy: ProxyConfig::default(),
identity: IdentityConfig::default(),
@ -3502,6 +3546,7 @@ default_temperature = 0.7
secrets: SecretsConfig::default(),
browser: BrowserConfig::default(),
http_request: HttpRequestConfig::default(),
multimodal: MultimodalConfig::default(),
web_search: WebSearchConfig::default(),
proxy: ProxyConfig::default(),
agent: AgentConfig::default(),
@ -3656,6 +3701,7 @@ tool_dispatcher = "xml"
secrets: SecretsConfig::default(),
browser: BrowserConfig::default(),
http_request: HttpRequestConfig::default(),
multimodal: MultimodalConfig::default(),
web_search: WebSearchConfig::default(),
proxy: ProxyConfig::default(),
agent: AgentConfig::default(),