feat: add multimodal image marker support with Ollama vision

This commit is contained in:
Chummy 2026-02-19 20:24:56 +08:00
parent 63aacb09ff
commit dcd0bf641d
21 changed files with 1152 additions and 78 deletions

View file

@ -37,6 +37,28 @@ impl LinqChannel {
&self.from_phone
}
/// Convert a Linq media message part into an inline `[IMAGE:<source>]` marker.
///
/// The source is read from the part's `url` field, falling back to `value`;
/// the first of the two that holds a non-blank string (after trimming) wins.
/// The part's `mime_type` is trimmed and ASCII-lowercased and must start with
/// `image/`.
///
/// Returns `None` when neither field yields a usable source, or when the
/// MIME type is missing or does not denote an image.
fn media_part_to_image_marker(part: &serde_json::Value) -> Option<String> {
    // Probe the candidate keys in priority order. A `url` that is present but
    // null, non-string, or blank must not mask a usable `value`.
    let source = ["url", "value"]
        .iter()
        .filter_map(|key| part.get(*key).and_then(|field| field.as_str()))
        .map(str::trim)
        .find(|candidate| !candidate.is_empty())?;

    let mime_type = part
        .get("mime_type")
        .and_then(|field| field.as_str())
        .map(str::trim)
        .unwrap_or_default()
        .to_ascii_lowercase();

    // An absent `mime_type` collapses to "" above and is rejected here too.
    if !mime_type.starts_with("image/") {
        return None;
    }

    Some(format!("[IMAGE:{source}]"))
}
/// Parse an incoming webhook payload from Linq and extract messages.
///
/// Linq webhook envelope:
@ -124,25 +146,36 @@ impl LinqChannel {
return messages;
};
let text_parts: Vec<&str> = parts
let content_parts: Vec<String> = parts
.iter()
.filter_map(|part| {
let part_type = part.get("type").and_then(|t| t.as_str())?;
if part_type == "text" {
part.get("value").and_then(|v| v.as_str())
} else {
// Skip media parts for now
tracing::debug!("Linq: skipping {part_type} part");
None
match part_type {
"text" => part
.get("value")
.and_then(|v| v.as_str())
.map(ToString::to_string),
"media" | "image" => {
if let Some(marker) = Self::media_part_to_image_marker(part) {
Some(marker)
} else {
tracing::debug!("Linq: skipping unsupported {part_type} part");
None
}
}
_ => {
tracing::debug!("Linq: skipping {part_type} part");
None
}
}
})
.collect();
if text_parts.is_empty() {
if content_parts.is_empty() {
return messages;
}
let content = text_parts.join("\n");
let content = content_parts.join("\n").trim().to_string();
if content.is_empty() {
return messages;
@ -496,7 +529,7 @@ mod tests {
}
#[test]
fn linq_parse_media_only_skipped() {
fn linq_parse_media_only_translated_to_image_marker() {
let ch = LinqChannel::new("tok".into(), "+15551234567".into(), vec!["*".into()]);
let payload = serde_json::json!({
"event_type": "message.received",
@ -516,7 +549,32 @@ mod tests {
});
let msgs = ch.parse_webhook_payload(&payload);
assert!(msgs.is_empty(), "Media-only messages should be skipped");
assert_eq!(msgs.len(), 1);
assert_eq!(msgs[0].content, "[IMAGE:https://example.com/image.jpg]");
}
#[test]
fn linq_parse_media_non_image_still_skipped() {
    // A `media` part whose MIME type is audio rather than image.
    let audio_part = serde_json::json!({
        "type": "media",
        "url": "https://example.com/sound.mp3",
        "mime_type": "audio/mpeg"
    });

    // Wrap the single part in a `message.received` webhook envelope.
    let webhook = serde_json::json!({
        "event_type": "message.received",
        "data": {
            "chat_id": "chat-789",
            "from": "+1234567890",
            "is_from_me": false,
            "message": {
                "id": "msg-abc",
                "parts": [audio_part]
            }
        }
    });

    let channel = LinqChannel::new("tok".into(), "+15551234567".into(), vec!["*".into()]);
    let parsed = channel.parse_webhook_payload(&webhook);

    // The only part is unsupported media, so no message may be produced.
    assert!(parsed.is_empty(), "Non-image media should still be skipped");
}
#[test]

View file

@ -139,6 +139,7 @@ struct ChannelRuntimeContext {
provider_runtime_options: providers::ProviderRuntimeOptions,
workspace_dir: Arc<PathBuf>,
message_timeout_secs: u64,
multimodal: crate::config::MultimodalConfig,
}
fn conversation_memory_key(msg: &traits::ChannelMessage) -> String {
@ -810,6 +811,7 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
true,
None,
msg.channel.as_str(),
&ctx.multimodal,
ctx.max_tool_iterations,
delta_tx,
),
@ -2062,6 +2064,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
provider_runtime_options,
workspace_dir: Arc::new(config.workspace_dir.clone()),
message_timeout_secs,
multimodal: config.multimodal.clone(),
});
run_message_dispatch_loop(rx, runtime_ctx, max_in_flight_messages).await;
@ -2559,6 +2562,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2613,6 +2617,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2676,6 +2681,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2760,6 +2766,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2820,6 +2827,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2875,6 +2883,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -2981,6 +2990,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(4);
@ -3054,6 +3064,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(
@ -3451,6 +3462,7 @@ mod tests {
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
workspace_dir: Arc::new(std::env::temp_dir()),
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
multimodal: crate::config::MultimodalConfig::default(),
});
process_channel_message(