feat: add multimodal image marker support with Ollama vision
This commit is contained in:
parent
63aacb09ff
commit
dcd0bf641d
21 changed files with 1152 additions and 78 deletions
|
|
@ -37,6 +37,28 @@ impl LinqChannel {
|
|||
&self.from_phone
|
||||
}
|
||||
|
||||
fn media_part_to_image_marker(part: &serde_json::Value) -> Option<String> {
|
||||
let source = part
|
||||
.get("url")
|
||||
.or_else(|| part.get("value"))
|
||||
.and_then(|value| value.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())?;
|
||||
|
||||
let mime_type = part
|
||||
.get("mime_type")
|
||||
.and_then(|value| value.as_str())
|
||||
.map(str::trim)
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
if !mime_type.starts_with("image/") {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(format!("[IMAGE:{source}]"))
|
||||
}
|
||||
|
||||
/// Parse an incoming webhook payload from Linq and extract messages.
|
||||
///
|
||||
/// Linq webhook envelope:
|
||||
|
|
@ -124,25 +146,36 @@ impl LinqChannel {
|
|||
return messages;
|
||||
};
|
||||
|
||||
let text_parts: Vec<&str> = parts
|
||||
let content_parts: Vec<String> = parts
|
||||
.iter()
|
||||
.filter_map(|part| {
|
||||
let part_type = part.get("type").and_then(|t| t.as_str())?;
|
||||
if part_type == "text" {
|
||||
part.get("value").and_then(|v| v.as_str())
|
||||
} else {
|
||||
// Skip media parts for now
|
||||
tracing::debug!("Linq: skipping {part_type} part");
|
||||
None
|
||||
match part_type {
|
||||
"text" => part
|
||||
.get("value")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(ToString::to_string),
|
||||
"media" | "image" => {
|
||||
if let Some(marker) = Self::media_part_to_image_marker(part) {
|
||||
Some(marker)
|
||||
} else {
|
||||
tracing::debug!("Linq: skipping unsupported {part_type} part");
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
tracing::debug!("Linq: skipping {part_type} part");
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if text_parts.is_empty() {
|
||||
if content_parts.is_empty() {
|
||||
return messages;
|
||||
}
|
||||
|
||||
let content = text_parts.join("\n");
|
||||
let content = content_parts.join("\n").trim().to_string();
|
||||
|
||||
if content.is_empty() {
|
||||
return messages;
|
||||
|
|
@ -496,7 +529,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn linq_parse_media_only_skipped() {
|
||||
fn linq_parse_media_only_translated_to_image_marker() {
|
||||
let ch = LinqChannel::new("tok".into(), "+15551234567".into(), vec!["*".into()]);
|
||||
let payload = serde_json::json!({
|
||||
"event_type": "message.received",
|
||||
|
|
@ -516,7 +549,32 @@ mod tests {
|
|||
});
|
||||
|
||||
let msgs = ch.parse_webhook_payload(&payload);
|
||||
assert!(msgs.is_empty(), "Media-only messages should be skipped");
|
||||
assert_eq!(msgs.len(), 1);
|
||||
assert_eq!(msgs[0].content, "[IMAGE:https://example.com/image.jpg]");
|
||||
}
|
||||
|
||||
/// A media part whose MIME type is not an image (here `audio/mpeg`) must not
/// produce any parsed message.
#[test]
fn linq_parse_media_non_image_still_skipped() {
    let channel = LinqChannel::new("tok".into(), "+15551234567".into(), vec!["*".into()]);

    // The single part carried by the webhook: audio, not an image.
    let audio_part = serde_json::json!({
        "type": "media",
        "url": "https://example.com/sound.mp3",
        "mime_type": "audio/mpeg"
    });
    let webhook = serde_json::json!({
        "event_type": "message.received",
        "data": {
            "chat_id": "chat-789",
            "from": "+1234567890",
            "is_from_me": false,
            "message": {
                "id": "msg-abc",
                "parts": [audio_part]
            }
        }
    });

    let parsed = channel.parse_webhook_payload(&webhook);
    assert!(parsed.is_empty(), "Non-image media should still be skipped");
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -139,6 +139,7 @@ struct ChannelRuntimeContext {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions,
|
||||
workspace_dir: Arc<PathBuf>,
|
||||
message_timeout_secs: u64,
|
||||
multimodal: crate::config::MultimodalConfig,
|
||||
}
|
||||
|
||||
fn conversation_memory_key(msg: &traits::ChannelMessage) -> String {
|
||||
|
|
@ -810,6 +811,7 @@ async fn process_channel_message(ctx: Arc<ChannelRuntimeContext>, msg: traits::C
|
|||
true,
|
||||
None,
|
||||
msg.channel.as_str(),
|
||||
&ctx.multimodal,
|
||||
ctx.max_tool_iterations,
|
||||
delta_tx,
|
||||
),
|
||||
|
|
@ -2062,6 +2064,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
|
|||
provider_runtime_options,
|
||||
workspace_dir: Arc::new(config.workspace_dir.clone()),
|
||||
message_timeout_secs,
|
||||
multimodal: config.multimodal.clone(),
|
||||
});
|
||||
|
||||
run_message_dispatch_loop(rx, runtime_ctx, max_in_flight_messages).await;
|
||||
|
|
@ -2559,6 +2562,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2613,6 +2617,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2676,6 +2681,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2760,6 +2766,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2820,6 +2827,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2875,6 +2883,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -2981,6 +2990,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
let (tx, rx) = tokio::sync::mpsc::channel::<traits::ChannelMessage>(4);
|
||||
|
|
@ -3054,6 +3064,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
@ -3451,6 +3462,7 @@ mod tests {
|
|||
provider_runtime_options: providers::ProviderRuntimeOptions::default(),
|
||||
workspace_dir: Arc::new(std::env::temp_dir()),
|
||||
message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS,
|
||||
multimodal: crate::config::MultimodalConfig::default(),
|
||||
});
|
||||
|
||||
process_channel_message(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue