From 4585bb82420c97ec0b5a39f7d94fd2bc0fd73138 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Mon, 11 Nov 2024 01:28:42 +0200 Subject: [PATCH 01/22] update whisper.cpp --- .gitmodules | 2 +- sys/whisper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0863136..1561ba4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "sys/whisper.cpp"] path = sys/whisper.cpp - url = https://github.com/ggerganov/whisper.cpp + url = https://github.com/thewh1teagle/whisper.cpp diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 0377596..4cb3f81 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 0377596b77a3602e36430320cbe45f8c305ef04a +Subproject commit 4cb3f81b377f38149678351b9326d83152904bfb From 828ac50ca031442252be7d90216cd12bd1387c65 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:10:42 +0200 Subject: [PATCH 02/22] Update whisper.cpp --- sys/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 4cb3f81..ca99645 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 4cb3f81b377f38149678351b9326d83152904bfb +Subproject commit ca99645e20f45a7904771192122fc79a9871a109 From fab85025be2fc90c7fbf03aad80cb0747160de09 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:25:22 +0200 Subject: [PATCH 03/22] update whisper.cpp to 7fd8d9 --- src/standalone.rs | 4 ---- sys/whisper.cpp | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/standalone.rs b/src/standalone.rs index 7319126..2b0d439 100644 --- a/src/standalone.rs +++ b/src/standalone.rs @@ -105,8 +105,6 @@ pub struct SystemInfo { pub avx2: bool, pub fma: bool, pub f16c: bool, - pub blas: bool, - pub cuda: bool, } impl Default for 
SystemInfo { @@ -117,8 +115,6 @@ impl Default for SystemInfo { avx2: whisper_rs_sys::ggml_cpu_has_avx2() != 0, fma: whisper_rs_sys::ggml_cpu_has_fma() != 0, f16c: whisper_rs_sys::ggml_cpu_has_f16c() != 0, - blas: whisper_rs_sys::ggml_cpu_has_blas() != 0, - cuda: whisper_rs_sys::ggml_cpu_has_cuda() != 0, } } } diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 0377596..8c6a9b8 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 0377596b77a3602e36430320cbe45f8c305ef04a +Subproject commit 8c6a9b8bb6a0273cc0b5915903ca1ff9206c6285 From 6b4cd34f0db5a3b9c562d351cbe205f33fe4980d Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:30:13 +0200 Subject: [PATCH 04/22] update whisper.cpp --- sys/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 8c6a9b8..ca99645 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 8c6a9b8bb6a0273cc0b5915903ca1ff9206c6285 +Subproject commit ca99645e20f45a7904771192122fc79a9871a109 From 783a7858dbf14cc8d7af454190089f3ff6ad378b Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:46:08 +0200 Subject: [PATCH 05/22] fix: link ggml --- sys/build.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index ef72d84..b69f211 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -253,6 +253,9 @@ fn main() { println!("cargo:rustc-link-search=native={}", destination.display()); println!("cargo:rustc-link-lib=static=whisper"); println!("cargo:rustc-link-lib=static=ggml"); + println!("cargo:rustc-link-lib=static=ggml-base"); + println!("cargo:rustc-link-lib=static=ggml-cpu"); + println!("cargo:rustc-link-lib=static=ggml-blas"); println!( "cargo:WHISPER_CPP_VERSION={}", From 029c1c424b4686219d3de56ec35467f218603ba8 Mon Sep 17 00:00:00 2001 From: thewh1teagle 
<61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:53:39 +0200 Subject: [PATCH 06/22] fix: link metal --- sys/build.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index b69f211..a45cb74 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -257,6 +257,10 @@ fn main() { println!("cargo:rustc-link-lib=static=ggml-cpu"); println!("cargo:rustc-link-lib=static=ggml-blas"); + if cfg!(feature = "metal") { + println!("cargo:rustc-link-lib=static=ggml-metal"); + } + println!( "cargo:WHISPER_CPP_VERSION={}", get_whisper_cpp_version(&whisper_root) From ea9d6bc8bbd47c1e093e7e38164a6029d763f95b Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Wed, 27 Nov 2024 19:40:26 +0200 Subject: [PATCH 07/22] fix vulkan link --- sys/build.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sys/build.rs b/sys/build.rs index a45cb74..01f5b6d 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -255,7 +255,13 @@ fn main() { println!("cargo:rustc-link-lib=static=ggml"); println!("cargo:rustc-link-lib=static=ggml-base"); println!("cargo:rustc-link-lib=static=ggml-cpu"); - println!("cargo:rustc-link-lib=static=ggml-blas"); + if cfg!(target_os = "macos") { + println!("cargo:rustc-link-lib=static=ggml-blas"); + } + if cfg!(feature = "vulkan") { + println!("cargo:rustc-link-lib=static=ggml-vulkan"); + } + if cfg!(feature = "metal") { println!("cargo:rustc-link-lib=static=ggml-metal"); From f8a886144f331b32a9838b3bd0a1a7f3ed9e28e5 Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Tue, 28 Jan 2025 15:06:27 +0100 Subject: [PATCH 08/22] Update submodule URL for whisper.cpp --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 1561ba4..0863136 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "sys/whisper.cpp"] path = sys/whisper.cpp - url = https://github.com/thewh1teagle/whisper.cpp + url = 
https://github.com/ggerganov/whisper.cpp From 90058a9068b49d4a34c8acba3892b4ae1189cfd6 Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Tue, 28 Jan 2025 15:16:46 +0100 Subject: [PATCH 09/22] Update submodule commit for whisper.cpp --- sys/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper.cpp b/sys/whisper.cpp index ca99645..8c6a9b8 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit ca99645e20f45a7904771192122fc79a9871a109 +Subproject commit 8c6a9b8bb6a0273cc0b5915903ca1ff9206c6285 From 251a9e3c1d3b0101c69a89c80bef605ae995cb56 Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Tue, 28 Jan 2025 15:17:34 +0100 Subject: [PATCH 10/22] Remove unnecessary blank line in build.rs --- sys/build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/build.rs b/sys/build.rs index 01f5b6d..8181d24 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -261,7 +261,6 @@ fn main() { if cfg!(feature = "vulkan") { println!("cargo:rustc-link-lib=static=ggml-vulkan"); } - if cfg!(feature = "metal") { println!("cargo:rustc-link-lib=static=ggml-metal"); From 39cf1f4d4059acb86360ed53d423c751c02e214e Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Tue, 28 Jan 2025 15:23:59 +0100 Subject: [PATCH 11/22] Remove deprecated method for suppressing non-speech tokens in FullParams --- src/whisper_params.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/whisper_params.rs b/src/whisper_params.rs index d412bf1..34b4adc 100644 --- a/src/whisper_params.rs +++ b/src/whisper_params.rs @@ -299,15 +299,6 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.suppress_blank = suppress_blank; } - /// Set suppress_non_speech_tokens. - /// See - /// for more information. - /// - /// Defaults to false. - pub fn set_suppress_non_speech_tokens(&mut self, suppress_non_speech_tokens: bool) { - self.fp.suppress_non_speech_tokens = suppress_non_speech_tokens; - } - /// Set initial decoding temperature. /// See for more information. 
/// From f9fe2acf952226b5abdd78af409f17683c13b506 Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Tue, 28 Jan 2025 15:24:04 +0100 Subject: [PATCH 12/22] Update submodule commit for whisper.cpp --- sys/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 8c6a9b8..7ffcd05 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 8c6a9b8bb6a0273cc0b5915903ca1ff9206c6285 +Subproject commit 7ffcd05267b7ff47a271961c5eec11d1a15effe8 From 1db502bc1a3d9f3264abccb7b3c7773cca24193a Mon Sep 17 00:00:00 2001 From: Niko Date: Mon, 10 Feb 2025 14:02:19 -0700 Subject: [PATCH 13/22] Make logging generic across backends and simplify the code --- Cargo.toml | 12 ++++-- src/common_logging.rs | 73 +++++++++++++++++++++++++++++++++++++ src/ggml_logging_hook.rs | 73 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 38 ++++++++++++------- src/whisper_logging_hook.rs | 73 +++++++++++++++++++++++++++++++++++++ src/whisper_sys_log.rs | 42 --------------------- src/whisper_sys_tracing.rs | 42 --------------------- 7 files changed, 253 insertions(+), 100 deletions(-) create mode 100644 src/common_logging.rs create mode 100644 src/ggml_logging_hook.rs create mode 100644 src/whisper_logging_hook.rs delete mode 100644 src/whisper_sys_log.rs delete mode 100644 src/whisper_sys_tracing.rs diff --git a/Cargo.toml b/Cargo.toml index 5ea8246..7fa2765 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,8 +32,14 @@ hipblas = ["whisper-rs-sys/hipblas", "_gpu"] openblas = ["whisper-rs-sys/openblas"] metal = ["whisper-rs-sys/metal", "_gpu"] vulkan = ["whisper-rs-sys/vulkan", "_gpu"] +openmp = ["whisper-rs-sys/openmp"] _gpu = [] test-with-tiny-model = [] -whisper-cpp-log = ["dep:log"] -whisper-cpp-tracing = ["dep:tracing"] -openmp = ["whisper-rs-sys/openmp"] + +# Bring logs into Rust via the log crate. 
*Warning*: not mutually exclusive with tracing_backend, +# will result in duplicate logs if both are enabled and one consumes logs from the other. +log_backend = ["dep:log"] + +# Bring logs into Rust via the tracing crate. *Warning*: not mutually exclusive with log_backend, +# will result in duplicate logs if both are enabled and one consumes logs from the other. +tracing_backend = ["dep:tracing"] diff --git a/src/common_logging.rs b/src/common_logging.rs new file mode 100644 index 0000000..a944906 --- /dev/null +++ b/src/common_logging.rs @@ -0,0 +1,73 @@ +macro_rules! generic_error { + ($($expr:tt)*) => { + #[cfg(feature = "log_backend")] + log::error!($($expr)*); + #[cfg(feature = "tracing_backend")] + tracing::error!($($expr)*); + }; +} + +macro_rules! generic_warn { + ($($expr:tt)*) => { + #[cfg(feature = "log_backend")] + log::warn!($($expr)*); + #[cfg(feature = "tracing_backend")] + tracing::warn!($($expr)*); + } +} + +macro_rules! generic_info { + ($($expr:tt)*) => { + #[cfg(feature = "log_backend")] + log::info!($($expr)*); + #[cfg(feature = "tracing_backend")] + tracing::info!($($expr)*); + } +} + +macro_rules! generic_debug { + ($($expr:tt)*) => { + #[cfg(feature = "log_backend")] + log::debug!($($expr)*); + #[cfg(feature = "tracing_backend")] + tracing::debug!($($expr)*); + } +} + +macro_rules! 
generic_trace { + ($($expr:tt)*) => { + #[cfg(feature = "log_backend")] + log::trace!($($expr)*); + #[cfg(feature = "tracing_backend")] + tracing::trace!($($expr)*); + } +} + +use whisper_rs_sys::ggml_log_level; +pub(crate) use {generic_debug, generic_error, generic_info, generic_trace, generic_warn}; + +// Unsigned integer type on most platforms is 32 bit, niche platforms that whisper.cpp +// likely doesn't even support would use 16 bit and would still fit +#[repr(u32)] +pub(crate) enum GGMLLogLevel { + None = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_NONE, + Info = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO, + Warn = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_WARN, + Error = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_ERROR, + Debug = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_DEBUG, + Cont = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_CONT, + Unknown(ggml_log_level), +} +impl From<ggml_log_level> for GGMLLogLevel { + fn from(level: ggml_log_level) -> Self { + match level { + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_NONE => GGMLLogLevel::None, + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO => GGMLLogLevel::Info, + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_WARN => GGMLLogLevel::Warn, + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_ERROR => GGMLLogLevel::Error, + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_DEBUG => GGMLLogLevel::Debug, + whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_CONT => GGMLLogLevel::Cont, + other => GGMLLogLevel::Unknown(other), + } + } +} diff --git a/src/ggml_logging_hook.rs b/src/ggml_logging_hook.rs new file mode 100644 index 0000000..d230ba4 --- /dev/null +++ b/src/ggml_logging_hook.rs @@ -0,0 +1,73 @@ +use crate::common_logging::{ + generic_debug, generic_error, generic_info, generic_trace, generic_warn, GGMLLogLevel, +}; +use core::ffi::{c_char, c_void}; +use std::borrow::Cow; +use std::ffi::CStr; +use std::sync::Once; +use whisper_rs_sys::ggml_log_level; + +static GGML_LOG_TRAMPOLINE_INSTALL: Once = Once::new();
+pub(crate) fn install_ggml_logging_hook() { + GGML_LOG_TRAMPOLINE_INSTALL.call_once(|| unsafe { + whisper_rs_sys::ggml_log_set(Some(ggml_logging_trampoline), std::ptr::null_mut()) + }); +} + +unsafe extern "C" fn ggml_logging_trampoline( + level: ggml_log_level, + text: *const c_char, + _: *mut c_void, // user_data +) { + if text.is_null() { + generic_error!("ggml_logging_trampoline: text is nullptr"); + } + let level = GGMLLogLevel::from(level); + + // SAFETY: we must trust ggml that it will not pass us a string that does not satisfy + // from_ptr's requirements. + let log_str = unsafe { CStr::from_ptr(text) }.to_string_lossy(); + + ggml_logging_trampoline_safe(level, log_str) +} + +// this code essentially compiles down to a noop if neither feature is enabled +#[cfg_attr( + not(any(feature = "log_backend", feature = "tracing_backend")), + allow(unused_variables) +)] +fn ggml_logging_trampoline_safe(level: GGMLLogLevel, text: Cow<str>) { + match level { + GGMLLogLevel::None => { + // no clue what to do here, trace it?
+ generic_trace!("{}", text.trim()); + } + GGMLLogLevel::Info => { + generic_info!("{}", text.trim()); + } + GGMLLogLevel::Warn => { + generic_warn!("{}", text.trim()); + } + GGMLLogLevel::Error => { + generic_error!("{}", text.trim()); + } + GGMLLogLevel::Debug => { + generic_debug!("{}", text.trim()); + } + GGMLLogLevel::Cont => { + // this means continue previous log + // storing state to do this is a massive pain so it's just a lot easier to not + // plus as far as i can tell it's not actually *used* anywhere + // ggml splits at 128 chars and doesn't actually change the kind of log + // so technically this is unused + generic_trace!("{}", text.trim()); + } + GGMLLogLevel::Unknown(level) => { + generic_warn!( + "ggml_logging_trampoline: unknown log level {}: message: {}", + level, + text.trim() + ); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 35214e7..893f069 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,20 @@ #![allow(clippy::uninlined_format_args)] #![cfg_attr(test, feature(test))] +mod common_logging; mod error; +mod ggml_logging_hook; mod standalone; mod utilities; mod whisper_ctx; mod whisper_ctx_wrapper; mod whisper_grammar; +mod whisper_logging_hook; mod whisper_params; mod whisper_state; -#[cfg(feature = "whisper-cpp-log")] -mod whisper_sys_log; -#[cfg(feature = "whisper-cpp-tracing")] -mod whisper_sys_tracing; - -#[cfg(any(feature = "whisper-cpp-log", feature = "whisper-cpp-tracing"))] -static LOG_TRAMPOLINE_INSTALL: Once = Once::new(); pub use error::WhisperError; pub use standalone::*; -#[cfg(any(feature = "whisper-cpp-log", feature = "whisper-cpp-tracing"))] -use std::sync::Once; pub use utilities::*; pub use whisper_ctx::DtwMode; pub use whisper_ctx::DtwModelPreset; @@ -33,10 +27,6 @@ pub use whisper_params::{FullParams, SamplingStrategy, SegmentCallbackData}; #[cfg(feature = "raw-api")] pub use whisper_rs_sys; pub use whisper_state::WhisperState; -#[cfg(feature = "whisper-cpp-log")] -pub use 
whisper_sys_log::install_whisper_log_trampoline; -#[cfg(feature = "whisper-cpp-tracing")] -pub use whisper_sys_tracing::install_whisper_tracing_trampoline; pub type WhisperSysContext = whisper_rs_sys::whisper_context; pub type WhisperSysState = whisper_rs_sys::whisper_state; @@ -53,3 +43,25 @@ pub type DtwAhead = whisper_rs_sys::whisper_ahead; /// The version of whisper.cpp that whisper-rs was linked with. pub static WHISPER_CPP_VERSION: &str = env!("WHISPER_CPP_VERSION"); + +/// Redirect all whisper.cpp and GGML logs to logging hooks installed by whisper-rs. +/// +/// This will stop most logs from being output to stdout/stderr and will bring them into +/// `log` or `tracing`, if the `log_backend` or `tracing_backend` features, respectively, +/// are enabled. If neither is enabled, this will essentially disable logging, as they won't +/// be output anywhere. +/// +/// Note whisper.cpp and GGML do not reliably follow Rust logging conventions. +/// Use your logging crate's configuration to control how these logs will be output. +/// whisper-rs does not currently output any logs, but this may change in the future. +/// You should configure by module path and use `whisper_rs::ggml_logging_hook`, +/// and/or `whisper_rs::whisper_logging_hook`, to avoid possibly ignoring useful +/// `whisper-rs` logs in the future. +/// +/// Safe to call multiple times. Only has an effect the first time. 
+/// (note this means installing your own logging handlers with unsafe functions after this call +/// is permanent and cannot be undone) +pub fn install_logging_hooks() { + crate::whisper_logging_hook::install_whisper_logging_hook(); + crate::ggml_logging_hook::install_ggml_logging_hook(); +} diff --git a/src/whisper_logging_hook.rs b/src/whisper_logging_hook.rs new file mode 100644 index 0000000..6017e96 --- /dev/null +++ b/src/whisper_logging_hook.rs @@ -0,0 +1,73 @@ +use crate::common_logging::{ + generic_debug, generic_error, generic_info, generic_trace, generic_warn, GGMLLogLevel, +}; +use core::ffi::{c_char, c_void}; +use std::borrow::Cow; +use std::ffi::CStr; +use std::sync::Once; +use whisper_rs_sys::ggml_log_level; + +static WHISPER_LOG_TRAMPOLINE_INSTALL: Once = Once::new(); +pub(crate) fn install_whisper_logging_hook() { + WHISPER_LOG_TRAMPOLINE_INSTALL.call_once(|| unsafe { + whisper_rs_sys::whisper_log_set(Some(whisper_logging_trampoline), std::ptr::null_mut()) + }); +} + +unsafe extern "C" fn whisper_logging_trampoline( + level: ggml_log_level, + text: *const c_char, + _: *mut c_void, // user_data +) { + if text.is_null() { + generic_error!("whisper_logging_trampoline: text is nullptr"); + } + let level = GGMLLogLevel::from(level); + + // SAFETY: we must trust whisper.cpp that it will not pass us a string that does not satisfy + // from_ptr's requirements. + let log_str = unsafe { CStr::from_ptr(text) }.to_string_lossy(); + + whisper_logging_trampoline_safe(level, log_str) +} + +// this code essentially compiles down to a noop if neither feature is enabled +#[cfg_attr( + not(any(feature = "log_backend", feature = "tracing_backend")), + allow(unused_variables) +)] +fn whisper_logging_trampoline_safe(level: GGMLLogLevel, text: Cow<str>) { + match level { + GGMLLogLevel::None => { + // no clue what to do here, trace it?
+ generic_trace!("{}", text.trim()); + } + GGMLLogLevel::Info => { + generic_info!("{}", text.trim()); + } + GGMLLogLevel::Warn => { + generic_warn!("{}", text.trim()); + } + GGMLLogLevel::Error => { + generic_error!("{}", text.trim()); + } + GGMLLogLevel::Debug => { + generic_debug!("{}", text.trim()); + } + GGMLLogLevel::Cont => { + // this means continue previous log + // storing state to do this is a massive pain so it's just a lot easier to not + // plus as far as i can tell it's not actually *used* anywhere + // whisper splits at 1024 chars and doesn't actually change the kind + // so technically this is unused + generic_trace!("{}", text.trim()); + } + GGMLLogLevel::Unknown(level) => { + generic_warn!( + "whisper_logging_trampoline: unknown log level {}: message: {}", + level, + text.trim() + ); + } + } +} diff --git a/src/whisper_sys_log.rs b/src/whisper_sys_log.rs deleted file mode 100644 index c056007..0000000 --- a/src/whisper_sys_log.rs +++ /dev/null @@ -1,42 +0,0 @@ -use log::{debug, error, info, warn}; -use whisper_rs_sys::ggml_log_level; - -unsafe extern "C" fn whisper_cpp_log_trampoline( - level: ggml_log_level, - text: *const std::os::raw::c_char, - _: *mut std::os::raw::c_void, // user_data -) { - if text.is_null() { - error!("whisper_cpp_log_trampoline: text is nullptr"); - } - - // SAFETY: we must trust whisper.cpp that it will not pass us a string that does not satisfy - // from_ptr's requirements. 
- let log_str = unsafe { std::ffi::CStr::from_ptr(text) }.to_string_lossy(); - // whisper.cpp gives newlines at the end of its log messages, so we trim them - let trimmed = log_str.trim(); - - match level { - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_DEBUG => debug!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO => info!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_WARN => warn!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_ERROR => error!("{}", trimmed), - _ => { - warn!( - "whisper_cpp_log_trampoline: unknown log level {}: message: {}", - level, trimmed - ) - } - } -} - -/// Shortcut utility to redirect all whisper.cpp logging to the `log` crate. -/// -/// Filter for logs from the `whisper-rs` crate to see all log output from whisper.cpp. -/// -/// You should only call this once (subsequent calls have no ill effect). -pub fn install_whisper_log_trampoline() { - crate::LOG_TRAMPOLINE_INSTALL.call_once(|| unsafe { - whisper_rs_sys::whisper_log_set(Some(whisper_cpp_log_trampoline), std::ptr::null_mut()); - }); -} diff --git a/src/whisper_sys_tracing.rs b/src/whisper_sys_tracing.rs deleted file mode 100644 index be1a394..0000000 --- a/src/whisper_sys_tracing.rs +++ /dev/null @@ -1,42 +0,0 @@ -use tracing::{debug, error, info, warn}; -use whisper_rs_sys::ggml_log_level; - -unsafe extern "C" fn whisper_cpp_tracing_trampoline( - level: ggml_log_level, - text: *const std::os::raw::c_char, - _: *mut std::os::raw::c_void, // user_data -) { - if text.is_null() { - error!("whisper_cpp_tracing_trampoline: text is nullptr"); - } - - // SAFETY: we must trust whisper.cpp that it will not pass us a string that does not satisfy - // from_ptr's requirements. 
- let log_str = unsafe { std::ffi::CStr::from_ptr(text) }.to_string_lossy(); - // whisper.cpp gives newlines at the end of its log messages, so we trim them - let trimmed = log_str.trim(); - - match level { - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_DEBUG => debug!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO => info!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_WARN => warn!("{}", trimmed), - whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_ERROR => error!("{}", trimmed), - _ => { - warn!( - "whisper_cpp_tracing_trampoline: unknown log level {}: message: {}", - level, trimmed - ) - } - } -} - -/// Shortcut utility to redirect all whisper.cpp logging to the `tracing` crate. -/// -/// Filter for logs from the `whisper-rs` crate to see all log output from whisper.cpp. -/// -/// You should only call this once (subsequent calls have no effect). -pub fn install_whisper_tracing_trampoline() { - crate::LOG_TRAMPOLINE_INSTALL.call_once(|| unsafe { - whisper_rs_sys::whisper_log_set(Some(whisper_cpp_tracing_trampoline), std::ptr::null_mut()); - }); -} From e989dead0f1eacaa0e7cf03edcf151b6b3aa4d88 Mon Sep 17 00:00:00 2001 From: Niko Date: Mon, 10 Feb 2025 14:05:36 -0700 Subject: [PATCH 14/22] Fix borked CI thanks to feature updates --- .github/workflows/pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 43a8abf..0a7085e 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -73,4 +73,4 @@ jobs: toolchain: ${{ matrix.rust-version }} - name: Check build - run: cargo build -F whisper-cpp-log,whisper-cpp-tracing --verbose --examples + run: cargo build -F log_backend,tracing_backend --verbose --examples From 03351b5f9c315e8495df2c75113edc91f274eb1c Mon Sep 17 00:00:00 2001 From: Niko Date: Mon, 10 Feb 2025 14:14:20 -0700 Subject: [PATCH 15/22] Fix Windows being special again --- src/common_logging.rs | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/src/common_logging.rs b/src/common_logging.rs index a944906..e3cffee 100644 --- a/src/common_logging.rs +++ b/src/common_logging.rs @@ -48,7 +48,10 @@ pub(crate) use {generic_debug, generic_error, generic_info, generic_trace, gener // Unsigned integer type on most platforms is 32 bit, niche platforms that whisper.cpp // likely doesn't even support would use 16 bit and would still fit -#[repr(u32)] +#[cfg_attr(any(not(windows), target_env = "gnu"), repr(u32))] +// Of course Windows thinks it's a special little shit and +// picks a signed integer for an unsigned type +#[cfg_attr(all(windows, not(target_env = "gnu")), repr(i32))] pub(crate) enum GGMLLogLevel { None = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_NONE, Info = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO, From b752731e8344d9ecfa5923da024f448d6a7c12da Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Thu, 13 Feb 2025 18:04:52 +0100 Subject: [PATCH 16/22] Re-add suppress_nst function with shorter fn name --- src/whisper_params.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/whisper_params.rs b/src/whisper_params.rs index 34b4adc..36a709e 100644 --- a/src/whisper_params.rs +++ b/src/whisper_params.rs @@ -299,6 +299,15 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.suppress_blank = suppress_blank; } + /// Set suppress_non_speech_tokens. + /// See + /// for more information. + /// + /// Defaults to false. + pub fn set_suppress_nst(&mut self, suppress_nst: bool) { + self.fp.suppress_nst = suppress_nst; + } + /// Set initial decoding temperature. /// See for more information. 
/// From aaaa1ed36198f3ec61df9d5e2271c8420bcb921e Mon Sep 17 00:00:00 2001 From: Christian Meter Date: Thu, 13 Feb 2025 18:09:09 +0100 Subject: [PATCH 17/22] Pin whisper.cpp to v1.7.4 --- sys/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper.cpp b/sys/whisper.cpp index 7ffcd05..8a9ad78 160000 --- a/sys/whisper.cpp +++ b/sys/whisper.cpp @@ -1 +1 @@ -Subproject commit 7ffcd05267b7ff47a271961c5eec11d1a15effe8 +Subproject commit 8a9ad7844d6e2a10cddf4b92de4089d7ac2b14a9 From 3d33a734581d8cc8bb018489622398bac4384603 Mon Sep 17 00:00:00 2001 From: hyiip Date: Wed, 19 Feb 2025 02:55:46 +0800 Subject: [PATCH 18/22] link cuda when cuda feature is on --- sys/build.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index 8181d24..ecea43e 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -265,6 +265,10 @@ fn main() { if cfg!(feature = "metal") { println!("cargo:rustc-link-lib=static=ggml-metal"); } + + if cfg!(feature = "cuda") { + println!("cargo:rustc-link-lib=static=ggml-cuda"); + } println!( "cargo:WHISPER_CPP_VERSION={}", From c48629c172f54fa38e4d34b23b1660d8900a38c4 Mon Sep 17 00:00:00 2001 From: hyiip Date: Wed, 19 Feb 2025 03:11:51 +0800 Subject: [PATCH 19/22] fmt fixed --- sys/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/build.rs b/sys/build.rs index ecea43e..8fd47a1 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -265,7 +265,7 @@ fn main() { if cfg!(feature = "metal") { println!("cargo:rustc-link-lib=static=ggml-metal"); } - + if cfg!(feature = "cuda") { println!("cargo:rustc-link-lib=static=ggml-cuda"); } From 099faf4e2ee08ec6b2fb936907208a8ac0641f30 Mon Sep 17 00:00:00 2001 From: Niko Date: Tue, 18 Feb 2025 17:03:13 -0700 Subject: [PATCH 20/22] Convert `full_get_token_*` and similar to use an internal helper instead of duplicating code --- src/whisper_state.rs | 87 +++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 33 deletions(-) 
diff --git a/src/whisper_state.rs b/src/whisper_state.rs index c82dd18..4a9c254 100644 --- a/src/whisper_state.rs +++ b/src/whisper_state.rs @@ -425,22 +425,7 @@ impl WhisperState { Ok(unsafe { whisper_rs_sys::whisper_full_n_tokens_from_state(self.ptr, segment) }) } - /// Get the token text of the specified token in the specified segment. - /// - /// # Arguments - /// * segment: Segment index. - /// * token: Token index. - /// - /// # Returns - /// Ok(String) on success, Err(WhisperError) on failure. - /// - /// # C++ equivalent - /// `const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token)` - pub fn full_get_token_text( - &self, - segment: c_int, - token: c_int, - ) -> Result<String, WhisperError> { + fn full_get_token_raw(&self, segment: c_int, token: c_int) -> Result<&CStr, WhisperError> { let ret = unsafe { whisper_rs_sys::whisper_full_get_token_text_from_state( self.ctx.ctx, @@ -452,9 +437,53 @@ impl WhisperState { if ret.is_null() { return Err(WhisperError::NullPointer); } - let c_str = unsafe { CStr::from_ptr(ret) }; - let r_str = c_str.to_str()?; - Ok(r_str.to_string()) + unsafe { Ok(CStr::from_ptr(ret)) } + } + + /// Get the raw token bytes of the specified token in the specified segment. + /// + /// Useful if you're using a language for which whisper is known to split tokens + /// away from UTF-8 character boundaries. + /// + /// # Arguments + /// * segment: Segment index. + /// * token: Token index.
+ /// + /// # Returns + /// `Ok(Vec<u8>)` on success, with the returned bytes or + /// `Err(WhisperError::NullPointer)` on failure (this is the only possible error) + /// + /// # C++ equivalent + /// `const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token)` + pub fn full_get_token_bytes( + &self, + segment: c_int, + token: c_int, + ) -> Result<Vec<u8>, WhisperError> { + Ok(self.full_get_token_raw(segment, token)?.to_bytes().to_vec()) + } + + /// Get the token text of the specified token in the specified segment. + /// + /// # Arguments + /// * segment: Segment index. + /// * token: Token index. + /// + /// # Returns + /// `Ok(String)` on success, with the UTF-8 validated string, or + /// `Err(WhisperError)` on failure (either `NullPointer` or `InvalidUtf8`) + /// + /// # C++ equivalent + /// `const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token)` + pub fn full_get_token_text( + &self, + segment: c_int, + token: c_int, + ) -> Result<String, WhisperError> { + Ok(self + .full_get_token_raw(segment, token)? + .to_str()? + .to_string()) } /// Get the token text of the specified token in the specified segment. @@ -467,7 +496,8 @@ impl WhisperState { /// * token: Token index. /// /// # Returns - /// Ok(String) on success, Err(WhisperError) on failure. + /// `Ok(String)` on success, or + /// `Err(WhisperError::NullPointer)` on failure (this is the only possible error) /// /// # C++ equivalent /// `const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token)` @@ -476,19 +506,10 @@ impl WhisperState { segment: c_int, token: c_int, ) -> Result<String, WhisperError> { - let ret = unsafe { - whisper_rs_sys::whisper_full_get_token_text_from_state( - self.ctx.ctx, - self.ptr, - segment, - token, - ) - }; - if ret.is_null() { - return Err(WhisperError::NullPointer); - } - let c_str = unsafe { CStr::from_ptr(ret) }; - Ok(c_str.to_string_lossy().to_string()) + Ok(self + .full_get_token_raw(segment, token)?
+ .to_string_lossy() + .to_string()) } /// Get the token ID of the specified token in the specified segment. From 9a96b0e1c3e73fd3774b9a7e044ef2ed4a17292d Mon Sep 17 00:00:00 2001 From: Niko Date: Tue, 18 Feb 2025 17:25:26 -0700 Subject: [PATCH 21/22] Convert `full_get_segment_*` and similar to use an internal helper instead of duplicating code --- src/whisper_state.rs | 70 +++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/src/whisper_state.rs b/src/whisper_state.rs index 4a9c254..22418ee 100644 --- a/src/whisper_state.rs +++ b/src/whisper_state.rs @@ -346,25 +346,43 @@ impl WhisperState { Ok(unsafe { whisper_rs_sys::whisper_full_get_segment_t1_from_state(self.ptr, segment) }) } + fn full_get_segment_raw(&self, segment: c_int) -> Result<&CStr, WhisperError> { + let ret = + unsafe { whisper_rs_sys::whisper_full_get_segment_text_from_state(self.ptr, segment) }; + if ret.is_null() { + return Err(WhisperError::NullPointer); + } + unsafe { Ok(CStr::from_ptr(ret)) } + } + + /// Get the raw bytes of the specified segment. + /// + /// # Arguments + /// * segment: Segment index. + /// + /// # Returns + /// `Ok(Vec<u8>)` on success, with the returned bytes or + /// `Err(WhisperError::NullPointer)` on failure (this is the only possible error) + /// + /// # C++ equivalent + /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)` + pub fn full_get_segment_bytes(&self, segment: c_int) -> Result<Vec<u8>, WhisperError> { + Ok(self.full_get_segment_raw(segment)?.to_bytes().to_vec()) + } + /// Get the text of the specified segment. /// /// # Arguments /// * segment: Segment index. /// /// # Returns - /// Ok(String) on success, Err(WhisperError) on failure.
+ /// `Ok(String)` on success, with the UTF-8 validated string, or + /// `Err(WhisperError)` on failure (either `NullPointer` or `InvalidUtf8`) /// /// # C++ equivalent /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)` pub fn full_get_segment_text(&self, segment: c_int) -> Result { - let ret = - unsafe { whisper_rs_sys::whisper_full_get_segment_text_from_state(self.ptr, segment) }; - if ret.is_null() { - return Err(WhisperError::NullPointer); - } - let c_str = unsafe { CStr::from_ptr(ret) }; - let r_str = c_str.to_str()?; - Ok(r_str.to_string()) + Ok(self.full_get_segment_raw(segment)?.to_str()?.to_string()) } /// Get the text of the specified segment. @@ -376,38 +394,16 @@ impl WhisperState { /// * segment: Segment index. /// /// # Returns - /// Ok(String) on success, Err(WhisperError) on failure. + /// `Ok(String)` on success, or + /// `Err(WhisperError::NullPointer)` on failure (this is the only possible error) /// /// # C++ equivalent /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)` pub fn full_get_segment_text_lossy(&self, segment: c_int) -> Result { - let ret = - unsafe { whisper_rs_sys::whisper_full_get_segment_text_from_state(self.ptr, segment) }; - if ret.is_null() { - return Err(WhisperError::NullPointer); - } - let c_str = unsafe { CStr::from_ptr(ret) }; - Ok(c_str.to_string_lossy().to_string()) - } - - /// Get the bytes of the specified segment. - /// - /// # Arguments - /// * segment: Segment index. - /// - /// # Returns - /// `Ok(Vec)` on success, `Err(WhisperError)` on failure. 
- /// - /// # C++ equivalent - /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)` - pub fn full_get_segment_bytes(&self, segment: c_int) -> Result, WhisperError> { - let ret = - unsafe { whisper_rs_sys::whisper_full_get_segment_text_from_state(self.ptr, segment) }; - if ret.is_null() { - return Err(WhisperError::NullPointer); - } - let c_str = unsafe { CStr::from_ptr(ret) }; - Ok(c_str.to_bytes().to_vec()) + Ok(self + .full_get_segment_raw(segment)? + .to_string_lossy() + .to_string()) } /// Get number of tokens in the specified segment. From 54799643e26023710e66fbe6a0fd73e650dff91b Mon Sep 17 00:00:00 2001 From: Martin Mende Date: Fri, 21 Feb 2025 23:23:21 +0100 Subject: [PATCH 22/22] Exposing GGMLLogLevel for custom log callbacks (#203) * Made GGMLLogLevel public --- src/common_logging.rs | 2 +- src/lib.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common_logging.rs b/src/common_logging.rs index e3cffee..bf192ce 100644 --- a/src/common_logging.rs +++ b/src/common_logging.rs @@ -52,7 +52,7 @@ pub(crate) use {generic_debug, generic_error, generic_info, generic_trace, gener // Of course Windows thinks it's a special little shit and // picks a signed integer for an unsigned type #[cfg_attr(all(windows, not(target_env = "gnu")), repr(i32))] -pub(crate) enum GGMLLogLevel { +pub enum GGMLLogLevel { None = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_NONE, Info = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_INFO, Warn = whisper_rs_sys::ggml_log_level_GGML_LOG_LEVEL_WARN, diff --git a/src/lib.rs b/src/lib.rs index 893f069..a6632a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ mod whisper_logging_hook; mod whisper_params; mod whisper_state; +pub use common_logging::GGMLLogLevel; pub use error::WhisperError; pub use standalone::*; pub use utilities::*;