From 60784be43ab1aeea2d1ab54978ee9534a4799e42 Mon Sep 17 00:00:00 2001 From: Kusaanko <39370373+kusaanko@users.noreply.github.com> Date: Fri, 28 Feb 2025 01:00:12 +0900 Subject: [PATCH 1/5] Use UTF-8 to build sys on Windows --- sys/build.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index 8fd47a1..a1716f9 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -164,6 +164,10 @@ fn main() { .very_verbose(true) .pic(true); + if cfg!(target_os = "windows") { + config.cxxflag("/utf-8"); + } + if cfg!(feature = "coreml") { config.define("WHISPER_COREML", "ON"); config.define("WHISPER_COREML_ALLOW_FALLBACK", "1"); From dc4eace7e600df4f7d9560d6faa13fa05d6f8f5c Mon Sep 17 00:00:00 2001 From: Kusaanko <39370373+kusaanko@users.noreply.github.com> Date: Fri, 28 Feb 2025 18:45:59 +0900 Subject: [PATCH 2/5] Fix to be able to compile with openblas --- sys/build.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sys/build.rs b/sys/build.rs index 8fd47a1..171a1b2 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -214,6 +214,12 @@ fn main() { if cfg!(feature = "openblas") { config.define("GGML_BLAS", "ON"); + config.define("GGML_BLAS_VENDOR", "OpenBLAS"); + if env::var("BLAS_INCLUDE_DIRS").is_err() { + panic!("BLAS_INCLUDE_DIRS environment variable must be set when using OpenBLAS"); + } + config.define("BLAS_INCLUDE_DIRS", env::var("BLAS_INCLUDE_DIRS").unwrap()); + println!("cargo:rerun-if-env-changed=BLAS_INCLUDE_DIRS"); } if cfg!(feature = "metal") { @@ -255,7 +261,7 @@ fn main() { println!("cargo:rustc-link-lib=static=ggml"); println!("cargo:rustc-link-lib=static=ggml-base"); println!("cargo:rustc-link-lib=static=ggml-cpu"); - if cfg!(target_os = "macos") { + if cfg!(target_os = "macos") || cfg!(target_os = "openblas") { println!("cargo:rustc-link-lib=static=ggml-blas"); } if cfg!(feature = "vulkan") { From e3c289b6a4dbe166f05607a7b2ec28438008e90a Mon Sep 17 00:00:00 2001 From: Kusaanko <39370373+kusaanko@users.noreply.github.com> Date: Fri, 28 Feb 2025 18:51:28 +0900 Subject: [PATCH 3/5] Fix wrong cfg! for openblas --- sys/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/build.rs b/sys/build.rs index 171a1b2..bcf858a 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -261,7 +261,7 @@ fn main() { println!("cargo:rustc-link-lib=static=ggml"); println!("cargo:rustc-link-lib=static=ggml-base"); println!("cargo:rustc-link-lib=static=ggml-cpu"); - if cfg!(target_os = "macos") || cfg!(target_os = "openblas") { + if cfg!(target_os = "macos") || cfg!(feature = "openblas") { println!("cargo:rustc-link-lib=static=ggml-blas"); } if cfg!(feature = "vulkan") { From 278df9b843617ae4d595c83e2bfbdd26a25dfa52 Mon Sep 17 00:00:00 2001 From: newfla Date: Mon, 3 Mar 2025 16:53:36 +0100 Subject: [PATCH 4/5] Revert "fix: segment_callaback logic" This reverts commit 684ec8e733ccad412b52759b7d902aaf096b2b7a. --- src/whisper_params.rs | 56 ++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/whisper_params.rs b/src/whisper_params.rs index 1113791..1bb2808 100644 --- a/src/whisper_params.rs +++ b/src/whisper_params.rs @@ -419,20 +419,26 @@ impl<'a, 'b> FullParams<'a, 'b> { { unsafe { let user_data = &mut *(user_data as *mut SegmentCallbackFn); - let text = whisper_rs_sys::whisper_full_get_segment_text_from_state(state, n_new); - let text = CStr::from_ptr(text); + let n_segments = whisper_rs_sys::whisper_full_n_segments_from_state(state); + let s0 = n_segments - n_new; + //let user_data = user_data as *mut Box; - let t0 = whisper_rs_sys::whisper_full_get_segment_t0_from_state(state, n_new); - let t1 = whisper_rs_sys::whisper_full_get_segment_t1_from_state(state, n_new); + for i in s0..n_segments { + let text = whisper_rs_sys::whisper_full_get_segment_text_from_state(state, i); + let text = CStr::from_ptr(text); - match text.to_str() { - Ok(n) => user_data(SegmentCallbackData { - segment: n_new + 1, - start_timestamp: t0, - end_timestamp: t1, - text: n.to_string(), - }), - Err(_) => {} + let t0 = whisper_rs_sys::whisper_full_get_segment_t0_from_state(state, i); + let t1 = whisper_rs_sys::whisper_full_get_segment_t1_from_state(state, i); + + match text.to_str() { + Ok(n) => user_data(SegmentCallbackData { + segment: i, + start_timestamp: t0, + end_timestamp: t1, + text: n.to_string(), + }), + Err(_) => {} + } } } } @@ -482,17 +488,23 @@ impl<'a, 'b> FullParams<'a, 'b> { { unsafe { let user_data = &mut *(user_data as *mut SegmentCallbackFn); - let text = whisper_rs_sys::whisper_full_get_segment_text_from_state(state, n_new); - let text = CStr::from_ptr(text); + let n_segments = whisper_rs_sys::whisper_full_n_segments_from_state(state); + let s0 = n_segments - n_new; + //let user_data = user_data as *mut Box; - let t0 = whisper_rs_sys::whisper_full_get_segment_t0_from_state(state, n_new); - let t1 = whisper_rs_sys::whisper_full_get_segment_t1_from_state(state, n_new); - user_data(SegmentCallbackData { - segment: n_new, - start_timestamp: t0, - end_timestamp: t1, - text: text.to_string_lossy().to_string(), - }); + for i in s0..n_segments { + let text = whisper_rs_sys::whisper_full_get_segment_text_from_state(state, i); + let text = CStr::from_ptr(text); + + let t0 = whisper_rs_sys::whisper_full_get_segment_t0_from_state(state, i); + let t1 = whisper_rs_sys::whisper_full_get_segment_t1_from_state(state, i); + user_data(SegmentCallbackData { + segment: i, + start_timestamp: t0, + end_timestamp: t1, + text: text.to_string_lossy().to_string(), + }); + } } } From 747f71da57487be6a1772e4eb014f860e5766c3a Mon Sep 17 00:00:00 2001 From: newfla Date: Mon, 3 Mar 2025 17:01:29 +0100 Subject: [PATCH 5/5] doc: added dtw-callback disclaimer --- src/whisper_params.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/whisper_params.rs b/src/whisper_params.rs index 1bb2808..607a65d 100644 --- a/src/whisper_params.rs +++ b/src/whisper_params.rs @@ -376,6 +376,7 @@ impl<'a, 'b> FullParams<'a, 'b> { /// Do not use this function unless you know what you are doing. /// * Be careful not to mutate the state of the whisper_context pointer returned in the callback. /// This could cause undefined behavior, as this violates the thread-safety guarantees of the underlying C library. + /// **Warning** Can't be used with DTW. DTW will produce inconsistent callback invocation /// /// Defaults to None. pub unsafe fn set_new_segment_callback( @@ -389,6 +390,7 @@ impl<'a, 'b> FullParams<'a, 'b> { /// /// # Safety /// See the safety notes for `set_new_segment_callback`. + /// **Warning** Can't be used with DTW. DTW will produce inconsistent callback invocation /// /// Defaults to None. pub unsafe fn set_new_segment_callback_user_data(&mut self, user_data: *mut std::ffi::c_void) { @@ -399,6 +401,7 @@ impl<'a, 'b> FullParams<'a, 'b> { /// /// Provides a limited segment_callback to ensure safety. /// See `set_new_segment_callback` if you need to use `whisper_context` and `whisper_state` + /// **Warning** Can't be used with DTW. DTW will produce inconsistent callback invocation /// /// Defaults to None. pub fn set_segment_callback_safe(&mut self, closure: O) @@ -468,6 +471,7 @@ impl<'a, 'b> FullParams<'a, 'b> { /// /// Provides a limited segment_callback to ensure safety with lossy handling of bad UTF-8 characters. /// See `set_new_segment_callback` if you need to use `whisper_context` and `whisper_state`. + /// **Warning** Can't be used with DTW. DTW will produce inconsistent callback invocation /// /// Defaults to None. pub fn set_segment_callback_safe_lossy(&mut self, closure: O)