From 5c46b0a6677ee59321c2d5aa5cc387f3fc7ac8e8 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:38:00 +0300 Subject: [PATCH 1/3] feat: improve basic example --- .gitignore | 4 ++- examples/basic_use.rs | 59 +++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 4f72f63..902abe5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ **/target **/Cargo.lock /.idea -/.vscode \ No newline at end of file +/.vscode +*.bin +*.wav \ No newline at end of file diff --git a/examples/basic_use.rs b/examples/basic_use.rs index 8627473..415371e 100644 --- a/examples/basic_use.rs +++ b/examples/basic_use.rs @@ -1,54 +1,59 @@ -#![allow(clippy::uninlined_format_args)] +/* +wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin +wget https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav +cargo run --example basic_use ggml-tiny.bin jfk.wav +*/ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; -// note that running this example will not do anything, as it is just a -// demonstration of how to use the library, and actual usage requires -// more dependencies than the base library. -pub fn usage() -> Result<(), &'static str> { +fn main() { + let model_path = std::env::args() + .nth(1) + .expect("Please specify path to model"); + let wav_path = std::env::args() + .nth(2) + .expect("Please specify path to wav file"); + let language = "en"; + + let samples: Vec = hound::WavReader::open(wav_path) + .unwrap() + .into_samples::() + .map(|x| x.unwrap()) + .collect(); + // load a context and model - let ctx = WhisperContext::new_with_params("path/to/model", WhisperContextParameters::default()) + let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default()) .expect("failed to load model"); - // make a state + let mut state = ctx.create_state().expect("failed to create state"); - // create a params object - // note that currently the only implemented strategy is Greedy, BeamSearch is a WIP - // n_past defaults to 0 let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - // edit things as needed - // here we set the number of threads to use to 1 - params.set_n_threads(1); - // we also enable translation - params.set_translate(true); // and set the language to translate to to english - params.set_language(Some("en")); + params.set_language(Some(&language)); + // we also explicitly disable anything that prints to stdout params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); params.set_print_timestamps(false); - // assume we have a buffer of audio data - // here we'll make a fake one, integer samples, 16 bit, 16KHz, stereo - let audio_data = vec![0_i16; 16000 * 2]; - // we must convert to 16KHz mono f32 samples for the model // some utilities exist for this // note that you don't need to use these, you can do it yourself or any other way you want // these are just provided for convenience // SIMD variants of these functions are also available, but only on nightly Rust: see the docs - let mut inter_audio_data = Vec::with_capacity(audio_data.len()); - whisper_rs::convert_integer_to_float_audio(&audio_data, &mut inter_audio_data) + let mut inter_samples = vec![Default::default(); samples.len()]; + + whisper_rs::convert_integer_to_float_audio(&samples, &mut inter_samples) .expect("failed to convert audio data"); - let audio_data = whisper_rs::convert_stereo_to_mono_audio(&inter_audio_data) + let samples = whisper_rs::convert_stereo_to_mono_audio(&inter_samples) .expect("failed to convert audio data"); // now we can run the model // note the key we use here is the one we created above state - .full(params, &audio_data[..]) + .full(params, &samples[..]) .expect("failed to run model"); // fetch the results @@ -67,10 +72,4 @@ pub fn usage() -> Result<(), &'static str> { .expect("failed to get segment end timestamp"); println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); } - - Ok(()) -} - -fn main() { - println!("running this example does nothing! see the source code for usage"); } From 9cb98dcf53f29e11a4821ffeaf999e5b8d576332 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:56:01 +0300 Subject: [PATCH 2/3] fix: add search path for coreml on macOS --- sys/build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index 1f904a9..696867b 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -32,6 +32,7 @@ fn main() { #[cfg(feature = "coreml")] println!("cargo:rustc-link-lib=static=whisper.coreml"); + #[cfg(feature = "openblas")] { if let Ok(openblas_path) = env::var("OPENBLAS_PATH") { @@ -233,6 +234,7 @@ fn main() { let destination = config.build(); add_link_search_path(&out.join("lib")).unwrap(); + add_link_search_path(&out.join("build/src")).unwrap(); println!("cargo:rustc-link-search=native={}", destination.display()); println!("cargo:rustc-link-lib=static=whisper"); From d12415237627b55bafd055ee3d49753b87a73c88 Mon Sep 17 00:00:00 2001 From: flavio Date: Tue, 17 Sep 2024 15:26:44 +0200 Subject: [PATCH 3/3] fix: macos vulkan build script --- sys/build.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index 1f904a9..e1b0ec5 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -191,6 +191,17 @@ fn main() { }; let vulkan_lib_path = vulkan_path.join("Lib"); println!("cargo:rustc-link-search={}", vulkan_lib_path.display()); + } else if cfg!(target_os = "macos") { + println!("cargo:rerun-if-env-changed=VULKAN_SDK"); + println!("cargo:rustc-link-lib=vulkan"); + let vulkan_path = match env::var("VULKAN_SDK") { + Ok(path) => PathBuf::from(path), + Err(_) => panic!( + "Please install Vulkan SDK and ensure that VULKAN_SDK env variable is set" + ), + }; + let vulkan_lib_path = vulkan_path.join("lib"); + println!("cargo:rustc-link-search={}", vulkan_lib_path.display()); } else { println!("cargo:rustc-link-lib=vulkan"); }