From 5c46b0a6677ee59321c2d5aa5cc387f3fc7ac8e8 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:38:00 +0300 Subject: [PATCH] feat: improve basic example --- .gitignore | 4 ++- examples/basic_use.rs | 59 +++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 4f72f63..902abe5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ **/target **/Cargo.lock /.idea -/.vscode \ No newline at end of file +/.vscode +*.bin +*.wav \ No newline at end of file diff --git a/examples/basic_use.rs b/examples/basic_use.rs index 8627473..415371e 100644 --- a/examples/basic_use.rs +++ b/examples/basic_use.rs @@ -1,54 +1,59 @@ -#![allow(clippy::uninlined_format_args)] +/* +wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin +wget https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav +cargo run --example basic_use ggml-tiny.bin jfk.wav +*/ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; -// note that running this example will not do anything, as it is just a -// demonstration of how to use the library, and actual usage requires -// more dependencies than the base library. -pub fn usage() -> Result<(), &'static str> { +fn main() { + let model_path = std::env::args() + .nth(1) + .expect("Please specify path to model"); + let wav_path = std::env::args() + .nth(2) + .expect("Please specify path to wav file"); + let language = "en"; + + let samples: Vec = hound::WavReader::open(wav_path) + .unwrap() + .into_samples::() + .map(|x| x.unwrap()) + .collect(); + // load a context and model - let ctx = WhisperContext::new_with_params("path/to/model", WhisperContextParameters::default()) + let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default()) .expect("failed to load model"); - // make a state + let mut state = ctx.create_state().expect("failed to create state"); - // create a params object - // note that currently the only implemented strategy is Greedy, BeamSearch is a WIP - // n_past defaults to 0 let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - // edit things as needed - // here we set the number of threads to use to 1 - params.set_n_threads(1); - // we also enable translation - params.set_translate(true); // and set the language to translate to to english - params.set_language(Some("en")); + params.set_language(Some(&language)); + // we also explicitly disable anything that prints to stdout params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); params.set_print_timestamps(false); - // assume we have a buffer of audio data - // here we'll make a fake one, integer samples, 16 bit, 16KHz, stereo - let audio_data = vec![0_i16; 16000 * 2]; - // we must convert to 16KHz mono f32 samples for the model // some utilities exist for this // note that you don't need to use these, you can do it yourself or any other way you want // these are just provided for convenience // SIMD variants of these functions are also available, but only on nightly Rust: see the docs - let mut inter_audio_data = Vec::with_capacity(audio_data.len()); - whisper_rs::convert_integer_to_float_audio(&audio_data, &mut inter_audio_data) + let mut inter_samples = vec![Default::default(); samples.len()]; + + whisper_rs::convert_integer_to_float_audio(&samples, &mut inter_samples) .expect("failed to convert audio data"); - let audio_data = whisper_rs::convert_stereo_to_mono_audio(&inter_audio_data) + let samples = whisper_rs::convert_stereo_to_mono_audio(&inter_samples) .expect("failed to convert audio data"); // now we can run the model // note the key we use here is the one we created above state - .full(params, &audio_data[..]) + .full(params, &samples[..]) .expect("failed to run model"); // fetch the results @@ -67,10 +72,4 @@ pub fn usage() -> Result<(), &'static str> { .expect("failed to get segment end timestamp"); println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); } - - Ok(()) -} - -fn main() { - println!("running this example does nothing! see the source code for usage"); }