Merge pull request #174 from thewh1teagle/feat/improve-basic-example

feat: improve basic example
This commit is contained in:
Niko 2024-10-21 23:55:09 +00:00 committed by GitHub
commit cfec70d9cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 32 additions and 31 deletions

4
.gitignore vendored
View file

@ -1,4 +1,6 @@
**/target **/target
**/Cargo.lock **/Cargo.lock
/.idea /.idea
/.vscode /.vscode
*.bin
*.wav

View file

@ -1,54 +1,59 @@
#![allow(clippy::uninlined_format_args)] /*
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
wget https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav
cargo run --example basic_use ggml-tiny.bin jfk.wav
*/
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
// note that running this example will not do anything, as it is just a fn main() {
// demonstration of how to use the library, and actual usage requires let model_path = std::env::args()
// more dependencies than the base library. .nth(1)
pub fn usage() -> Result<(), &'static str> { .expect("Please specify path to model");
let wav_path = std::env::args()
.nth(2)
.expect("Please specify path to wav file");
let language = "en";
let samples: Vec<i16> = hound::WavReader::open(wav_path)
.unwrap()
.into_samples::<i16>()
.map(|x| x.unwrap())
.collect();
// load a context and model // load a context and model
let ctx = WhisperContext::new_with_params("path/to/model", WhisperContextParameters::default()) let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default())
.expect("failed to load model"); .expect("failed to load model");
// make a state
let mut state = ctx.create_state().expect("failed to create state"); let mut state = ctx.create_state().expect("failed to create state");
// create a params object
// note that currently the only implemented strategy is Greedy, BeamSearch is a WIP
// n_past defaults to 0
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
// edit things as needed
// here we set the number of threads to use to 1
params.set_n_threads(1);
// we also enable translation
params.set_translate(true);
// and set the language to translate to english // and set the language to translate to english
params.set_language(Some("en")); params.set_language(Some(&language));
// we also explicitly disable anything that prints to stdout // we also explicitly disable anything that prints to stdout
params.set_print_special(false); params.set_print_special(false);
params.set_print_progress(false); params.set_print_progress(false);
params.set_print_realtime(false); params.set_print_realtime(false);
params.set_print_timestamps(false); params.set_print_timestamps(false);
// assume we have a buffer of audio data
// here we'll make a fake one, integer samples, 16 bit, 16KHz, stereo
let audio_data = vec![0_i16; 16000 * 2];
// we must convert to 16KHz mono f32 samples for the model // we must convert to 16KHz mono f32 samples for the model
// some utilities exist for this // some utilities exist for this
// note that you don't need to use these, you can do it yourself or any other way you want // note that you don't need to use these, you can do it yourself or any other way you want
// these are just provided for convenience // these are just provided for convenience
// SIMD variants of these functions are also available, but only on nightly Rust: see the docs // SIMD variants of these functions are also available, but only on nightly Rust: see the docs
let mut inter_audio_data = Vec::with_capacity(audio_data.len()); let mut inter_samples = vec![Default::default(); samples.len()];
whisper_rs::convert_integer_to_float_audio(&audio_data, &mut inter_audio_data)
whisper_rs::convert_integer_to_float_audio(&samples, &mut inter_samples)
.expect("failed to convert audio data"); .expect("failed to convert audio data");
let audio_data = whisper_rs::convert_stereo_to_mono_audio(&inter_audio_data) let samples = whisper_rs::convert_stereo_to_mono_audio(&inter_samples)
.expect("failed to convert audio data"); .expect("failed to convert audio data");
// now we can run the model // now we can run the model
// note the key we use here is the one we created above // note the key we use here is the one we created above
state state
.full(params, &audio_data[..]) .full(params, &samples[..])
.expect("failed to run model"); .expect("failed to run model");
// fetch the results // fetch the results
@ -67,10 +72,4 @@ pub fn usage() -> Result<(), &'static str> {
.expect("failed to get segment end timestamp"); .expect("failed to get segment end timestamp");
println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
} }
Ok(())
}
fn main() {
println!("running this example does nothing! see the source code for usage");
} }