/* wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin wget https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav cargo run --example basic_use ggml-tiny.bin jfk.wav */ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters}; fn main() { let model_path = std::env::args() .nth(1) .expect("Please specify path to model as argument 1"); let wav_path = std::env::args() .nth(2) .expect("Please specify path to wav file as argument 2"); let language = "en"; let samples: Vec = hound::WavReader::open(wav_path) .unwrap() .into_samples::() .map(|x| x.unwrap()) .collect(); // load a context and model let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default()) .expect("failed to load model"); let mut state = ctx.create_state().expect("failed to create state"); let mut params = FullParams::new(SamplingStrategy::BeamSearch { beam_size: 5, patience: -1.0, }); // and set the language to translate to to english params.set_language(Some(&language)); // we also explicitly disable anything that prints to stdout params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); params.set_print_timestamps(false); // we must convert to 16KHz mono f32 samples for the model // some utilities exist for this // note that you don't need to use these, you can do it yourself or any other way you want // these are just provided for convenience let mut inter_samples = vec![Default::default(); samples.len()]; whisper_rs::convert_integer_to_float_audio(&samples, &mut inter_samples) .expect("failed to convert audio data"); let samples = whisper_rs::convert_stereo_to_mono_audio(&inter_samples) .expect("failed to convert audio data"); // now we can run the model state .full(params, &samples[..]) .expect("failed to run model"); // fetch the results for segment in state.as_iter() { println!( "[{} - {}]: {}", segment.start_timestamp(), segment.end_timestamp(), segment ); } }