use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout. fn main() { // Load a context and model. let mut ctx = WhisperContext::new( "/Users/lucas/Documents/code/meetsary/whisper-test/whisper.cpp/models/ggml-base.en.bin", ) .expect("failed to load model"); // Create a params object for running the model. // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP. // The number of past samples to consider defaults to 0. let mut params = FullParams::new(SamplingStrategy::Greedy { n_past: 0 }); // Edit params as needed. // Set the number of threads to use to 1. params.set_n_threads(1); // Enable translation. params.set_translate(true); // Set the language to translate to to English. params.set_language("en"); // Disable anything that prints to stdout. params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); params.set_print_timestamps(false); // Open the audio file. let mut reader = hound::WavReader::open("weeknd-2.wav").expect("failed to open file"); let hound::WavSpec { channels, sample_rate, bits_per_sample, .. } = reader.spec(); // Convert the audio to floating point samples. let mut audio = whisper_rs::convert_integer_to_float_audio( &reader .samples::() .map(|s| s.expect("invalid sample")) .collect::>(), ); // Convert audio to 16KHz mono f32 samples, as required by the model. // These utilities are provided for convenience, but can be replaced with custom conversion logic. // SIMD variants of these functions are also available on nightly Rust (see the docs). if channels == 2 { audio = whisper_rs::convert_stereo_to_mono_audio(&audio); } else if channels != 1 { panic!(">2 channels unsupported"); } if sample_rate != 16000 { panic!("sample rate must be 16KHz"); } // Run the model. ctx.full(params, &audio[..]).expect("failed to run model"); // Fetch and print the results. let num_segments = ctx.full_n_segments(); for i in 0..num_segments { let segment = ctx.full_get_segment_text(i).expect("failed to get segment"); let start_timestamp = ctx.full_get_segment_t0(i); let end_timestamp = ctx.full_get_segment_t1(i); println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); } }