wyoming-whisper-rs/examples/full_usage/src/main.rs
2023-08-11 15:10:50 +02:00

75 lines
2.5 KiB
Rust

#![allow(clippy::uninlined_format_args)]
use hound::{SampleFormat, WavReader};
use std::path::Path;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
/// Reads all samples from the WAV file at `path` as signed 16-bit integers.
///
/// # Panics
///
/// Panics if the file cannot be opened as a WAV file, if its header does not
/// describe mono, integer, 16000 Hz, 16-bit audio, or if any individual
/// sample fails to decode.
fn parse_wav_file(path: &Path) -> Vec<i16> {
    let wav = WavReader::open(path).expect("failed to read file");
    // Fetch the header once; every format check below reads from this copy.
    let spec = wav.spec();

    if spec.channels != 1 {
        panic!("expected mono audio file");
    }
    if spec.sample_format != SampleFormat::Int {
        panic!("expected integer sample format");
    }
    if spec.sample_rate != 16000 {
        panic!("expected 16KHz sample rate");
    }
    if spec.bits_per_sample != 16 {
        panic!("expected 16 bits per sample");
    }

    let samples: Vec<i16> = wav
        .into_samples::<i16>()
        .map(|sample| sample.expect("sample"))
        .collect();
    samples
}
/// Transcribes a WAV file with a Whisper model and prints each segment.
///
/// Usage: `<program> <audio.wav> <model-file>`
///
/// Prints a progress line while the model runs, then one line per segment in
/// the form `[start - end]: text`, and finally the wall-clock time spent in
/// the inference call in milliseconds.
///
/// # Panics
///
/// Panics if either argument is missing, if either path does not exist, if
/// the audio file has an unexpected format, or if any Whisper call fails.
fn main() {
    // Skip the program name; the remaining arguments are consumed in order.
    let mut args = std::env::args().skip(1);

    let arg1 = args
        .next()
        .expect("first argument should be path to WAV file");
    let audio_path = Path::new(&arg1);
    if !audio_path.exists() {
        panic!("audio file doesn't exist");
    }

    let arg2 = args
        .next()
        .expect("second argument should be path to Whisper model");
    let whisper_path = Path::new(&arg2);
    if !whisper_path.exists() {
        panic!("whisper file doesn't exist");
    }

    // The model consumes floating-point samples, so convert the 16-bit PCM data.
    let original_samples = parse_wav_file(audio_path);
    let samples = whisper_rs::convert_integer_to_float_audio(&original_samples);

    let ctx = WhisperContext::new(&whisper_path.to_string_lossy()).expect("failed to open model");
    let mut state = ctx.create_state().expect("failed to create state");

    let mut params = FullParams::new(SamplingStrategy::default());
    params.set_progress_callback_safe(|progress| println!("Progress callback: {}%", progress));

    // Time only the inference call, not model loading or result printing.
    let st = std::time::Instant::now();
    state
        .full(params, &samples)
        .expect("failed to convert samples");
    let elapsed = st.elapsed();

    let num_segments = state
        .full_n_segments()
        .expect("failed to get number of segments");
    for i in 0..num_segments {
        let segment = state
            .full_get_segment_text(i)
            .expect("failed to get segment");
        // NOTE(review): timestamps are printed as returned by the library; their unit is
        // not established in this file - confirm against the whisper-rs documentation.
        let start_timestamp = state
            .full_get_segment_t0(i)
            .expect("failed to get start timestamp");
        let end_timestamp = state
            .full_get_segment_t1(i)
            .expect("failed to get end timestamp");
        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
    }
    println!("took {}ms", elapsed.as_millis());
}