diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs index b2f3445..7ab716d 100644 --- a/examples/audio_transcription.rs +++ b/examples/audio_transcription.rs @@ -7,7 +7,7 @@ use std::io::Write; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout. -fn main() { +fn main() -> Result<(), &'static str> { // Load a context and model. let mut ctx = WhisperContext::new("example/path/to/model/whisper.cpp/models/ggml-base.en.bin") .expect("failed to load model"); @@ -52,7 +52,7 @@ fn main() { // These utilities are provided for convenience, but can be replaced with custom conversion logic. // SIMD variants of these functions are also available on nightly Rust (see the docs). if channels == 2 { - audio = whisper_rs::convert_stereo_to_mono_audio(&audio); + audio = whisper_rs::convert_stereo_to_mono_audio(&audio)?; } else if channels != 1 { panic!(">2 channels unsupported"); } @@ -85,4 +85,5 @@ fn main() { file.write_all(line.as_bytes()) .expect("failed to write to file"); } + Ok(()) } diff --git a/examples/basic_use.rs b/examples/basic_use.rs index 8d0f219..727deba 100644 --- a/examples/basic_use.rs +++ b/examples/basic_use.rs @@ -5,7 +5,7 @@ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; // note that running this example will not do anything, as it is just a // demonstration of how to use the library, and actual usage requires // more dependencies than the base library. -pub fn usage() { +pub fn usage() -> Result<(), &'static str> { // load a context and model let mut ctx = WhisperContext::new("path/to/model").expect("failed to load model"); @@ -38,7 +38,7 @@ pub fn usage() { // SIMD variants of these functions are also available, but only on nightly Rust: see the docs let audio_data = whisper_rs::convert_stereo_to_mono_audio( &whisper_rs::convert_integer_to_float_audio(&audio_data), - ); + )?; // now we can run the model ctx.full(params, &audio_data[..]) @@ -52,6 +52,8 @@ pub fn usage() { let end_timestamp = ctx.full_get_segment_t1(i); println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); } + + Ok(()) } fn main() { diff --git a/src/utilities.rs b/src/utilities.rs index 79bb9c2..b976475 100644 --- a/src/utilities.rs +++ b/src/utilities.rs @@ -54,7 +54,6 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec { /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio. /// -/// If there are an odd number of samples, the last half-sample is dropped. /// This variant does not use SIMD instructions. /// /// # Arguments @@ -62,16 +61,20 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec { /// /// # Returns /// A vector of 32 bit floating point mono PCM audio samples. -pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec { - samples +pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result, &'static str> { + if samples.len() & 1 != 0 { + return Err("The stereo audio vector has an odd number of samples. \ + This means a half-sample is missing somewhere"); + } + + Ok(samples .chunks_exact(2) .map(|x| (x[0] + x[1]) / 2.0) - .collect() + .collect()) } /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio. /// -/// If there are an odd number of samples, the last half-sample is dropped. /// This variant uses SIMD instructions, and as such is only available on /// nightly Rust. /// @@ -81,7 +84,7 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec { /// # Returns /// A vector of 32 bit floating point mono PCM audio samples. #[cfg(feature = "simd")] -pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec { +pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Result, &'static str> { let mut mono = Vec::with_capacity(samples.len() / 2); let div_array = f32x16::splat(2.0); @@ -105,9 +108,9 @@ pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec { // Handle the remainder. // do this normally because it's only a few samples and the overhead of // converting to SIMD is not worth it. - mono.extend(convert_stereo_to_mono_audio(remainder)); + mono.extend(convert_stereo_to_mono_audio(remainder)?); - mono + Ok(mono) } #[cfg(feature = "simd")] @@ -115,13 +118,33 @@ pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec { mod test { use super::*; + #[test] + pub fn assert_stereo_to_mono_err() { + // fake some sample data + let samples = (0u16..1029).map(f32::from).collect::>(); + let mono = convert_stereo_to_mono_audio(&samples); + assert!(mono.is_err()); + } +} + +#[cfg(feature = "simd")] +#[cfg(test)] +mod test_simd { + use super::*; + #[test] pub fn assert_stereo_to_mono_simd() { - // fake some sample data, of 1028 elements - let mut samples = Vec::with_capacity(1028); - for i in 0..1029 { - samples.push(i as f32); - } + // fake some sample data + let samples = (0u16..1028).map(f32::from).collect::>(); + let mono_simd = convert_stereo_to_mono_audio_simd(&samples); + let mono = convert_stereo_to_mono_audio(&samples); + assert_eq!(mono_simd, mono); + } + + #[test] + pub fn assert_stereo_to_mono_simd_err() { + // fake some sample data + let samples = (0u16..1029).map(f32::from).collect::>(); let mono_simd = convert_stereo_to_mono_audio_simd(&samples); let mono = convert_stereo_to_mono_audio(&samples); assert_eq!(mono_simd, mono);