diff --git a/Cargo.toml b/Cargo.toml
index b70b5df..2f34200 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ exclude = ["examples/full_usage"]
 
 [package]
 name = "whisper-rs"
-version = "0.10.1"
+version = "0.11.0"
 edition = "2021"
 description = "Rust bindings for whisper.cpp"
 license = "Unlicense"
@@ -20,11 +20,11 @@ tracing = { version = "0.1", optional = true }
 
 [dev-dependencies]
 hound = "3.5.0"
+rand = "0.8.4"
 
 [features]
 default = []
 
-simd = []
 coreml = ["whisper-rs-sys/coreml"]
 cuda = ["whisper-rs-sys/cuda", "_gpu"]
 opencl = ["whisper-rs-sys/opencl"]
diff --git a/src/error.rs b/src/error.rs
index 51ab0bc..6fe7420 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -41,6 +41,10 @@ pub enum WhisperError {
     FailedToCreateState,
     /// No samples were provided.
     NoSamples,
+    /// Input and output slices were not the same length.
+    InputOutputLengthMismatch { input_len: usize, output_len: usize },
+    /// Input slice was not an even number of samples.
+    HalfSampleMissing(usize),
 }
 
 impl From for WhisperError {
@@ -109,6 +113,24 @@ impl std::fmt::Display for WhisperError {
                 c_int
             ),
             NoSamples => write!(f, "Input sample buffer was empty."),
+            InputOutputLengthMismatch {
+                output_len,
+                input_len,
+            } => {
+                write!(
+                    f,
+                    "Input and output slices were not the same length. Input: {}, Output: {}",
+                    input_len, output_len
+                )
+            }
+            HalfSampleMissing(size) => {
+                write!(
+                    f,
+                    "Input slice was not an even number of samples, got {}, expected {}",
+                    size,
+                    size + 1
+                )
+            }
         }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 7b38c6c..bdd326f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 #![allow(clippy::uninlined_format_args)]
+#![cfg_attr(test, feature(test))]
 
 mod error;
 mod standalone;
diff --git a/src/utilities.rs b/src/utilities.rs
index f8bc554..8dfc045 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -1,33 +1,59 @@
+use crate::WhisperError;
+
 /// Convert an array of 16 bit mono audio samples to a vector of 32 bit floats.
 ///
-/// This variant does not use SIMD instructions.
-///
 /// # Arguments
 /// * `samples` - The array of 16 bit mono audio samples.
+/// * `output` - The vector of 32 bit floats to write the converted samples to.
 ///
-/// # Returns
-/// A vector of 32 bit floats.
-pub fn convert_integer_to_float_audio(samples: &[i16]) -> Vec<f32> {
-    let mut floats = Vec::with_capacity(samples.len());
-    for sample in samples {
-        floats.push(*sample as f32 / 32768.0);
+/// # Errors
+/// * if `samples.len() != output.len()`
+///
+/// # Examples
+/// ```
+/// # use whisper_rs::convert_integer_to_float_audio;
+/// let samples = [0i16; 1024];
+/// let mut output = vec![0.0f32; samples.len()];
+/// convert_integer_to_float_audio(&samples, &mut output).expect("input and output lengths should be equal");
+/// ```
+pub fn convert_integer_to_float_audio(
+    samples: &[i16],
+    output: &mut [f32],
+) -> Result<(), WhisperError> {
+    if samples.len() != output.len() {
+        return Err(WhisperError::InputOutputLengthMismatch {
+            input_len: samples.len(),
+            output_len: output.len(),
+        });
     }
-    floats
+
+    for (input, output) in samples.iter().zip(output.iter_mut()) {
+        *output = *input as f32 / 32768.0;
+    }
+
+    Ok(())
 }
 
-/// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
-///
-/// This variant does not use SIMD instructions.
+/// Convert 32-bit floating point stereo PCM audio to 32-bit floating point mono PCM audio.
 ///
 /// # Arguments
-/// * `samples` - The array of 32 bit floating point stereo PCM audio samples.
+/// * `samples` - The array of 32-bit floating point stereo PCM audio samples.
+///
+/// # Errors
+/// * if `samples.len()` is odd
 ///
 /// # Returns
-/// A vector of 32 bit floating point mono PCM audio samples.
-pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'static str> {
+/// A vector of 32-bit floating point mono PCM audio samples.
+///
+/// # Examples
+/// ```
+/// # use whisper_rs::convert_stereo_to_mono_audio;
+/// let samples = [0.0f32; 1024];
+/// let mono = convert_stereo_to_mono_audio(&samples).expect("should be no half samples missing");
+/// ```
+pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, WhisperError> {
     if samples.len() & 1 != 0 {
-        return Err("The stereo audio vector has an odd number of samples. \
-            This means a half-sample is missing somewhere");
+        return Err(WhisperError::HalfSampleMissing(samples.len()));
     }
 
     Ok(samples
@@ -36,16 +62,52 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'stati
         .collect())
 }
 
-#[cfg(feature = "simd")]
 #[cfg(test)]
 mod test {
     use super::*;
+    use rand::distributions::{Distribution, Standard};
+    use rand::Rng;
+    use std::hint::black_box;
+
+    extern crate test;
+
+    fn random_sample_data<T>() -> Vec<T>
+    where
+        Standard: Distribution<T>,
+    {
+        const SAMPLE_SIZE: usize = 1_048_576;
+
+        let mut rng = rand::thread_rng();
+        let mut samples = Vec::with_capacity(SAMPLE_SIZE);
+        for _ in 0..SAMPLE_SIZE {
+            samples.push(rng.gen::<T>());
+        }
+        samples
+    }
 
     #[test]
     pub fn assert_stereo_to_mono_err() {
-        // fake some sample data
-        let samples = (0u16..1029).map(f32::from).collect::<Vec<f32>>();
-        let mono = convert_stereo_to_mono_audio(&samples);
+        let samples = random_sample_data::<f32>();
+        // The generated buffer has an even length; drop one sample so a half-sample is missing.
+        let mono = convert_stereo_to_mono_audio(&samples[..samples.len() - 1]);
         assert!(mono.is_err());
     }
+
+    #[bench]
+    pub fn bench_stereo_to_mono(b: &mut test::Bencher) {
+        let samples = random_sample_data::<f32>();
+        b.iter(|| black_box(convert_stereo_to_mono_audio(black_box(&samples))));
+    }
+
+    #[bench]
+    pub fn bench_integer_to_float(b: &mut test::Bencher) {
+        let samples = random_sample_data::<i16>();
+        let mut output = vec![0.0f32; samples.len()];
+        b.iter(|| {
+            black_box(convert_integer_to_float_audio(
+                black_box(&samples),
+                black_box(&mut output),
+            ))
+        });
+    }
 }