Merge pull request #113 from tazz4843/audio-util-update

Update audio utilities
2024-03-15 20:03:55 +00:00 · 2024-03-15 20:03:55 +00:00 · e7922d44bc
commit e7922d44bc
parent 9861dfdb93 49ef267bad
4 changed files with 110 additions and 22 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -4,7 +4,7 @@ exclude = ["examples/full_usage"]

 [package]
 name = "whisper-rs"
-version = "0.10.1"
+version = "0.11.0"
 edition = "2021"
 description = "Rust bindings for whisper.cpp"
 license = "Unlicense"
@ -18,11 +18,11 @@ whisper-rs-sys = { path = "sys", version = "0.8" }

 [dev-dependencies]
 hound = "3.5.0"
+rand = "0.8.4"

 [features]
 default = []

-simd = []
 coreml = ["whisper-rs-sys/coreml"]
 cuda = ["whisper-rs-sys/cuda", "_gpu"]
 opencl = ["whisper-rs-sys/opencl"]
--- a/src/error.rs
+++ b/src/error.rs
@ -41,6 +41,10 @@ pub enum WhisperError {
    FailedToCreateState,
    /// No samples were provided.
    NoSamples,
+    /// Input and output slices were not the same length.
+    InputOutputLengthMismatch { input_len: usize, output_len: usize },
+    /// Input slice did not contain an even number of samples.
+    HalfSampleMissing(usize),
 }

 impl From<Utf8Error> for WhisperError {
@ -109,6 +113,24 @@ impl std::fmt::Display for WhisperError {
                c_int
            ),
            NoSamples => write!(f, "Input sample buffer was empty."),
+            InputOutputLengthMismatch {
+                output_len,
+                input_len,
+            } => {
+                write!(
+                    f,
+                    "Input and output slices were not the same length. Input: {}, Output: {}",
+                    input_len, output_len
+                )
+            }
+            HalfSampleMissing(size) => {
+                write!(
+                    f,
+                    "Input slice was not an even number of samples, got {}, expected {}",
+                    size,
+                    size + 1
+                )
+            }
        }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,4 +1,5 @@
 #![allow(clippy::uninlined_format_args)]
+#![cfg_attr(test, feature(test))]

 mod error;
 mod standalone;
@ -16,6 +17,10 @@ pub use whisper_ctx::WhisperContextParameters;
 pub use whisper_grammar::{WhisperGrammarElement, WhisperGrammarElementType};
 pub use whisper_params::{FullParams, SamplingStrategy};
 pub use whisper_state::WhisperState;
+#[cfg(feature = "whisper-cpp-log")]
+pub use whisper_sys_log::install_whisper_log_trampoline;
+#[cfg(feature = "whisper-cpp-tracing")]
+pub use whisper_sys_tracing::install_whisper_tracing_trampoline;

 pub type WhisperSysContext = whisper_rs_sys::whisper_context;
 pub type WhisperSysState = whisper_rs_sys::whisper_state;
--- a/src/utilities.rs
+++ b/src/utilities.rs
@ -1,33 +1,59 @@
+use crate::WhisperError;
+
 /// Convert an array of 16 bit mono audio samples to a vector of 32 bit floats.
 ///
-/// This variant does not use SIMD instructions.
-///
 /// # Arguments
 /// * `samples` - The array of 16 bit mono audio samples.
+/// * `output` - The slice of 32 bit floats to write the converted samples to.
 ///
-/// # Returns
-/// A vector of 32 bit floats.
-pub fn convert_integer_to_float_audio(samples: &[i16]) -> Vec<f32> {
-    let mut floats = Vec::with_capacity(samples.len());
-    for sample in samples {
-        floats.push(*sample as f32 / 32768.0);
+/// # Errors
+/// * if `samples.len() != output.len()`
+///
+/// # Examples
+/// ```
+/// # use whisper_rs::convert_integer_to_float_audio;
+/// let samples = [0i16; 1024];
+/// let mut output = vec![0.0f32; samples.len()];
+/// convert_integer_to_float_audio(&samples, &mut output).expect("input and output lengths should be equal");
+/// ```
+pub fn convert_integer_to_float_audio(
+    samples: &[i16],
+    output: &mut [f32],
+) -> Result<(), WhisperError> {
+    if samples.len() != output.len() {
+        return Err(WhisperError::InputOutputLengthMismatch {
+            input_len: samples.len(),
+            output_len: output.len(),
+        });
    }
-    floats
+
+    for (input, output) in samples.iter().zip(output.iter_mut()) {
+        *output = *input as f32 / 32768.0;
+    }
+
+    Ok(())
 }

-/// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
-///
-/// This variant does not use SIMD instructions.
+/// Convert 32-bit floating point stereo PCM audio to 32-bit floating point mono PCM audio.
 ///
 /// # Arguments
-/// * `samples` - The array of 32 bit floating point stereo PCM audio samples.
+/// * `samples` - The array of 32-bit floating point stereo PCM audio samples.
+///
+/// # Errors
+/// * if `samples.len()` is odd
 ///
 /// # Returns
-/// A vector of 32 bit floating point mono PCM audio samples.
-pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'static str> {
+/// A vector of 32-bit floating point mono PCM audio samples.
+///
+/// # Examples
+/// ```
+/// # use whisper_rs::convert_stereo_to_mono_audio;
+/// let samples = [0.0f32; 1024];
+/// let mono = convert_stereo_to_mono_audio(&samples).expect("should be no half samples missing");
+/// ```
+pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, WhisperError> {
    if samples.len() & 1 != 0 {
-        return Err("The stereo audio vector has an odd number of samples. \
-            This means a half-sample is missing somewhere");
+        return Err(WhisperError::HalfSampleMissing(samples.len()));
    }

    Ok(samples
@ -36,16 +62,51 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'stati
        .collect())
 }

-#[cfg(feature = "simd")]
 #[cfg(test)]
 mod test {
    use super::*;
+    use rand::distributions::{Distribution, Standard};
+    use rand::Rng;
+    use std::hint::black_box;
+
+    extern crate test;
+
+    fn random_sample_data<T>() -> Vec<T>
+    where
+        Standard: Distribution<T>,
+    {
+        const SAMPLE_SIZE: usize = 1_048_576;
+
+        let mut rng = rand::thread_rng();
+        let mut samples = Vec::with_capacity(SAMPLE_SIZE);
+        for _ in 0..SAMPLE_SIZE {
+            samples.push(rng.gen::<T>());
+        }
+        samples
+    }

    #[test]
    pub fn assert_stereo_to_mono_err() {
-        // fake some sample data
-        let samples = (0u16..1029).map(f32::from).collect::<Vec<f32>>();
+        let samples = random_sample_data::<f32>();
        let mono = convert_stereo_to_mono_audio(&samples);
        assert!(mono.is_err());
    }
+
+    #[bench]
+    pub fn bench_stereo_to_mono(b: &mut test::Bencher) {
+        let samples = random_sample_data::<f32>();
+        b.iter(|| black_box(convert_stereo_to_mono_audio(black_box(&samples))));
+    }
+
+    #[bench]
+    pub fn bench_integer_to_float(b: &mut test::Bencher) {
+        let samples = random_sample_data::<i16>();
+        let mut output = vec![0.0f32; samples.len()];
+        b.iter(|| {
+            black_box(convert_integer_to_float_audio(
+                black_box(&samples),
+                black_box(&mut output),
+            ))
+        });
+    }
 }