Merge branch 'master' into log-trampolines

This commit is contained in:
Niko 2024-03-15 20:04:10 +00:00 committed by GitHub
commit 8bc3a435f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 106 additions and 22 deletions

View file

@ -4,7 +4,7 @@ exclude = ["examples/full_usage"]
[package]
name = "whisper-rs"
version = "0.10.1"
version = "0.11.0"
edition = "2021"
description = "Rust bindings for whisper.cpp"
license = "Unlicense"
@ -20,11 +20,11 @@ tracing = { version = "0.1", optional = true }
[dev-dependencies]
hound = "3.5.0"
rand = "0.8.4"
[features]
default = []
simd = []
coreml = ["whisper-rs-sys/coreml"]
cuda = ["whisper-rs-sys/cuda", "_gpu"]
opencl = ["whisper-rs-sys/opencl"]

View file

@ -41,6 +41,10 @@ pub enum WhisperError {
FailedToCreateState,
/// No samples were provided.
NoSamples,
/// Input and output slices were not the same length.
InputOutputLengthMismatch { input_len: usize, output_len: usize },
/// Input slice was not an even number of samples.
HalfSampleMissing(usize),
}
impl From<Utf8Error> for WhisperError {
@ -109,6 +113,24 @@ impl std::fmt::Display for WhisperError {
c_int
),
NoSamples => write!(f, "Input sample buffer was empty."),
InputOutputLengthMismatch {
output_len,
input_len,
} => {
write!(
f,
"Input and output slices were not the same length. Input: {}, Output: {}",
input_len, output_len
)
}
HalfSampleMissing(size) => {
write!(
f,
"Input slice was not an even number of samples, got {}, expected {}",
size,
size + 1
)
}
}
}
}

View file

@ -1,4 +1,5 @@
#![allow(clippy::uninlined_format_args)]
#![cfg_attr(test, feature(test))]
mod error;
mod standalone;

View file

@ -1,33 +1,59 @@
use crate::WhisperError;
/// Convert an array of 16 bit mono audio samples to a vector of 32 bit floats.
///
/// This variant does not use SIMD instructions.
///
/// # Arguments
/// * `samples` - The array of 16 bit mono audio samples.
/// * `output` - The vector of 32 bit floats to write the converted samples to.
///
/// # Returns
/// A vector of 32 bit floats.
pub fn convert_integer_to_float_audio(samples: &[i16]) -> Vec<f32> {
let mut floats = Vec::with_capacity(samples.len());
for sample in samples {
floats.push(*sample as f32 / 32768.0);
}
floats
/// # Errors
/// * `WhisperError::InputOutputLengthMismatch` if `samples.len() != output.len()`
///
/// # Examples
/// ```
/// # use whisper_rs::convert_integer_to_float_audio;
/// let samples = [0i16; 1024];
/// let mut output = vec![0.0f32; samples.len()];
/// convert_integer_to_float_audio(&samples, &mut output).expect("input and output lengths should be equal");
/// ```
pub fn convert_integer_to_float_audio(
samples: &[i16],
output: &mut [f32],
) -> Result<(), WhisperError> {
if samples.len() != output.len() {
return Err(WhisperError::InputOutputLengthMismatch {
input_len: samples.len(),
output_len: output.len(),
});
}
/// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
///
/// This variant does not use SIMD instructions.
for (input, output) in samples.iter().zip(output.iter_mut()) {
*output = *input as f32 / 32768.0;
}
Ok(())
}
/// Convert 32-bit floating point stereo PCM audio to 32-bit floating point mono PCM audio.
///
/// # Arguments
/// * `samples` - The array of 32 bit floating point stereo PCM audio samples.
/// * `samples` - The array of 32-bit floating point stereo PCM audio samples.
///
/// # Errors
/// * if `samples.len()` is odd
///
/// # Returns
/// A vector of 32 bit floating point mono PCM audio samples.
pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'static str> {
/// A vector of 32-bit floating point mono PCM audio samples.
///
/// # Examples
/// ```
/// # use whisper_rs::convert_stereo_to_mono_audio;
/// let samples = [0.0f32; 1024];
/// let mono = convert_stereo_to_mono_audio(&samples).expect("should be no half samples missing");
/// ```
pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, WhisperError> {
if samples.len() & 1 != 0 {
return Err("The stereo audio vector has an odd number of samples. \
This means a half-sample is missing somewhere");
return Err(WhisperError::HalfSampleMissing(samples.len()));
}
Ok(samples
@ -36,16 +62,51 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'stati
.collect())
}
#[cfg(feature = "simd")]
#[cfg(test)]
mod test {
use super::*;
use rand::distributions::{Distribution, Standard};
use rand::Rng;
use std::hint::black_box;
extern crate test;
/// Build a vector of `SAMPLE_SIZE` random values of type `T`.
///
/// Values are drawn from the thread-local RNG; the tests and benchmarks in this
/// module use the result as their input data.
fn random_sample_data<T>() -> Vec<T>
where
    Standard: Distribution<T>,
{
    const SAMPLE_SIZE: usize = 1_048_576;

    let mut rng = rand::thread_rng();
    // Draw exactly SAMPLE_SIZE values, one `gen` call per element.
    std::iter::repeat_with(|| rng.gen::<T>())
        .take(SAMPLE_SIZE)
        .collect()
}
/// An odd number of interleaved stereo samples must be rejected.
#[test]
pub fn assert_stereo_to_mono_err() {
    // The input length has to be odd to trigger the error path. Random sample
    // data has a fixed, even length (1_048_576), which converts successfully
    // and would make this test fail, so build a small deterministic odd-length
    // input instead.
    let samples = (0u16..1029).map(f32::from).collect::<Vec<f32>>();
    assert_eq!(samples.len() % 2, 1, "test input must have an odd length");

    let mono = convert_stereo_to_mono_audio(&samples);
    assert!(mono.is_err());
}
/// Benchmark the stereo-to-mono conversion on a buffer of random samples.
#[bench]
pub fn bench_stereo_to_mono(b: &mut test::Bencher) {
    // Generate the input once, outside the timed closure.
    let stereo = random_sample_data::<f32>();
    b.iter(|| {
        // `black_box` on both input and result keeps the optimizer from eliding the call.
        let mono = convert_stereo_to_mono_audio(black_box(&stereo));
        black_box(mono)
    });
}
/// Benchmark the i16-to-f32 conversion on a buffer of random samples.
#[bench]
pub fn bench_integer_to_float(b: &mut test::Bencher) {
    // Input and output buffers are created once and reused by every iteration.
    let pcm = random_sample_data::<i16>();
    let mut floats = vec![0.0f32; pcm.len()];
    b.iter(|| {
        let outcome = convert_integer_to_float_audio(black_box(&pcm), black_box(&mut floats));
        black_box(outcome)
    });
}
}