From 05d072ffc4dd8a4f88452e3f15096e5fb2bd218b Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Thu, 23 Mar 2023 12:38:54 -0400
Subject: [PATCH 1/8] Updated Cargo.toml and audio_transcription example to not
 fail cargo test

---
 Cargo.toml                      | 5 ++++-
 examples/audio_transcription.rs | 7 ++++---
 2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 1de051c..36fdd00 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,8 +16,11 @@ repository = "https://github.com/tazz4843/whisper-rs"
 [dependencies]
 whisper-rs-sys = { path = "sys", version = "0.3" }
 
+[dev-dependencies]
+hound = "3.5.0"
+
 [features]
 simd = []
 
 [package.metadata.docs.rs]
-features = ["simd"]
\ No newline at end of file
+features = ["simd"]
diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index bf6b3d8..7831f6d 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -1,9 +1,10 @@
 // This example is not going to build in this folder.
-// You need to copy this code into your project and add the whisper_rs dependency in your cargo.toml
+// You need to copy this code into your project and add the dependencies whisper_rs and hound in your cargo.toml
 
 use std::fs::File;
 use std::io::Write;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
+use hound;
 
 /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
 fn main() {
@@ -14,7 +15,7 @@ fn main() {
     // Create a params object for running the model.
     // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP.
     // The number of past samples to consider defaults to 0.
-    let mut params = FullParams::new(SamplingStrategy::Greedy { n_past: 0 });
+    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
 
     // Edit params as needed.
     // Set the number of threads to use to 1.
@@ -22,7 +23,7 @@ fn main() {
     // Enable translation.
     params.set_translate(true);
     // Set the language to translate to to English.
-    params.set_language("en");
+    params.set_language(Some("en"));
     // Disable anything that prints to stdout.
     params.set_print_special(false);
     params.set_print_progress(false);

From 31260475dc3af6d617d6672d41feedf08e0a3653 Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Thu, 23 Mar 2023 12:41:26 -0400
Subject: [PATCH 2/8] Change assert_stereo_to_mono_simd test to exhibit issue of
 odd length value

---
 src/utilities.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utilities.rs b/src/utilities.rs
index 4d210b8..7fdc057 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -120,7 +120,7 @@ mod test {
     pub fn assert_stereo_to_mono_simd() {
         // fake some sample data, of 1028 elements
         let mut samples = Vec::with_capacity(1028);
-        for i in 0..1028 {
+        for i in 0..1029 {
             samples.push(i as f32);
         }
         let mono_simd = convert_stereo_to_mono_audio_simd(&samples);

From 1873288db0a9bda8173429b4807fda8f1e3722a4 Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Thu, 23 Mar 2023 13:44:53 -0400
Subject: [PATCH 3/8] Fixed odd value length bug in
 convert_stereo_to_mono_audio functions

---
 src/utilities.rs | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/utilities.rs b/src/utilities.rs
index 7fdc057..2da86ed 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -54,6 +54,7 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
+/// If there are an odd number of samples, the last sample is dropped.
 /// This variant does not use SIMD instructions.
 ///
 /// # Arguments
@@ -62,15 +63,12 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 /// # Returns
 /// A vector of 32 bit floating point mono PCM audio samples.
 pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec<f32> {
-    let mut mono = Vec::with_capacity(samples.len() / 2);
-    for i in (0..samples.len()).step_by(2) {
-        mono.push((samples[i] + samples[i + 1]) / 2.0);
-    }
-    mono
+    samples.chunks_exact(2).map(|x| (x[0] + x[1]) / 2.0).collect()
 }
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
+/// If there are an odd number of samples, the last sample is dropped.
 /// This variant uses SIMD instructions, and as such is only available on
 /// nightly Rust.
 ///
@@ -104,9 +102,7 @@ pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec<f32> {
     // Handle the remainder.
     // do this normally because it's only a few samples and the overhead of
     // converting to SIMD is not worth it.
-    for i in (0..remainder.len()).step_by(2) {
-        mono.push((remainder[i] + remainder[i + 1]) / 2.0);
-    }
+    mono.extend(convert_stereo_to_mono_audio(remainder));
 
     mono
 }

From 445a072bdae821b7801367c65f755fc86eb52239 Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Thu, 23 Mar 2023 13:46:36 -0400
Subject: [PATCH 4/8] Turned off warning for unused variable in
 audio_transcription example

---
 examples/audio_transcription.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index 7831f6d..d795c39 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -32,6 +32,7 @@ fn main() {
 
     // Open the audio file.
     let mut reader = hound::WavReader::open("audio.wav").expect("failed to open file");
+    #[allow(unused_variables)]
     let hound::WavSpec {
         channels,
         sample_rate,

From bad88c38d831c621d768230eacf83283211786eb Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Sun, 26 Mar 2023 11:44:24 -0400
Subject: [PATCH 5/8] Change comments for convert_stereo_to_mono_audio
 functions

---
 src/utilities.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/utilities.rs b/src/utilities.rs
index 2da86ed..f521f7e 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -54,7 +54,7 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
-/// If there are an odd number of samples, the last sample is dropped.
+/// If there are an odd number of samples, the last half-sample is dropped.
 /// This variant does not use SIMD instructions.
 ///
 /// # Arguments
@@ -68,7 +68,7 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec<f32> {
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
-/// If there are an odd number of samples, the last sample is dropped.
+/// If there are an odd number of samples, the last half-sample is dropped.
 /// This variant uses SIMD instructions, and as such is only available on
 /// nightly Rust.
 ///

From 30ff41989b2879e805454361b3ede345e1f85264 Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Mon, 27 Mar 2023 10:35:23 -0400
Subject: [PATCH 6/8] Ran cargo fmt

---
 examples/audio_transcription.rs | 2 +-
 src/utilities.rs                | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index d795c39..b2f3445 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -1,10 +1,10 @@
 // This example is not going to build in this folder.
 // You need to copy this code into your project and add the dependencies whisper_rs and hound in your cargo.toml
 
+use hound;
 use std::fs::File;
 use std::io::Write;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
-use hound;
 
 /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
 fn main() {
diff --git a/src/utilities.rs b/src/utilities.rs
index f521f7e..79bb9c2 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -63,7 +63,10 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 /// # Returns
 /// A vector of 32 bit floating point mono PCM audio samples.
 pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec<f32> {
-    samples.chunks_exact(2).map(|x| (x[0] + x[1]) / 2.0).collect()
+    samples
+        .chunks_exact(2)
+        .map(|x| (x[0] + x[1]) / 2.0)
+        .collect()
 }
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.

From d8271e31d09b3a189177a33bc21819a0f7875eee Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Mon, 27 Mar 2023 11:49:13 -0400
Subject: [PATCH 7/8] Changed convert_stereo_to_mono_audio to return a Result

---
 examples/audio_transcription.rs |  5 ++--
 examples/basic_use.rs           |  6 ++--
 src/utilities.rs                | 49 ++++++++++++++++++++++++---------
 3 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index b2f3445..7ab716d 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -7,7 +7,7 @@ use std::io::Write;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
 
 /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
-fn main() {
+fn main() -> Result<(), &'static str> {
     // Load a context and model.
     let mut ctx = WhisperContext::new("example/path/to/model/whisper.cpp/models/ggml-base.en.bin")
         .expect("failed to load model");
@@ -52,7 +52,7 @@ fn main() {
     // These utilities are provided for convenience, but can be replaced with custom conversion logic.
     // SIMD variants of these functions are also available on nightly Rust (see the docs).
     if channels == 2 {
-        audio = whisper_rs::convert_stereo_to_mono_audio(&audio);
+        audio = whisper_rs::convert_stereo_to_mono_audio(&audio)?;
     } else if channels != 1 {
         panic!(">2 channels unsupported");
     }
@@ -85,4 +85,5 @@ fn main() {
         file.write_all(line.as_bytes())
             .expect("failed to write to file");
     }
+    Ok(())
 }
diff --git a/examples/basic_use.rs b/examples/basic_use.rs
index 8d0f219..727deba 100644
--- a/examples/basic_use.rs
+++ b/examples/basic_use.rs
@@ -5,7 +5,7 @@ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
 // note that running this example will not do anything, as it is just a
 // demonstration of how to use the library, and actual usage requires
 // more dependencies than the base library.
-pub fn usage() {
+pub fn usage() -> Result<(), &'static str> {
     // load a context and model
     let mut ctx = WhisperContext::new("path/to/model").expect("failed to load model");
 
@@ -38,7 +38,7 @@ pub fn usage() {
     // SIMD variants of these functions are also available, but only on nightly Rust: see the docs
     let audio_data = whisper_rs::convert_stereo_to_mono_audio(
         &whisper_rs::convert_integer_to_float_audio(&audio_data),
-    );
+    )?;
 
     // now we can run the model
     ctx.full(params, &audio_data[..])
@@ -52,6 +52,8 @@ pub fn usage() {
         let end_timestamp = ctx.full_get_segment_t1(i);
         println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
     }
+
+    Ok(())
 }
 
 fn main() {
diff --git a/src/utilities.rs b/src/utilities.rs
index 79bb9c2..b976475 100644
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -54,7 +54,6 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
-/// If there are an odd number of samples, the last half-sample is dropped.
 /// This variant does not use SIMD instructions.
 ///
 /// # Arguments
@@ -62,16 +61,20 @@ pub fn convert_integer_to_float_audio_simd(samples: &[i16]) -> Vec<f32> {
 ///
 /// # Returns
 /// A vector of 32 bit floating point mono PCM audio samples.
-pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec<f32> {
-    samples
+pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Result<Vec<f32>, &'static str> {
+    if samples.len() & 1 != 0 {
+        return Err("The stereo audio vector has an odd number of samples. \
+            This means a half-sample is missing somewhere");
+    }
+
+    Ok(samples
         .chunks_exact(2)
         .map(|x| (x[0] + x[1]) / 2.0)
-        .collect()
+        .collect())
 }
 
 /// Convert 32 bit floating point stereo PCM audio to 32 bit floating point mono PCM audio.
 ///
-/// If there are an odd number of samples, the last half-sample is dropped.
 /// This variant uses SIMD instructions, and as such is only available on
 /// nightly Rust.
 ///
@@ -81,7 +84,7 @@ pub fn convert_stereo_to_mono_audio(samples: &[f32]) -> Vec<f32> {
 /// # Returns
 /// A vector of 32 bit floating point mono PCM audio samples.
 #[cfg(feature = "simd")]
-pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec<f32> {
+pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Result<Vec<f32>, &'static str> {
     let mut mono = Vec::with_capacity(samples.len() / 2);
 
     let div_array = f32x16::splat(2.0);
@@ -105,9 +108,9 @@ pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec<f32> {
     // Handle the remainder.
     // do this normally because it's only a few samples and the overhead of
     // converting to SIMD is not worth it.
-    mono.extend(convert_stereo_to_mono_audio(remainder));
+    mono.extend(convert_stereo_to_mono_audio(remainder)?);
 
-    mono
+    Ok(mono)
 }
 
 #[cfg(feature = "simd")]
@@ -115,13 +118,33 @@ pub fn convert_stereo_to_mono_audio_simd(samples: &[f32]) -> Vec<f32> {
 mod test {
     use super::*;
 
+    #[test]
+    pub fn assert_stereo_to_mono_err() {
+        // fake some sample data
+        let samples = (0u16..1029).map(f32::from).collect::<Vec<f32>>();
+        let mono = convert_stereo_to_mono_audio(&samples);
+        assert!(mono.is_err());
+    }
+}
+
+#[cfg(feature = "simd")]
+#[cfg(test)]
+mod test_simd {
+    use super::*;
+
     #[test]
     pub fn assert_stereo_to_mono_simd() {
-        // fake some sample data, of 1028 elements
-        let mut samples = Vec::with_capacity(1028);
-        for i in 0..1029 {
-            samples.push(i as f32);
-        }
+        // fake some sample data
+        let samples = (0u16..1028).map(f32::from).collect::<Vec<f32>>();
+        let mono_simd = convert_stereo_to_mono_audio_simd(&samples);
+        let mono = convert_stereo_to_mono_audio(&samples);
+        assert_eq!(mono_simd, mono);
+    }
+
+    #[test]
+    pub fn assert_stereo_to_mono_simd_err() {
+        // fake some sample data
+        let samples = (0u16..1029).map(f32::from).collect::<Vec<f32>>();
         let mono_simd = convert_stereo_to_mono_audio_simd(&samples);
         let mono = convert_stereo_to_mono_audio(&samples);
         assert_eq!(mono_simd, mono);

From 9a3efcca5f6dd3008ab32a3fd126ff497e268748 Mon Sep 17 00:00:00 2001
From: James Bruska <jamesbthecoder@gmail.com>
Date: Mon, 27 Mar 2023 11:51:46 -0400
Subject: [PATCH 8/8] Changed version to 0.5.0 due to public API change

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 36fdd00..117dabf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ exclude = ["examples/full_usage"]
 
 [package]
 name = "whisper-rs"
-version = "0.4.0"
+version = "0.5.0"
 edition = "2021"
 description = "Rust bindings for whisper.cpp"
 license = "Unlicense"