Merge pull request #169 from arizhih/whisper-cpp-latest

feat: Add support for new whisper.cpp project structure
2024-09-01 23:15:18 +00:00 · 2024-09-01 23:15:18 +00:00 · 8596d2d98c
commit 8596d2d98c
parent 166da11ebd a6d25d8797
10 changed files with 305 additions and 396 deletions
--- a/src/standalone.rs
+++ b/src/standalone.rs
@@ -110,7 +110,6 @@ pub struct SystemInfo {
    pub fma: bool,
    pub f16c: bool,
    pub blas: bool,
-    pub clblast: bool,
    pub cuda: bool,
 }

@@ -123,7 +122,6 @@ impl Default for SystemInfo {
                fma: whisper_rs_sys::ggml_cpu_has_fma() != 0,
                f16c: whisper_rs_sys::ggml_cpu_has_f16c() != 0,
                blas: whisper_rs_sys::ggml_cpu_has_blas() != 0,
-                clblast: whisper_rs_sys::ggml_cpu_has_clblast() != 0,
                cuda: whisper_rs_sys::ggml_cpu_has_cuda() != 0,
            }
        }
--- a/src/whisper_ctx.rs
+++ b/src/whisper_ctx.rs
@@ -471,9 +471,6 @@ unsafe impl Sync for WhisperInnerContext {}

 pub struct WhisperContextParameters<'a> {
    /// Use GPU if available.
-    ///
-    /// **Warning**: Does not have an effect if OpenCL is selected as GPU backend
-    /// (in that case, GPU is always enabled).
    pub use_gpu: bool,
    /// Enable flash attention, default false
    ///
--- a/src/whisper_params.rs
+++ b/src/whisper_params.rs
@@ -222,16 +222,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
        self.fp.max_tokens = max_tokens;
    }

-    /// # EXPERIMENTAL
-    ///
-    /// Speed up audio ~2x by using phase vocoder.
-    /// Note that this can significantly reduce the accuracy of the transcription.
-    ///
-    /// Defaults to false.
-    pub fn set_speed_up(&mut self, speed_up: bool) {
-        self.fp.speed_up = speed_up;
-    }
-
    /// # EXPERIMENTAL
    ///
    /// Enables debug mode, such as dumping the log mel spectrogram.
@@ -244,7 +234,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
    /// # EXPERIMENTAL
    ///
    /// Overwrite the audio context size. 0 = default.
-    /// As with [set_speed_up](FullParams::set_speed_up), this can significantly reduce the accuracy of the transcription.
    ///
    /// Defaults to 0.
    pub fn set_audio_ctx(&mut self, audio_ctx: c_int) {
--- a/src/whisper_state.rs
+++ b/src/whisper_state.rs
@@ -64,45 +64,6 @@ impl WhisperState {
        }
    }

-    /// Convert raw PCM audio (floating point 32 bit) to log mel spectrogram.
-    /// Applies a Phase Vocoder to speed up the audio x2.
-    /// The resulting spectrogram is stored in the context transparently.
-    ///
-    /// # Arguments
-    /// * pcm: The raw PCM audio.
-    /// * threads: How many threads to use. Defaults to 1. Must be at least 1, returns an error otherwise.
-    ///
-    /// # Returns
-    /// Ok(()) on success, Err(WhisperError) on failure.
-    ///
-    /// # C++ equivalent
-    /// `int whisper_pcm_to_mel(struct whisper_context * ctx, const float * samples, int n_samples, int n_threads)`
-    pub fn pcm_to_mel_phase_vocoder(
-        &mut self,
-        pcm: &[f32],
-        threads: usize,
-    ) -> Result<(), WhisperError> {
-        if threads < 1 {
-            return Err(WhisperError::InvalidThreadCount);
-        }
-        let ret = unsafe {
-            whisper_rs_sys::whisper_pcm_to_mel_phase_vocoder_with_state(
-                self.ctx.ctx,
-                self.ptr,
-                pcm.as_ptr(),
-                pcm.len() as c_int,
-                threads as c_int,
-            )
-        };
-        if ret == -1 {
-            Err(WhisperError::UnableToCalculateSpectrogram)
-        } else if ret == 0 {
-            Ok(())
-        } else {
-            Err(WhisperError::GenericError(ret))
-        }
-    }
-
    /// This can be used to set a custom log mel spectrogram inside the provided whisper state.
    /// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
    ///