Merge pull request #169 from arizhih/whisper-cpp-latest

feat: Add support for new whisper.cpp project structure
This commit is contained in:
Niko 2024-09-01 23:15:18 +00:00 committed by GitHub
commit 8596d2d98c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 305 additions and 396 deletions

View file

@ -110,7 +110,6 @@ pub struct SystemInfo {
pub fma: bool,
pub f16c: bool,
pub blas: bool,
pub clblast: bool,
pub cuda: bool,
}
@ -123,7 +122,6 @@ impl Default for SystemInfo {
fma: whisper_rs_sys::ggml_cpu_has_fma() != 0,
f16c: whisper_rs_sys::ggml_cpu_has_f16c() != 0,
blas: whisper_rs_sys::ggml_cpu_has_blas() != 0,
clblast: whisper_rs_sys::ggml_cpu_has_clblast() != 0,
cuda: whisper_rs_sys::ggml_cpu_has_cuda() != 0,
}
}

View file

@ -471,9 +471,6 @@ unsafe impl Sync for WhisperInnerContext {}
pub struct WhisperContextParameters<'a> {
/// Use GPU if available.
///
/// **Warning**: Does not have an effect if OpenCL is selected as GPU backend
/// (in that case, GPU is always enabled).
pub use_gpu: bool,
/// Enable flash attention, default false
///

View file

@ -222,16 +222,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
self.fp.max_tokens = max_tokens;
}
/// # EXPERIMENTAL
///
/// Toggle the ~2x audio speed-up, which is implemented with a phase vocoder.
/// Note that enabling this can significantly reduce the accuracy of the transcription.
///
/// # Arguments
/// * speed_up: Value stored verbatim into the `speed_up` field of the wrapped parameter struct.
///
/// Defaults to false.
pub fn set_speed_up(&mut self, speed_up: bool) {
self.fp.speed_up = speed_up;
}
/// # EXPERIMENTAL
///
/// Enables debug mode, such as dumping the log mel spectrogram.
@ -244,7 +234,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
/// # EXPERIMENTAL
///
/// Overwrite the audio context size. 0 = default.
/// As with [set_speed_up](FullParams::set_speed_up), this can significantly reduce the accuracy of the transcription.
///
/// Defaults to 0.
pub fn set_audio_ctx(&mut self, audio_ctx: c_int) {

View file

@ -64,45 +64,6 @@ impl WhisperState {
}
}
/// Compute a log mel spectrogram from raw 32-bit float PCM audio, running the
/// samples through a Phase Vocoder that speeds the audio up x2.
/// The resulting spectrogram is stored in the state transparently.
///
/// # Arguments
/// * pcm: The raw PCM audio samples.
/// * threads: How many threads to use. Must be at least 1.
///
/// # Returns
/// * `Ok(())` when the underlying call reports `0`.
/// * `Err(WhisperError::InvalidThreadCount)` when `threads` is `0` (no FFI call is made).
/// * `Err(WhisperError::UnableToCalculateSpectrogram)` when the underlying call reports `-1`.
/// * `Err(WhisperError::GenericError(code))` for any other return code.
///
/// # C++ equivalent
/// Wraps `whisper_pcm_to_mel_phase_vocoder_with_state` as exposed by `whisper_rs_sys`.
pub fn pcm_to_mel_phase_vocoder(
    &mut self,
    pcm: &[f32],
    threads: usize,
) -> Result<(), WhisperError> {
    // Guard clause: `threads` is unsigned, so "fewer than one" means exactly zero.
    if threads == 0 {
        return Err(WhisperError::InvalidThreadCount);
    }

    // SAFETY: `pcm.as_ptr()` and `pcm.len()` describe the same live slice for the
    // duration of the call. Assumes `self.ctx.ctx` and `self.ptr` are valid handles
    // owned by this state -- TODO confirm against the constructor.
    // NOTE(review): the `as c_int` casts silently truncate values that do not fit
    // in `c_int`; kept as-is to preserve existing behavior.
    let status = unsafe {
        whisper_rs_sys::whisper_pcm_to_mel_phase_vocoder_with_state(
            self.ctx.ctx,
            self.ptr,
            pcm.as_ptr(),
            pcm.len() as c_int,
            threads as c_int,
        )
    };

    // Translate the C-style status code into the crate's error type.
    match status {
        0 => Ok(()),
        -1 => Err(WhisperError::UnableToCalculateSpectrogram),
        code => Err(WhisperError::GenericError(code)),
    }
}
/// This can be used to set a custom log mel spectrogram inside the provided whisper state.
/// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
///