Merge pull request #169 from arizhih/whisper-cpp-latest
feat: Add support for new whisper.cpp project structure
This commit is contained in:
commit
8596d2d98c
10 changed files with 305 additions and 396 deletions
|
|
@ -110,7 +110,6 @@ pub struct SystemInfo {
|
|||
pub fma: bool,
|
||||
pub f16c: bool,
|
||||
pub blas: bool,
|
||||
pub clblast: bool,
|
||||
pub cuda: bool,
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +122,6 @@ impl Default for SystemInfo {
|
|||
fma: whisper_rs_sys::ggml_cpu_has_fma() != 0,
|
||||
f16c: whisper_rs_sys::ggml_cpu_has_f16c() != 0,
|
||||
blas: whisper_rs_sys::ggml_cpu_has_blas() != 0,
|
||||
clblast: whisper_rs_sys::ggml_cpu_has_clblast() != 0,
|
||||
cuda: whisper_rs_sys::ggml_cpu_has_cuda() != 0,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -471,9 +471,6 @@ unsafe impl Sync for WhisperInnerContext {}
|
|||
|
||||
pub struct WhisperContextParameters<'a> {
|
||||
/// Use GPU if available.
|
||||
///
|
||||
/// **Warning**: Does not have an effect if OpenCL is selected as GPU backend
|
||||
/// (in that case, GPU is always enabled).
|
||||
pub use_gpu: bool,
|
||||
/// Enable flash attention, default false
|
||||
///
|
||||
|
|
|
|||
|
|
@ -222,16 +222,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
|
|||
self.fp.max_tokens = max_tokens;
|
||||
}
|
||||
|
||||
/// # EXPERIMENTAL
|
||||
///
|
||||
/// Speed up audio ~2x by using phase vocoder.
|
||||
/// Note that this can significantly reduce the accuracy of the transcription.
|
||||
///
|
||||
/// Defaults to false.
///
/// # Arguments
/// * speed_up: The new value written to the `speed_up` field of the wrapped parameters (`self.fp`).
|
||||
pub fn set_speed_up(&mut self, speed_up: bool) {
|
||||
self.fp.speed_up = speed_up;
|
||||
}
|
||||
|
||||
/// # EXPERIMENTAL
|
||||
///
|
||||
/// Enables debug mode, such as dumping the log mel spectrogram.
|
||||
|
|
@ -244,7 +234,6 @@ impl<'a, 'b> FullParams<'a, 'b> {
|
|||
/// # EXPERIMENTAL
|
||||
///
|
||||
/// Overwrite the audio context size. 0 = default.
|
||||
/// As with [set_speed_up](FullParams::set_speed_up), this can significantly reduce the accuracy of the transcription.
|
||||
///
|
||||
/// Defaults to 0.
|
||||
pub fn set_audio_ctx(&mut self, audio_ctx: c_int) {
|
||||
|
|
|
|||
|
|
@ -64,45 +64,6 @@ impl WhisperState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert raw PCM audio (floating point 32 bit) to log mel spectrogram.
|
||||
/// Applies a Phase Vocoder to speed up the audio x2.
|
||||
/// The resulting spectrogram is stored in the context transparently.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * pcm: The raw PCM audio.
|
||||
/// * threads: How many threads to use. Must be at least 1, returns an error otherwise.
|
||||
///
|
||||
/// # Returns
|
||||
/// Ok(()) on success, Err(WhisperError) on failure:
/// * `WhisperError::InvalidThreadCount` if `threads` is less than 1 (checked before calling into C).
/// * `WhisperError::UnableToCalculateSpectrogram` if the underlying call returns -1.
/// * `WhisperError::GenericError(ret)` if the underlying call returns any other non-zero value.
|
||||
///
|
||||
/// # C++ equivalent
|
||||
/// `int whisper_pcm_to_mel_phase_vocoder_with_state(struct whisper_context * ctx, struct whisper_state * state, const float * samples, int n_samples, int n_threads)`
|
||||
pub fn pcm_to_mel_phase_vocoder(
|
||||
&mut self,
|
||||
pcm: &[f32],
|
||||
threads: usize,
|
||||
) -> Result<(), WhisperError> {
|
||||
// Reject a zero thread count up front so the C function is never called with it.
if threads < 1 {
|
||||
return Err(WhisperError::InvalidThreadCount);
|
||||
}
|
||||
// SAFETY (review note): `pcm` stays borrowed for the whole call, so the pointer/length
// pair handed to C describes live memory. Validity of `self.ctx.ctx` and `self.ptr`
// is assumed to be guaranteed by how this state was constructed -- TODO confirm.
let ret = unsafe {
|
||||
whisper_rs_sys::whisper_pcm_to_mel_phase_vocoder_with_state(
|
||||
self.ctx.ctx,
|
||||
self.ptr,
|
||||
pcm.as_ptr(),
|
||||
// NOTE(review): `as` casts do not check range; a length or thread count larger
// than `c_int` can hold would be silently truncated.
pcm.len() as c_int,
|
||||
threads as c_int,
|
||||
)
|
||||
};
|
||||
// Map the C status code onto the crate's error type: -1 and 0 are handled explicitly,
// everything else is forwarded verbatim inside `GenericError`.
if ret == -1 {
|
||||
Err(WhisperError::UnableToCalculateSpectrogram)
|
||||
} else if ret == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(WhisperError::GenericError(ret))
|
||||
}
|
||||
}
|
||||
|
||||
/// This can be used to set a custom log mel spectrogram inside the provided whisper state.
|
||||
/// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
||||
///
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue