diff --git a/src/error.rs b/src/error.rs index 51ab0bc..8e4a9aa 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,7 +1,10 @@ use std::ffi::{c_int, NulError}; use std::str::Utf8Error; -/// Whisper tends to output errors to stderr, so if an error occurs, check stderr. +/// If you have not configured a logging trampoline with [crate::whisper_sys_log::install_whisper_log_trampoline] or +/// [crate::whisper_sys_tracing::install_whisper_tracing_trampoline], +/// then `whisper.cpp`'s errors will be output to stderr, +/// so you can check there for more information upon receiving a `WhisperError`. #[derive(Debug, Copy, Clone)] pub enum WhisperError { /// Failed to create a new context. diff --git a/src/whisper_ctx.rs b/src/whisper_ctx.rs index 330391e..25b3013 100644 --- a/src/whisper_ctx.rs +++ b/src/whisper_ctx.rs @@ -6,7 +6,8 @@ use std::ffi::{c_int, CStr, CString}; /// Safe Rust wrapper around a Whisper context. /// /// You likely want to create this with [WhisperContext::new_with_params], -/// then run a full transcription with [WhisperContext::full]. +/// create a state with [WhisperContext::create_state], +/// then run a full transcription with [WhisperState::full]. #[derive(Debug)] pub struct WhisperContext { ctx: *mut whisper_rs_sys::whisper_context, @@ -138,7 +139,7 @@ impl WhisperContext { /// * text: The text to convert. /// /// # Returns - /// Ok(Vec<WhisperToken>) on success, Err(WhisperError) on failure. + /// `Ok(Vec<WhisperToken>)` on success, `Err(WhisperError)` on failure. /// /// # C++ equivalent /// `int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, int n_max_tokens);` diff --git a/src/whisper_params.rs b/src/whisper_params.rs index 318083e..92552dd 100644 --- a/src/whisper_params.rs +++ b/src/whisper_params.rs @@ -121,7 +121,7 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.single_segment = single_segment; } - /// Print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.) + /// Print special tokens (e.g. `<SOT>`, `<EOT>`, `<BEG>`, etc.) /// /// Defaults to false. 
pub fn set_print_special(&mut self, print_special: bool) { @@ -285,7 +285,8 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.detect_language = detect_language; } - /// Set suppress_blank. See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89 + /// Set suppress_blank. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89> /// for more information. /// /// Defaults to true. @@ -293,7 +294,8 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.suppress_blank = suppress_blank; } - /// Set suppress_non_speech_tokens. See https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253 + /// Set suppress_non_speech_tokens. + /// See <https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253> /// for more information. /// /// Defaults to false. @@ -301,14 +303,16 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.suppress_non_speech_tokens = suppress_non_speech_tokens; } - /// Set initial decoding temperature. See https://ai.stackexchange.com/a/32478 for more information. + /// Set initial decoding temperature. + /// See <https://ai.stackexchange.com/a/32478> for more information. /// /// Defaults to 0.0. pub fn set_temperature(&mut self, temperature: f32) { self.fp.temperature = temperature; } - /// Set max_initial_ts. See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97 + /// Set max_initial_ts. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97> /// for more information. /// /// Defaults to 1.0. @@ -316,7 +320,8 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.max_initial_ts = max_initial_ts; } - /// Set length_penalty. See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L267 + /// Set length_penalty. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L267> /// for more information. /// /// Defaults to -1.0. @@ -324,7 +329,8 @@ impl<'a, 'b> FullParams<'a, 'b> { self.fp.length_penalty = length_penalty; } - /// Set temperature_inc. See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278 + /// Set temperature_inc. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278> /// for more information. 
/// /// Defaults to 0.2. @@ -333,14 +339,15 @@ impl<'a, 'b> FullParams<'a, 'b> { } /// Set entropy_thold. Similar to OpenAI's compression_ratio_threshold. - /// See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278 for more information. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278> for more information. /// /// Defaults to 2.4. pub fn set_entropy_thold(&mut self, entropy_thold: f32) { self.fp.entropy_thold = entropy_thold; } - /// Set logprob_thold. See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278 + /// Set logprob_thold. + /// See <https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278> /// for more information. /// /// Defaults to -1.0. diff --git a/src/whisper_state.rs b/src/whisper_state.rs index f336638..79029aa 100644 --- a/src/whisper_state.rs +++ b/src/whisper_state.rs @@ -112,7 +112,7 @@ impl<'a> WhisperState<'a> { /// # Note /// This is a low-level function. /// If you're a typical user, you probably don't want to use this function. - /// See instead [WhisperContext::pcm_to_mel]. + /// See instead [WhisperState::pcm_to_mel]. /// /// # Arguments /// * data: The log mel spectrogram. @@ -144,7 +144,7 @@ impl<'a> WhisperState<'a> { } /// Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper state. - /// Make sure to call [WhisperContext::pcm_to_mel] or [WhisperContext::set_mel] first. + /// Make sure to call [WhisperState::pcm_to_mel] or [WhisperState::set_mel] first. /// /// # Arguments /// * offset: Can be used to specify the offset of the first frame in the spectrogram. Usually 0. @@ -177,7 +177,7 @@ impl<'a> WhisperState<'a> { } /// Run the Whisper decoder to obtain the logits and probabilities for the next token. - /// Make sure to call [WhisperContext::encode] first. + /// Make sure to call [WhisperState::encode] first. /// tokens + n_tokens is the provided context for the decoder. 
/// /// # Arguments @@ -228,7 +228,7 @@ impl<'a> WhisperState<'a> { /// * n_threads: How many threads to use. Defaults to 1. Must be at least 1, returns an error otherwise. /// /// # Returns - /// Ok(Vec<f32>) on success, Err(WhisperError) on failure. + /// `Ok(Vec<f32>)` on success, `Err(WhisperError)` on failure. /// /// # C++ equivalent /// `int whisper_lang_auto_detect(struct whisper_context * ctx, int offset_ms, int n_threads, float * lang_probs)` @@ -270,7 +270,7 @@ impl<'a> WhisperState<'a> { } // logit functions - /// Gets logits obtained from the last call to [WhisperContext::decode]. + /// Gets logits obtained from the last call to [WhisperState::decode]. /// As of whisper.cpp 1.4.1, only a single row of logits is available, corresponding to the last token in the input. /// /// # Returns @@ -319,7 +319,8 @@ impl<'a> WhisperState<'a> { /// /// # Arguments /// * params: [crate::FullParams] struct. - /// * pcm: PCM audio data. + /// * pcm: raw PCM audio data, 32 bit floating point at a sample rate of 16 kHz, 1 channel. + /// See utilities in the root of this crate for functions to convert audio to this format. /// /// # Returns /// Ok(c_int) on success, Err(WhisperError) on failure. @@ -424,7 +425,7 @@ impl<'a> WhisperState<'a> { /// * segment: Segment index. /// /// # Returns - /// Ok(Vec<u8>) on success, Err(WhisperError) on failure. + /// `Ok(Vec<u8>)` on success, `Err(WhisperError)` on failure. /// /// # C++ equivalent /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)`