From ee932caf3d47d85995a587d0dd3cbd5cf932e930 Mon Sep 17 00:00:00 2001
From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
Date: Tue, 9 Apr 2024 18:08:44 +0300
Subject: [PATCH 1/2] feat: add get segment text lossy

---
 src/whisper_state.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/whisper_state.rs b/src/whisper_state.rs
index 15f540a..459c29a 100644
--- a/src/whisper_state.rs
+++ b/src/whisper_state.rs
@@ -419,6 +419,29 @@ impl<'a> WhisperState<'a> {
         Ok(r_str.to_string())
     }
 
+    /// Get the text of the specified segment.
+    /// This function differs from [WhisperState::full_get_segment_text]
+    /// in that it ignores invalid UTF-8 in whisper strings,
+    /// instead opting to replace it with the replacement character.
+    /// 
+    /// # Arguments
+    /// * segment: Segment index.
+    ///
+    /// # Returns
+    /// Ok(String) on success, Err(WhisperError) on failure.
+    ///
+    /// # C++ equivalent
+    /// `const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment)`
+    pub fn full_get_segment_text_lossy(&self, segment: c_int) -> Result<String, WhisperError> {
+        let ret =
+            unsafe { whisper_rs_sys::whisper_full_get_segment_text_from_state(self.ptr, segment) };
+        if ret.is_null() {
+            return Err(WhisperError::NullPointer);
+        }
+        let c_str = unsafe { CStr::from_ptr(ret) };
+        Ok(c_str.to_string_lossy().to_string())
+    }
+
     /// Get the bytes of the specified segment.
     ///
     /// # Arguments

From f3c86de64f447f762d13c6c7ea0ce835e924a4fb Mon Sep 17 00:00:00 2001
From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
Date: Mon, 15 Apr 2024 00:39:09 +0300
Subject: [PATCH 2/2] format with cargo fmt

---
 src/whisper_state.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/whisper_state.rs b/src/whisper_state.rs
index 459c29a..d9b02c3 100644
--- a/src/whisper_state.rs
+++ b/src/whisper_state.rs
@@ -423,7 +423,7 @@ impl<'a> WhisperState<'a> {
     /// This function differs from [WhisperState::full_get_segment_text]
     /// in that it ignores invalid UTF-8 in whisper strings,
     /// instead opting to replace it with the replacement character.
-    /// 
+    ///
     /// # Arguments
     /// * segment: Segment index.
     ///