diff --git a/README.md b/README.md index 3539352..89678f4 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,10 @@ fn main() { ).expect("failed to load model"); // create a params object - let params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); + let params = FullParams::new(SamplingStrategy::BeamSearch { + beam_size: 5, + patience: -1.0, + }); // assume we have a buffer of audio data // here we'll make a fake one, floating point samples, 32 bit, 16KHz, mono @@ -40,20 +43,16 @@ fn main() { .expect("failed to run model"); // fetch the results - let num_segments = state - .full_n_segments() - .expect("failed to get number of segments"); - for i in 0..num_segments { - let segment = state - .full_get_segment_text(i) - .expect("failed to get segment"); - let start_timestamp = state - .full_get_segment_t0(i) - .expect("failed to get segment start timestamp"); - let end_timestamp = state - .full_get_segment_t1(i) - .expect("failed to get segment end timestamp"); - println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); + for segment in state.as_iter() { + println!( + "[{} - {}]: {}", + // note that start and end timestamps are in centiseconds + // (units of 10 milliseconds) + segment.start_timestamp(), + segment.end_timestamp(), + // the Display impl for WhisperSegment will replace invalid UTF-8 with the Unicode replacement character + segment + ); } } ``` diff --git a/examples/basic_use.rs b/examples/basic_use.rs index 1868c66..53d2aab 100644 --- a/examples/basic_use.rs +++ b/examples/basic_use.rs @@ -13,7 +13,6 @@ fn main() { let wav_path = std::env::args() .nth(2) .expect("Please specify path to wav file as argument 2"); - let language = "en"; let samples: Vec = hound::WavReader::open(wav_path) .unwrap() @@ -24,18 +23,24 @@ fn main() { // load a context and model let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default()) .expect("failed to load model"); - + // create a state attached to the model let mut state = 
ctx.create_state().expect("failed to create state"); + // the sampling strategy will determine how accurate your final output is going to be + // typically BeamSearch is more accurate at the cost of significantly increased CPU time let mut params = FullParams::new(SamplingStrategy::BeamSearch { + // whisper.cpp defaults to a beam size of 5, a reasonable default beam_size: 5, + // this parameter is currently unused but defaults to -1.0 patience: -1.0, }); - // and set the language to translate to to english - params.set_language(Some(&language)); + // and set the language to translate to, in this case English + params.set_language(Some("en")); // we also explicitly disable anything that prints to stdout + // despite all of this you will still get things printing to stdout, + // be prepared to deal with it params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); @@ -61,8 +66,11 @@ fn main() { for segment in state.as_iter() { println!( "[{} - {}]: {}", + // these timestamps are in centiseconds (units of 10 milliseconds) segment.start_timestamp(), segment.end_timestamp(), + // this default Display implementation will result in any invalid UTF-8 + // being converted into the Unicode replacement character, U+FFFD segment ); }