From a7cdb545540e9bf04d65a06957327e5ed2e44f7f Mon Sep 17 00:00:00 2001 From: Dave Lage Date: Tue, 6 Dec 2022 19:57:42 -0500 Subject: [PATCH 1/6] Update readme with new usage --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a04d5fb..d05270b 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,14 @@ Rust bindings to [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) ## Usage ```rust +use whisper_rs::{WhisperContext, FullParams, SamplingStrategy}; + fn main() { // load a context and model let mut ctx = WhisperContext::new("path/to/model").expect("failed to load model"); // create a params object - let mut params = FullParams::new(DecodeStrategy::Greedy { n_past: 0 }); + let mut params = FullParams::new(SamplingStrategy::Greedy { n_past: 0 }); // assume we have a buffer of audio data // here we'll make a fake one, floating point samples, 32 bit, 16KHz, mono From 15dbd58c7edcb93e31c0e731598a80da5bcb2b74 Mon Sep 17 00:00:00 2001 From: Niklas Korz Date: Thu, 15 Dec 2022 16:40:37 +0100 Subject: [PATCH 2/6] Link against C++ standard library and macOS Accelerate framework --- sys/build.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sys/build.rs b/sys/build.rs index e7a60aa..5c3077d 100644 --- a/sys/build.rs +++ b/sys/build.rs @@ -4,6 +4,16 @@ use std::env; use std::path::PathBuf; fn main() { + let target = env::var("TARGET").unwrap(); + // Link C++ standard library + if let Some(cpp_stdlib) = get_cpp_link_stdlib(&target) { + println!("cargo:rustc-link-lib=dylib={}", cpp_stdlib); + } + // Link macOS Accelerate framework for matrix calculations + if target.contains("apple") { + println!("cargo:rustc-link-lib=framework=Accelerate"); + } + println!("cargo:rustc-link-search={}", env::var("OUT_DIR").unwrap()); println!("cargo:rustc-link-lib=static=whisper"); println!("cargo:rerun-if-changed=wrapper.h"); @@ -66,3 +76,20 @@ fn main() { .status() .expect("Failed to clean whisper build 
directory"); } + +// From https://github.com/alexcrichton/cc-rs/blob/fba7feded71ee4f63cfe885673ead6d7b4f2f454/src/lib.rs#L2462 +fn get_cpp_link_stdlib(target: &str) -> Option<&'static str> { + if target.contains("msvc") { + None + } else if target.contains("apple") { + Some("c++") + } else if target.contains("freebsd") { + Some("c++") + } else if target.contains("openbsd") { + Some("c++") + } else if target.contains("android") { + Some("c++_shared") + } else { + Some("stdc++") + } +} From bc51b4d4fc26f436c5eefe421b4edcc062d1e13d Mon Sep 17 00:00:00 2001 From: reisub0 Date: Sat, 24 Dec 2022 22:49:57 +0100 Subject: [PATCH 3/6] error[E0599]: no method named `set_print_special_tokens` found for struct `FullParams` in the current scope --> examples/basic_use.rs:23:12 | 23 | params.set_print_special_tokens(false); | ^^^^^^^^^^^^^^^^^^^^^^^^ help: there is a method with a similar name: `set_print_special` For more information about this error, try `rustc --explain E0599`. --- examples/basic_use.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/basic_use.rs b/examples/basic_use.rs index b021c93..2330f1e 100644 --- a/examples/basic_use.rs +++ b/examples/basic_use.rs @@ -20,7 +20,7 @@ pub fn usage() { // and set the language to translate to to english params.set_language("en"); // we also explicitly disable anything that prints to stdout - params.set_print_special_tokens(false); + params.set_print_special(false); params.set_print_progress(false); params.set_print_realtime(false); params.set_print_timestamps(false); From 748709ec627c4c4f8200594d6143e48d0b681932 Mon Sep 17 00:00:00 2001 From: Lucas Zanek Date: Tue, 3 Jan 2023 22:34:18 -0300 Subject: [PATCH 4/6] audio transcription example --- examples/audio_transcription.rs | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 examples/audio_transcription.rs diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs new file mode 100644 index 
0000000..d11fc0a --- /dev/null +++ b/examples/audio_transcription.rs @@ -0,0 +1,70 @@ +use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; + +/// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout. +fn main() { + // Load a context and model. + let mut ctx = WhisperContext::new( + "/Users/lucas/Documents/code/meetsary/whisper-test/whisper.cpp/models/ggml-base.en.bin", + ) + .expect("failed to load model"); + + // Create a params object for running the model. + // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP. + // The number of past samples to consider defaults to 0. + let mut params = FullParams::new(SamplingStrategy::Greedy { n_past: 0 }); + + // Edit params as needed. + // Set the number of threads to use to 1. + params.set_n_threads(1); + // Enable translation. + params.set_translate(true); + // Set the language to translate to to English. + params.set_language("en"); + // Disable anything that prints to stdout. + params.set_print_special(false); + params.set_print_progress(false); + params.set_print_realtime(false); + params.set_print_timestamps(false); + + // Open the audio file. + let mut reader = hound::WavReader::open("weeknd-2.wav").expect("failed to open file"); + let hound::WavSpec { + channels, + sample_rate, + bits_per_sample, + .. + } = reader.spec(); + + // Convert the audio to floating point samples. + let mut audio = whisper_rs::convert_integer_to_float_audio( + &reader + .samples::<i16>() + .map(|s| s.expect("invalid sample")) + .collect::<Vec<_>>(), + ); + + // Convert audio to 16KHz mono f32 samples, as required by the model. + // These utilities are provided for convenience, but can be replaced with custom conversion logic. + // SIMD variants of these functions are also available on nightly Rust (see the docs). 
+ if channels == 2 { + audio = whisper_rs::convert_stereo_to_mono_audio(&audio); + } else if channels != 1 { + panic!(">2 channels unsupported"); + } + + if sample_rate != 16000 { + panic!("sample rate must be 16KHz"); + } + + // Run the model. + ctx.full(params, &audio[..]).expect("failed to run model"); + + // Fetch and print the results. + let num_segments = ctx.full_n_segments(); + for i in 0..num_segments { + let segment = ctx.full_get_segment_text(i).expect("failed to get segment"); + let start_timestamp = ctx.full_get_segment_t0(i); + let end_timestamp = ctx.full_get_segment_t1(i); + println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); + } +} From a09ed82675e6ae74c76d1d578b8d76c496906c7e Mon Sep 17 00:00:00 2001 From: Lucas Zanek Date: Tue, 3 Jan 2023 22:48:35 -0300 Subject: [PATCH 5/6] added logic to write the result in a txt file --- examples/audio_transcription.rs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs index d11fc0a..f1012c4 100644 --- a/examples/audio_transcription.rs +++ b/examples/audio_transcription.rs @@ -1,12 +1,12 @@ +use std::fs::File; +use std::io::Write; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout. fn main() { // Load a context and model. - let mut ctx = WhisperContext::new( - "/Users/lucas/Documents/code/meetsary/whisper-test/whisper.cpp/models/ggml-base.en.bin", - ) - .expect("failed to load model"); + let mut ctx = WhisperContext::new("example/path/to/model/whisper.cpp/models/ggml-base.en.bin") + .expect("failed to load model"); // Create a params object for running the model. // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP. @@ -27,7 +27,7 @@ fn main() { params.set_print_timestamps(false); // Open the audio file. 
- let mut reader = hound::WavReader::open("weeknd-2.wav").expect("failed to open file"); + let mut reader = hound::WavReader::open("audio.wav").expect("failed to open file"); let hound::WavSpec { channels, sample_rate, @@ -59,12 +59,25 @@ fn main() { // Run the model. ctx.full(params, &audio[..]).expect("failed to run model"); - // Fetch and print the results. + // Create a file to write the transcript to. + let mut file = File::create("transcript.txt").expect("failed to create file"); + + // Iterate through the segments of the transcript. let num_segments = ctx.full_n_segments(); for i in 0..num_segments { + // Get the transcribed text and timestamps for the current segment. let segment = ctx.full_get_segment_text(i).expect("failed to get segment"); let start_timestamp = ctx.full_get_segment_t0(i); let end_timestamp = ctx.full_get_segment_t1(i); + + // Print the segment to stdout. println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment); + + // Format the segment information as a string. + let line = format!("[{} - {}]: {}\n", start_timestamp, end_timestamp, segment); + + // Write the segment information to the file. + file.write_all(line.as_bytes()) + .expect("failed to write to file"); } } From 1562644a8de44cc914bd8f8ddfabbbb67e21a8da Mon Sep 17 00:00:00 2001 From: Lucas Zanek Date: Tue, 3 Jan 2023 23:36:02 -0300 Subject: [PATCH 6/6] add information about how to run the example --- examples/audio_transcription.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs index f1012c4..bf6b3d8 100644 --- a/examples/audio_transcription.rs +++ b/examples/audio_transcription.rs @@ -1,3 +1,6 @@ +// This example is not going to build in this folder. +// You need to copy this code into your project and add the whisper_rs dependency in your cargo.toml + use std::fs::File; use std::io::Write; use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};