From 748709ec627c4c4f8200594d6143e48d0b681932 Mon Sep 17 00:00:00 2001
From: Lucas Zanek <lucas@Lucass-MBP.fibertel.com.ar>
Date: Tue, 3 Jan 2023 22:34:18 -0300
Subject: [PATCH 1/3] audio transcription example

---
 examples/audio_transcription.rs | 70 +++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 examples/audio_transcription.rs
diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
new file mode 100644
index 0000000..d11fc0a
--- /dev/null
+++ b/examples/audio_transcription.rs
@@ -0,0 +1,70 @@
+use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
+
+/// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
+fn main() {
+    // Load a context and model.
+    let mut ctx = WhisperContext::new(
+        "/Users/lucas/Documents/code/meetsary/whisper-test/whisper.cpp/models/ggml-base.en.bin",
+    )
+    .expect("failed to load model");
+
+    // Create a params object for running the model.
+    // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP.
+    // `n_past` (the number of past samples to consider) is explicitly set to 0 here.
+    let mut params = FullParams::new(SamplingStrategy::Greedy { n_past: 0 });
+
+    // Edit params as needed.
+    // Set the number of threads to use to 1.
+    params.set_n_threads(1);
+    // Enable translation.
+    params.set_translate(true);
+    // Set the language to English ("en").
+    params.set_language("en");
+    // Turn off the special/progress/realtime/timestamp printing options.
+    params.set_print_special(false);
+    params.set_print_progress(false);
+    params.set_print_realtime(false);
+    params.set_print_timestamps(false);
+
+    // Open the audio file.
+    let mut reader = hound::WavReader::open("weeknd-2.wav").expect("failed to open file");
+    let hound::WavSpec {
+        channels,
+        sample_rate,
+        bits_per_sample, // bound but never used below; samples are always read as i16
+        ..
+    } = reader.spec();
+
+    // Read every sample as i16 (panicking on an invalid one) and convert them to floats.
+    let mut audio = whisper_rs::convert_integer_to_float_audio(
+        &reader
+            .samples::<i16>()
+            .map(|s| s.expect("invalid sample"))
+            .collect::<Vec<_>>(),
+    );
+
+    // The model requires 16KHz mono f32 samples: stereo is downmixed, mono is used as-is, anything else panics.
+    // These utilities are provided for convenience, but can be replaced with custom conversion logic.
+    // SIMD variants of these functions are also available on nightly Rust (see the docs).
+    if channels == 2 {
+        audio = whisper_rs::convert_stereo_to_mono_audio(&audio);
+    } else if channels != 1 {
+        panic!(">2 channels unsupported");
+    }
+
+    if sample_rate != 16000 { // the rate is only validated; no resampling is performed
+        panic!("sample rate must be 16KHz");
+    }
+
+    // Run the model.
+    ctx.full(params, &audio[..]).expect("failed to run model");
+
+    // Fetch and print the results.
+    let num_segments = ctx.full_n_segments();
+    for i in 0..num_segments {
+        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
+        let start_timestamp = ctx.full_get_segment_t0(i);
+        let end_timestamp = ctx.full_get_segment_t1(i);
+        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
+    }
+}

From a09ed82675e6ae74c76d1d578b8d76c496906c7e Mon Sep 17 00:00:00 2001
From: Lucas Zanek <lucaszanek94@gmail.com>
Date: Tue, 3 Jan 2023 22:48:35 -0300
Subject: [PATCH 2/3] added logic to write the result in a txt file

---
 examples/audio_transcription.rs | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index d11fc0a..f1012c4 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -1,12 +1,12 @@
+use std::fs::File;
+use std::io::Write;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
 
 /// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
 fn main() {
     // Load a context and model.
-    let mut ctx = WhisperContext::new(
-        "/Users/lucas/Documents/code/meetsary/whisper-test/whisper.cpp/models/ggml-base.en.bin",
-    )
-    .expect("failed to load model");
+    let mut ctx = WhisperContext::new("example/path/to/model/whisper.cpp/models/ggml-base.en.bin")
+        .expect("failed to load model");
 
     // Create a params object for running the model.
     // Currently, only the Greedy sampling strategy is implemented, with BeamSearch as a WIP.
@@ -27,7 +27,7 @@ fn main() {
     params.set_print_timestamps(false);
 
     // Open the audio file.
-    let mut reader = hound::WavReader::open("weeknd-2.wav").expect("failed to open file");
+    let mut reader = hound::WavReader::open("audio.wav").expect("failed to open file");
     let hound::WavSpec {
         channels,
         sample_rate,
@@ -59,12 +59,25 @@ fn main() {
     // Run the model.
     ctx.full(params, &audio[..]).expect("failed to run model");
 
-    // Fetch and print the results.
+    // Create a file to write the transcript to.
+    let mut file = File::create("transcript.txt").expect("failed to create file");
+
+    // Iterate through the segments of the transcript.
     let num_segments = ctx.full_n_segments();
     for i in 0..num_segments {
+        // Get the transcribed text and timestamps for the current segment.
         let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
         let start_timestamp = ctx.full_get_segment_t0(i);
         let end_timestamp = ctx.full_get_segment_t1(i);
+
+        // Print the segment to stdout.
         println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
+
+        // Format the segment information as a string.
+        let line = format!("[{} - {}]: {}\n", start_timestamp, end_timestamp, segment);
+
+        // Write the segment information to the file.
+        file.write_all(line.as_bytes())
+            .expect("failed to write to file");
     }
 }

From 1562644a8de44cc914bd8f8ddfabbbb67e21a8da Mon Sep 17 00:00:00 2001
From: Lucas Zanek <lucaszanek94@gmail.com>
Date: Tue, 3 Jan 2023 23:36:02 -0300
Subject: [PATCH 3/3] add information about how to run the example

---
 examples/audio_transcription.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/audio_transcription.rs b/examples/audio_transcription.rs
index f1012c4..bf6b3d8 100644
--- a/examples/audio_transcription.rs
+++ b/examples/audio_transcription.rs
@@ -1,3 +1,6 @@
+// This example will not build from within this folder.
+// Copy this code into your own project and add the `whisper_rs` and `hound` dependencies to its Cargo.toml.
+
 use std::fs::File;
 use std::io::Write;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};