From 83988ea6ae54fa6f223609669c47aa2e1f045828 Mon Sep 17 00:00:00 2001 From: Jeroen Matser Date: Wed, 9 Oct 2024 09:18:25 +0200 Subject: [PATCH 1/2] Initial_prompt easily accessible Made initial_prompt more easily accessible to be able to give context --- README.md | 3 +++ whisper_live/client.py | 12 ++++++++---- whisper_live/server.py | 4 ++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2f98b9b..0e20dc9 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,9 @@ client = TranscriptionClient( use_vad=False, save_output_recording=True, # Only used for microphone input, False by Default output_recording_filename="./output_recording.wav" # Only used for microphone input + options={ + 'initial_prompt': None, # To give the model context, replace None with a prompt string, e.g. 'Jane Doe context' + } ) ``` It connects to the server running on localhost at port 9090. Using a multilingual model, language for the transcription will be automatically detected. You can also use the language option to specify the target language for the transcription, in this case, English ("en"). The translate option should be set to `True` if we want to translate from the source language to English and `False` if we want to transcribe in the source language. diff --git a/whisper_live/client.py b/whisper_live/client.py index c252607..983f337 100644 --- a/whisper_live/client.py +++ b/whisper_live/client.py @@ -30,7 +30,8 @@ def __init__( model="small", srt_file_path="output.srt", use_vad=True, - log_transcription=True + log_transcription=True, + options=None ): """ Initializes a Client instance for audio recording and streaming to a server. 
@@ -59,6 +60,7 @@ def __init__( self.last_segment = None self.last_received_segment = None self.log_transcription = log_transcription + self.options = options if translate: self.task = "translate" @@ -199,7 +201,8 @@ def on_open(self, ws): "language": self.language, "task": self.task, "model": self.model, - "use_vad": self.use_vad + "use_vad": self.use_vad, + "options": self.options } ) ) @@ -681,8 +684,9 @@ def __init__( output_recording_filename="./output_recording.wav", output_transcription_path="./output.srt", log_transcription=True, + options=None, ): - self.client = Client(host, port, lang, translate, model, srt_file_path=output_transcription_path, use_vad=use_vad, log_transcription=log_transcription) + self.client = Client(host, port, lang, translate, model, srt_file_path=output_transcription_path, use_vad=use_vad, log_transcription=log_transcription, options=options) if save_output_recording and not output_recording_filename.endswith(".wav"): raise ValueError(f"Please provide a valid `output_recording_filename`: {output_recording_filename}") if not output_transcription_path.endswith(".srt"): @@ -692,4 +696,4 @@ def __init__( [self.client], save_output_recording=save_output_recording, output_recording_filename=output_recording_filename - ) + ) \ No newline at end of file diff --git a/whisper_live/server.py b/whisper_live/server.py index e3346d2..109fd77 100644 --- a/whisper_live/server.py +++ b/whisper_live/server.py @@ -191,8 +191,8 @@ def initialize_client( task=options["task"], client_uid=options["uid"], model=options["model"], - initial_prompt=options.get("initial_prompt"), - vad_parameters=options.get("vad_parameters"), + initial_prompt=options["options"].get("initial_prompt"), + vad_parameters=options["options"].get("vad_parameters"), use_vad=self.use_vad, single_model=self.single_model, ) From 60c4dbf633c879a8fbc87c28fa6412a958f5a952 Mon Sep 17 00:00:00 2001 From: Spudra Date: Thu, 7 Nov 2024 10:31:36 +0100 Subject: [PATCH 2/2] Update client.py --- 
whisper_live/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/whisper_live/client.py b/whisper_live/client.py index d7a2c30..5b340c6 100644 --- a/whisper_live/client.py +++ b/whisper_live/client.py @@ -31,7 +31,7 @@ def __init__( srt_file_path="output.srt", use_vad=True, log_transcription=True, - options=None + options=None, max_clients=4, max_connection_time=600, ): @@ -207,7 +207,7 @@ def on_open(self, ws): "task": self.task, "model": self.model, "use_vad": self.use_vad, - "options": self.options + "options": self.options, "max_clients": self.max_clients, "max_connection_time": self.max_connection_time, } @@ -710,4 +710,4 @@ def __init__( [self.client], save_output_recording=save_output_recording, output_recording_filename=output_recording_filename - ) \ No newline at end of file + )