diff --git a/main.py b/main.py index 8600bcd..a995f6f 100644 --- a/main.py +++ b/main.py @@ -38,6 +38,6 @@ for filename in os.listdir(audio_dir): filename = open(filepath, "w") ### transcribe ### - transcriptor = Transcriptor(audiofile, filepath, language, modelSize, access_token, voicefolder, quantization) + transcriptor = Transcriptor(audiofile, output_dir, language, modelSize, access_token, voicefolder, quantization) res = transcriptor.faster_whisper() \ No newline at end of file diff --git a/main2.py b/main2.py index 4e33d07..4a6c8f4 100644 --- a/main2.py +++ b/main2.py @@ -51,7 +51,7 @@ if st.button("Transcribe"): #filename = open(filepath, "w") ### transcribe ### - transcriptor = Transcriptor(path, filepath, language, modelSize, access_token, voicefolder, quantization) + transcriptor = Transcriptor(path, outputfolder, language, modelSize, access_token, voicefolder, quantization) res = transcriptor.faster_whisper() print(f"Content has been written to {filepath}") st.success(f"Transcribe successful!") \ No newline at end of file diff --git a/main3.py b/main3.py index 1e084f3..5fbe8e0 100644 --- a/main3.py +++ b/main3.py @@ -47,7 +47,7 @@ if st.button("Transcribe"): #filename = open(filepath, "w") ### transcribe ### - transcriptor = Transcriptor(path, filepath, language, modelSize, access_token, voicefolder, quantization) + transcriptor = Transcriptor(path, outputfolder2, filepath=filepath, language, modelSize, access_token, voicefolder, quantization) res = transcriptor.faster_whisper() print(f"Content has been written to {filepath}") st.success(f"Transcribe successful!") \ No newline at end of file diff --git a/output/Mie ayam rindu malam.MP3.txt b/output/Mie ayam rindu malam.MP3.txt new file mode 100644 index 0000000..5553ef9 --- /dev/null +++ b/output/Mie ayam rindu malam.MP3.txt @@ -0,0 +1,18 @@ +DWI (0.3 : 0.5) : Terima kasih. +DWI (5.7 : 8.0) : Assalamualaikum +DWI (9.1 : 16.5) : siang pak oke siang maaf pak lagi mengganggu sebentar saya dari papan grup +DWI (17.6 : 37.7) : produknya mie, udang keju, sumai udang. Maaf pak kalau boleh tahu kebutuhannya setiap harinya berapa kilo kira-kira pak. +unknown (35.0 : 35.5) : Terima kasih. +DWI (39.5 : 42.0) : Ya, ga, looking back. +DWI (60.3 : 95.0) : Ini saya bawa, ini saya bawa sampel sih pak, mie-nya Mapan Group ini harganya relatif murah sih pak, Rp18,5 itu yang JS, ekonomi JS pak. Pack-packannya ada isi 10, isi suntur 100 gramnya. Kalau bapak biasanya pakai yang 80 gram, muka yang 100 gram pak? +DWI (95.9 : 104.8) : Dikasih tester aja dulu ya pak ya Saya tinggalin nomor telepon saya pak ya +DWI (105.8 : 109.0) : Oke Pak, 0821. +DWI (109.7 : 112.9) : 409 +DWI (113.9 : 115.3) : 208 +DWI (116.0 : 117.1) : 3, 4, Pak +DWI (117.9 : 118.6) : Uhhh. +DWI (119.8 : 127.5) : Iya, jangan makan siang ya Pak. +DWI (130.2 : 131.1) : Siang bu! +DWI (133.4 : 135.3) : Maaf ya bu, ya nggak ganggu bu. +DWI (138.0 : 139.8) : rame bu ya kayak gini nih ya +DWI (141.9 : 146.8) : Makasih ya bu Assalamualaikum diff --git a/speechlib/__pycache__/convert_to_mono.cpython-312.pyc b/speechlib/__pycache__/convert_to_mono.cpython-312.pyc index d8e2e4d..f2c0606 100644 Binary files a/speechlib/__pycache__/convert_to_mono.cpython-312.pyc and b/speechlib/__pycache__/convert_to_mono.cpython-312.pyc differ diff --git a/speechlib/core_analysis.py b/speechlib/core_analysis.py index fc578fa..9221410 100644 --- a/speechlib/core_analysis.py +++ b/speechlib/core_analysis.py @@ -15,7 +15,7 @@ import subprocess # by default use google speech-to-text API # if False, then use whisper finetuned version for sinhala -def core_analysis(file_name, voices_folder, log_folder, language, modelSize, ACCESS_TOKEN, model_type, quantization=False, custom_model_path=None, hf_model_id=None, aai_api_key=None): +def core_analysis(file_name, voices_folder, log_folder, logfile, language, modelSize, ACCESS_TOKEN, model_type, quantization=False, custom_model_path=None, hf_model_id=None, aai_api_key=None): # <-------------------PreProcessing file--------------------------> # convert compressed wav @@ -138,7 +138,7 @@ def core_analysis(file_name, voices_folder, log_folder, language, modelSize, ACC common_segments.append([start, end, segment[2], speaker]) # writing log file - write_log_file(common_segments, log_folder, file_name, language) + write_log_file(common_segments, log_folder, logfile, file_name, language) ## TODO cleaning segments and temp folder diff --git a/speechlib/speechlib.py b/speechlib/speechlib.py index 65feaff..2dcb086 100644 --- a/speechlib/speechlib.py +++ b/speechlib/speechlib.py @@ -5,7 +5,7 @@ from .convert_to_wav import (convert_to_wav) class Transcriptor: - def __init__(self, file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder=None, quantization=False): + def __init__(self, file, log_folder, logfile, language, modelSize, ACCESS_TOKEN, voices_folder=None, quantization=False): ''' transcribe a wav file @@ -234,28 +234,29 @@ class Transcriptor: self.voices_folder = voices_folder self.language = language self.log_folder = log_folder + self.logfile = logfile self.modelSize = modelSize self.quantization = quantization self.ACCESS_TOKEN = ACCESS_TOKEN def whisper(self): - res = core_analysis(self.file, self.voices_folder, self.log_folder, self.language, self.modelSize, self.ACCESS_TOKEN, "whisper", self.quantization) + res = core_analysis(self.file, self.voices_folder, self.log_folder, self.logfile, self.language, self.modelSize, self.ACCESS_TOKEN, "whisper", self.quantization) return res def faster_whisper(self): - res = core_analysis(self.file, self.voices_folder, self.log_folder, self.language, self.modelSize, self.ACCESS_TOKEN, "faster-whisper", self.quantization) + res = core_analysis(self.file, self.voices_folder, self.log_folder, self.logfile, self.language, self.modelSize, self.ACCESS_TOKEN, "faster-whisper", self.quantization) return res def custom_whisper(self, custom_model_path): - res = core_analysis(self.file, self.voices_folder, self.log_folder, self.language, self.modelSize, self.ACCESS_TOKEN, "custom", self.quantization, custom_model_path) + res = core_analysis(self.file, self.voices_folder, self.log_folder, self.logfile, self.language, self.modelSize, self.ACCESS_TOKEN, "custom", self.quantization, custom_model_path) return res def huggingface_model(self, hf_model_id): - res = core_analysis(self.file, self.voices_folder, self.log_folder, self.language, self.modelSize, self.ACCESS_TOKEN, "huggingface", self.quantization, None, hf_model_id) + res = core_analysis(self.file, self.voices_folder, self.log_folder, self.logfile, self.language, self.modelSize, self.ACCESS_TOKEN, "huggingface", self.quantization, None, hf_model_id) return res def assemby_ai_model(self, aai_api_key): - res = core_analysis(self.file, self.voices_folder, self.log_folder, self.language, self.modelSize, self.ACCESS_TOKEN, "assemblyAI", self.quantization, None, None, aai_api_key) + res = core_analysis(self.file, self.voices_folder, self.log_folder, self.logfile, self.language, self.modelSize, self.ACCESS_TOKEN, "assemblyAI", self.quantization, None, None, aai_api_key) return res class PreProcessor: diff --git a/speechlib/write_log_file.py b/speechlib/write_log_file.py index 22a0e9e..40d9201 100644 --- a/speechlib/write_log_file.py +++ b/speechlib/write_log_file.py @@ -1,7 +1,7 @@ import os from datetime import datetime -def write_log_file(common_segments, log_folder, file_name, language): +def write_log_file(common_segments, log_folder, logfile, file_name, language): if not os.path.exists(log_folder): os.makedirs(log_folder) @@ -12,8 +12,8 @@ def write_log_file(common_segments, log_folder, file_name, language): file_name = os.path.splitext(os.path.basename(file_name))[0] - #log_file = log_folder + "/" + file_name + "_" + current_time + "_" + language + ".txt" - log_file = log_folder + log_file = logfile + # log_file = log_folder lf=open(log_file,"wb")