Fix output-path handling, re-save WAV input before mono conversion, and replace a nested-quote f-string

2025-05-30 14:11:01 +07:00 · 2025-05-30 14:11:01 +07:00 · 7eb80ddb73
commit 7eb80ddb73
parent b6eccf4194
14 changed files with 42 additions and 7 deletions
--- a/config.ini
+++ b/config.ini
@ -8,4 +8,4 @@ voicefolder = D:\Pythoncode\speechlib\voices
 beamsize = 5
 batchsize = 4
 accesstoken = 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
-quantization = False
+quantization = True
--- a/main.py
+++ b/main.py
@ -38,6 +38,6 @@ for filename in os.listdir(audio_dir):
        filename = open(filepath, "w")

        ### transcribe ###
-        transcriptor = Transcriptor(audiofile, output_dir, language, modelSize, access_token, voicefolder, quantization)
+        transcriptor = Transcriptor(audiofile, filepath, language, modelSize, access_token, voicefolder, quantization)
        res = transcriptor.faster_whisper()
        
--- a/main2.py
+++ b/main2.py
@ -48,7 +48,7 @@ if st.button("Transcribe"):
            #filepath = os.path.join(outputfolder, os.path.basename(audiofilewithoutextension).split('/')[-1]+".txt")
            filepath = os.path.join(outputfolder, f"{outlet}-{crew_id}-{crew_name}-{date}-Transkrip.txt")
            print(f"Output file: {filepath}")
-            filename = open(filepath, "w")
+            #filename = open(filepath, "w")

            ### transcribe ###
            transcriptor = Transcriptor(path, filepath, language, modelSize, access_token, voicefolder, quantization)
--- a/main3.py
+++ b/main3.py
@ -44,7 +44,7 @@ if st.button("Transcribe"):
            #filepath = os.path.join(outputfolder, os.path.basename(audiofilewithoutextension).split('/')[-1]+".txt")
            filepath = os.path.join(outputfolder2, f"{crew_id}-{crew_name}-{customer_name}-{date}-Transkrip.txt")
            print(f"Output file: {filepath}")
-            filename = open(filepath, "w")
+            #filename = open(filepath, "w")

            ### transcribe ###
            transcriptor = Transcriptor(path, filepath, language, modelSize, access_token, voicefolder, quantization)
--- a/output/Candra
+++ b/output/Candra
--- a/mas_113350_id.txt
+++ b/mas_113350_id.txt
@ -0,0 +1,11 @@
+WATI (2.1 : 3.3) :  See ya. 
+WATI (6.3 : 9.9) :  dari mapan cik mau kunjungan 
+WATI (10.6 : 14.3) :  Oh iya silahkan masuk mbak. Tak cek dulu ya cik ya. 
+WATI (15.3 : 17.7) :  Kita hitung dulu stoknya. 
+WATI (18.6 : 20.2) :  Terima kasih. 
+WATI (21.6 : 38.1) :  yang layan nih pesan-pesan tok ya cik ya onan-onan hujan terus ya hujan angin ini tinggal berapa itunya apa kemarin somek udang ya somek udang sama 
+DWI (27.9 : 29.5) :  Terima kasih. 
+WATI (38.7 : 40.0) :  itunya sampean 
+WATI (44.2 : 45.7) :  Minya tinggal satu lho, Ci. 
+WATI (46.7 : 47.7) :  Kita hitung ya. 
+WATI (51.1 : 52.8) :  Mbak helmnya diambil aja. 
--- a/output/REC20250526103049.WAV.txt
+++ b/output/REC20250526103049.WAV.txt
--- a/speechlib/pycache/convert_to_mono.cpython-312.pyc
+++ b/speechlib/pycache/convert_to_mono.cpython-312.pyc
--- a/speechlib/pycache/core_analysis.cpython-312.pyc
+++ b/speechlib/pycache/core_analysis.cpython-312.pyc
--- a/speechlib/pycache/wav_segmenter.cpython-312.pyc
+++ b/speechlib/pycache/wav_segmenter.cpython-312.pyc
--- a/speechlib/pycache/write_log_file.cpython-312.pyc
+++ b/speechlib/pycache/write_log_file.cpython-312.pyc
--- a/speechlib/convert_to_mono.py
+++ b/speechlib/convert_to_mono.py
@ -1,8 +1,31 @@
 import wave
 import numpy as np

+import soundfile as sf
+
+def resave_audio(input_file, output_file):
+    """Loads an audio file and resaves it.
+
+    Args:
+        input_file (str): Path to the input audio file.
+        output_file (str): Path to save the resaved audio file.
+    """
+    try:
+        # Read the audio file
+        data, samplerate = sf.read(input_file)
+
+        # Write the audio data to a new file
+        sf.write(output_file, data, samplerate)
+        print(f"Successfully resaved audio from '{input_file}' to '{output_file}'")
+
+    except Exception as e:
+        print(f"Error processing audio: {e}")
+
 def convert_to_mono(input_wav):
-    # Open the input WAV file
+    # Resave WAV file
+    resave_audio(input_wav, input_wav.split('.')[0] + "_pcm.wav")
+    input_wav = input_wav.split('.')[0] + "_pcm.wav"
+
    with wave.open(input_wav, 'rb') as input_file:
        # Get the parameters of the input file
        params = input_file.getparams()
--- a/speechlib/wav_segmenter.py
+++ b/speechlib/wav_segmenter.py
@ -23,7 +23,7 @@ def wav_file_segmentation(file_name, segments, language, modelSize, model_type,
        end = segment[1] * 1000     # end time in milliseconds
        clip = audio[start:end]
        i = i + 1
-        file = folder_name + "/" + f"segment-{file_name.split("\\")[-1].split(".")[0]}"+ str(i) + ".wav"
+        file = folder_name + "/" + "segment-{}".format(file_name.split('\\')[-1].split(".")[0])+ str(i) + ".wav"
        clip.export(file, format="wav")

        try:
--- a/speechlib/write_log_file.py
+++ b/speechlib/write_log_file.py
@ -12,7 +12,8 @@ def write_log_file(common_segments, log_folder, file_name, language):

    file_name = os.path.splitext(os.path.basename(file_name))[0]

-    log_file = log_folder + "/" + file_name + "_" + current_time + "_" + language + ".txt"
+    #log_file = log_folder + "/" + file_name + "_" + current_time + "_" + language + ".txt"
+    log_file = log_folder
    
    lf=open(log_file,"wb")