I am using the speech to text but only able to transcribe about 50% of the audio. (ms word is able to transcribe the entire audio file)
audio file is wav, only 2 min and 15 sec, size: 2112 kb
this is my first time using this. Is there any setting that could affect this? below is my script
import os
import requests
# Define the necessary information
subscription_key = "********"
region = "westus"
# Set up the headers
headers = {
"Ocp-Apim-Subscription-Key": subscription_key
}
folder_path = "**********"
headers = {
"Ocp-Apim-Subscription-Key": subscription_key,
"Content-Type": "audio/wav",
## "Authorization": f"Bearer {token}"###token not needed
}
for file_name in os.listdir(folder_path):
if file_name.endswith(".wav"):
audio_file_path = os.path.join(folder_path, file_name)
file_name_without_extension = os.path.splitext(file_name)[0]
uri = "https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US&format=detailed"
with open(audio_file_path, "rb") as audio_file:
response = requests.post(uri, headers=headers, data=audio_file)
response_json = response.json()
transcription = response_json.get("DisplayText", "")
output_file_path = os.path.join(folder_path, f"{file_name_without_extension}.txt")
with open(output_file_path, "w") as output_file:
output_file.write(transcription)
print("Transcription completed for all audio files in the folder.")
input("Press Enter to exit...")