How do I get word-level timestamps for each sentence in real-time speech recognition?

莓 草 0 Reputation points
2024-07-05T06:25:10.16+00:00

I am using the Speech SDK for Go.

This is my Go code:

// Do runs one single-shot recognition over the WAV file at path and returns
// the recognized text.
//
// The file at path is removed when Do returns, whether it succeeds or fails.
// A subscription key/region pair is picked at random from AccessKeyList.
// Do waits for the recognition outcome for at most 120 seconds and also
// returns early with ctx.Err() if ctx is cancelled first.
//
// Word-level timestamps are requested on the speech config, but only
// outcome.Result.Text is returned, so timing detail is not part of the
// return value.
// NOTE(review): per-word timing presumably has to be read from the result's
// properties (the service's JSON response) — confirm against the SDK docs.
func (m *microsoft) Do(ctx context.Context, path string) (string, error) {
	// Registered once, up front, so every return path cleans up the input.
	defer os.Remove(path)

	// rand.Intn panics on a non-positive argument; fail with an error instead.
	if len(AccessKeyList) == 0 {
		return "", errors.New("no access keys configured")
	}
	accessKeyConfig := AccessKeyList[rand.Intn(len(AccessKeyList))]
	subscription := accessKeyConfig.Key
	region := accessKeyConfig.Region

	audioConfig, err := audio.NewAudioConfigFromWavFileInput(path)
	if err != nil {
		fwlog.New(ctx).Info("", "audioConfigErr")
		return "", err
	}
	defer audioConfig.Close()

	config, err := speech.NewSpeechConfigFromSubscription(subscription, region)
	if err != nil {
		fwlog.New(ctx).Info("", "configErr")
		return "", err
	}
	// Defer the close immediately after the error check, before further use.
	defer config.Close()
	config.RequestWordLevelTimestamps()

	speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig)
	if err != nil {
		fwlog.New(ctx).Info("", "speechRecognizerErr")
		return "", err
	}
	defer speechRecognizer.Close()

	speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Started (ID=", event.SessionID, ")")
	})
	speechRecognizer.Recognizing(recognizingHandler)
	speechRecognizer.Recognized(recognizedHandler)
	speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Stopped (ID=", event.SessionID, ")")
	})

	task := speechRecognizer.RecognizeOnceAsync()

	// Explicit timer so it is released as soon as Do returns.
	timeout := time.NewTimer(120 * time.Second)
	defer timeout.Stop()

	var outcome speech.SpeechRecognitionOutcome
	select {
	case outcome = <-task:
	case <-ctx.Done():
		// Honour caller cancellation instead of blocking for the full timeout.
		return "", ctx.Err()
	case <-timeout.C:
		fmt.Println("Timed out")
		return "", errors.New("Timed out")
	}
	defer outcome.Close()

	if outcome.Error != nil {
		fwlog.New(ctx).Info("", "outcomeErr")
		return "", outcome.Error
	}
	return outcome.Result.Text, nil
}

Azure AI Speech
Azure AI Speech
An Azure service that integrates speech processing into apps and services.
1,519 questions
{count} votes

1 answer

Sort by: Most helpful
  1. navba-MSFT 19,735 Reputation points Microsoft Employee
    2024-07-05T10:47:04.7+00:00

    @莓 草 Welcome to the Microsoft Q&A Forum, and thank you for posting your query here!

    .

    While researching this, I found how it is done in the Java SDK.

    .

    .

    Following the same approach in the Go SDK, could you please check whether the code below helps?

    jsonText := outcome.Result.Properties.GetProperty(common.SpeechServiceResponseJSONResult, "")
    

    .

    If you have any follow-up questions, please let me know. I would be happy to help.