Why does Viseme Callback no longer deliver blendshapes?

Ebertowski, Rene 5 Reputation points
2023-12-02T11:58:31.9033333+00:00

Hi there,

A few months ago, I developed a project based on speech synthesis and viseme blendshapes. At that time, I had no problems receiving the blendshapes. Recently I realized that my code was no longer working: I still receive viseme events with the corresponding IDs and audio offsets, but no blendshapes are delivered anymore. I then looked at the current documentation and ran the example code, only to find that it didn't work either. Here is the code to reproduce:

import os

from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, CancellationReason
from dotenv import load_dotenv


def viseme_cb(evt):
    """Print a received viseme event and its animation payload.

    Intended as a handler for the synthesizer's ``viseme_received`` signal.

    Args:
        evt: The viseme event object. All of its instance attributes are
            dumped via ``vars`` and its ``animation`` attribute is printed
            on a separate line.
    """
    print(f'evt: {vars(evt)}')
    # `Animation` is an xml string for SVG or a json string for blend shapes
    animation = evt.animation
    print(f'animation: {animation}')


if __name__ == '__main__':
    # Script entry point: synthesize one SSML utterance that requests
    # FacialExpression visemes and print every viseme event received.

    # load .env
    load_dotenv(override=True)

    # check if azure key variable exists in os environment
    if 'AZURE_API_KEY' not in os.environ:
        raise RuntimeError("There is no 'AZURE_API_KEY' in the environment variables which is needed for operation!")

    # create Azure SpeechSynthesizer instance
    azure_speech_config = SpeechConfig(subscription=os.getenv('AZURE_API_KEY'), region='germanywestcentral')
    speech_synthesizer = SpeechSynthesizer(speech_config=azure_speech_config, audio_config=None)

    # Subscribes to viseme received event
    speech_synthesizer.viseme_received.connect(viseme_cb)

    # execute speech synthesis
    # example from https://video2.skills-academy.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-structure#viseme-element
    ssml = (
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
        'xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">'
        '   <voice name="en-US-JennyNeural">'
        '       <mstts:viseme type="FacialExpression"/>'
        '       Rainbow has seven colors: Red, orange, yellow, green, blue, indigo, and violet.'
        '   </voice>'
        '</speak>')
    # .get() blocks until the asynchronous synthesis call has a result
    result = speech_synthesizer.speak_ssml_async(ssml).get()
    print(f'result: {result}')

    # canceled and error printing
    if result.reason == ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print(f"Speech synthesis canceled: {cancellation_details.reason}")
        if cancellation_details.reason == CancellationReason.Error and cancellation_details.error_details:
            print(f"Error details: {cancellation_details.error_details}")
            print("Did you set the speech resource key and region values?")

Maybe I'm missing something here.
When I run it, the output is as follows:

evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 500000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 1000000, '_viseme_id': 13, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 2375000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 2812500, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 3250000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 3750000, '_viseme_id': 21, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 4250000, '_viseme_id': 8, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 4625000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 5125000, '_viseme_id': 12, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 5875000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 6375000, '_viseme_id': 15, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 7000000, '_viseme_id': 15, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 7750000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 8500000, '_viseme_id': 18, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 8875000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 9375000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 10125000, '_viseme_id': 20, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 11000000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 12125000, '_viseme_id': 14, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 12625000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 13500000, '_viseme_id': 13, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 14500000, '_viseme_id': 15, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 15620000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 17625000, '_viseme_id': 13, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 18375000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 20125000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 20750000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 22625000, '_viseme_id': 3, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 23750000, '_viseme_id': 13, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 24500000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 25250000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 25750000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 26312500, '_viseme_id': 16, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 26870000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 28375000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 29000000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 29875000, '_viseme_id': 14, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 30750000, '_viseme_id': 8, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 31812500, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 32870000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 34500000, '_viseme_id': 20, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 35125000, '_viseme_id': 13, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 36000000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 37750000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 38250000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 39750000, '_viseme_id': 21, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 40250000, '_viseme_id': 14, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 41250000, '_viseme_id': 7, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 44250000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 46000000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 47000000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 47375000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 47875000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 48375000, '_viseme_id': 20, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 49250000, '_viseme_id': 8, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 50500000, '_viseme_id': 4, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 51750000, '_viseme_id': 0, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 53750000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 54625000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 55375000, '_viseme_id': 18, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 56125000, '_viseme_id': 11, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 57375000, '_viseme_id': 1, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 58250000, '_viseme_id': 14, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9750>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 58750000, '_viseme_id': 6, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E97B0>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 59875000, '_viseme_id': 19, '_animation': ''}
animation: 
evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x00000240D52E9780>, '_result_id': '42dfc5caa98e4316b6e32e07b2f13ec3', '_audio_offset': 61620000, '_viseme_id': 0, '_animation': ''}
animation: 
result: SpeechSynthesisResult(result_id=42dfc5caa98e4316b6e32e07b2f13ec3, reason=ResultReason.SynthesizingAudioCompleted, audio_length=221646)

Process finished with exit code 0

The version of azure-cognitiveservices-speech I use is 1.33.0.

Maybe someone can help me out with this

Azure AI Speech
Azure AI Speech
An Azure service that integrates speech processing into apps and services.
1,506 questions
{count} vote

1 answer

Sort by: Most helpful
  1. navba-MSFT 19,495 Reputation points Microsoft Employee
    2023-12-04T05:13:29.6766667+00:00

    @Ebertowski, Rene Welcome to Microsoft Q&A Forum, Thank you for posting your query here!

    I understand that the animation payloads are arriving empty and blendshapes are not being delivered with the viseme events.
    .
    I am unable to reproduce the issue at my end. The above sample worked fine and the blendshapes details are loading correctly even with 1.33.0 version of the sdk.
    Could you please confirm whether the older version (1.32.1) of the azure-cognitiveservices-speech SDK worked as expected? I tried this on my resource deployed to the West Europe region. Could you please check this in a different region?
    .
    Also, are you aware that Azure AI Speech has announced a public preview of text to speech avatar? More info here. Custom text to speech avatar access is limited based on eligibility and usage criteria. Request access on the intake form. Try sample code, Real-time synthesis (SDK), Live chat with Azure Open AI in behind (SDK)
    .

    See the output from my machine for the same sample code:

    animation:
    
    evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x000001F5B970D510>, '_result_id': 'f3aa05aba199414ab98418e84102c817', '_audio_offset': 58250000, '_viseme_id': 14, '_animation': ''}
    
    animation:
    
    evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x000001F5B970D4D0>, '_result_id': 'f3aa05aba199414ab98418e84102c817', '_audio_offset': 58750000, '_viseme_id': 6, '_animation': ''}
    
    animation:
    
    evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x000001F5B970D510>, '_result_id': 'f3aa05aba199414ab98418e84102c817', '_audio_offset': 59875000, '_viseme_id': 19, '_animation': ''}
    
    animation:
    
    evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x000001F5B970D4D0>, '_result_id': 'f3aa05aba199414ab98418e84102c817', '_audio_offset': 61620000, '_viseme_id': 0, '_animation': ''}
    
    animation:
    
    evt: {'_SpeechSynthesisVisemeEventArgs__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x000001F5B970DE10>, '_result_id': 'f3aa05aba199414ab98418e84102c817', '_audio_offset': 0, '_viseme_id': 0, 
    '_animation': '{"FrameIndex":0,"BlendShapes":
    [[0.171,0.164,0.038,0,0,0.095,0,0.171,0.164,0.074,0,0,0.095,0,0.088,0.028,0,0.311,0.254,0.177,0.179,0.011,0.004,0.007,0.016,0.04,0.017,0.056,0.051,0.123,0.099,0.089,0.084,0.018,0.1,0.023,0.024,0.262,0.258,0.02,0.02,0.014,0.014,0.091,0,0,0.021,0.054,0.058,0.038,0.039,0,0.013,-0.001,0.002],[0.171,0.164,0.039,0,0,0.095,0,0.171,0.164,0.074,0,0,0.095,0,0.08,0.026,0,0.371,0.37,0.156,0.308,0.01,0.004,-0,0.005,0.054,0.016,0.052,0.047,0.113,0.091,0.114,0.102,0.017,0.091,0.021,0.022,0.204,0.2,0.018,0.019,0.015,0.015,0.091,0,0,0.02,0.05,0.054,0.035,0.036,0,0.013,-0.001,0.002],[0.171,0.164,0.039,0,0,0.095,0,0.171,0.164,0.073,0,0,0.095,0,0.088,0.024,0,0.362,0.39,0.113,0.332,0.009,0.004,0.008,0.01,0.045,0.014,0.048,0.044,0.104,0.084,0.131,0.124,0.016,0.084,0.019,0.02,0.176,0.173,0.017,0.017,0.015,0.015,0.091,0,0,0.018,0.046,0.05,0.032,0.033,0,0.013,-0.001,0.002],[0.171,0.164,0.04,0,0,0.095,0,0.171,0.164,0.072,0,0,0.095,0,0.086,0.022,0,0.343,0.38,0.091,0.335,0.008,0.003,0.01,0.011,0.039,0.013,0.045,0.04,0.096,0.078,0.131,0.124,0.015,0.078,0.018,0.019,0.159,0.156,0.015,0.016,0.015,0.015,0.091,0,0,0.017,0.042,0.046,0.03,0.031,0,0.013,-0.001,0.002],[0.171,0.164,0.04,0,0,0.095,0,0.171,0.164,0.072,0,0,0.095,0,0.079,0.022,0,0.334,0.372,0.091,0.337,0.008,0.003,0.011,0.011,0.037,0.013,0.044,0.04,0.095,0.077,0.127,0.117,0.014,0.076,0.018,0.019,0.156,0.153,0.015,0.016,0.015,0.015,0.091,0,0,0.017,0.042,0.045,0.03,0.03,0,0.013,-0.001,0.002],[0.171,0.164,0.041,0,0,0.095,0,0.171,0.164,0.072,0,0,0.095,0,0.081,0.023,0,0.337,0.374,0.104,0.336,0.009,0.004,0.018,0.016,0.033,0.014,0.047,0.043,0.102,0.083,0.129,0.12,0.015,0.082,0.019,0.02,0.174,0.169,0.016,0.017,0.015,0.015,0.091,0,0,0.018,0.045,0.049,0.032,0.033,0,0.013,-0.001,0.002],[0.171,0.164,0.04,0,0,0.095,0,0.171,0.164,0.072,0,0,0.095,0,0.076,0.025,0,0.349,0.375,0.133,0.339,0.01,0.004,0.023,0.019,0.035,0.015,0.051,0.046,0.111,0.089,0.124,0.112,0.017,0.089,0.021,0.022,0.196,0.189,0.018,0.018,0.015,0.015,0.091,0,0,0.019,0.049,0.053,0.035,0.03
5,0,0.013,-0.001,0.002],[0.171,0.164,0.04,0,0,0.095,0,0.171,0.164,0.072,0,0,0.095,0,0.078,0.027,0,0.345,0.343,0.161,0.295,0.011,0.004,0.031,0.029,0.031,0.016,0.056,0.05,0.12,0.097,0.115,0.108,0.018,0.097,0.023,0.024,0.23,0.222,0.019,0.02,0.015,0.015,0.091,0,0,0.021,0.053,0.058,..>
    
    
    

    Awaiting your reply.

    1 person found this answer helpful.
    0 comments No comments