Note
Please see Azure Cognitive Services for Speech documentation for the latest supported speech solutions.
EMMA Document Examples
This topic presents two examples of EMMA documents. You can use the first example as input to Simulator. The second example is the output generated by Simulator from the input file example. You can use the Simulator output file as input to the Simulator Results Analyzer tool.
Example 1: EMMA Input File
The following is an example of an EMMA file that you can use as input for Simulator. You can find this file under the Samples folder where you installed the Microsoft Speech Platform SDK 11. The audio files and grammars referenced in this document are in the Waves and Grammars folders, respectively.
<?xml version="1.0" encoding="utf-8"?>
<!-- The emma:emma element is the root of the document. It declares the emma
     namespace and the supporting ms namespace, which carries richer recognition
     result information. -->
<emma:emma version="1.0"
           xmlns:ms="https://www.microsoft.com/xmlns/webreco"
           xmlns:emma="http://www.w3.org/2003/04/emma"
           xmlns="http://www.example.com/example">
  <!-- The root has two kinds of children: emma:grammar elements, and one or more
       emma:group elements that contain utterance information. -->
  <!-- In this example, the grammars are located in a subfolder of the folder from
       which Simulator.exe is run. Each id names a grammar; the ms:grammar elements
       further down refer to these ids through emma:grammar-ref to tell the
       recognizer which grammars to activate (that is, to compare the utterance
       against) when performing a recognition. -->
  <emma:grammar id="grammar0" ref="grammars\menu_choices.grxml" />
  <emma:grammar id="grammar1" ref="grammars\help_choices.grxml" />
  <!-- A set of utterances is contained in an emma:group element. In this example,
       each child is also an emma:group element because the utterances may not all
       come from the same speaker. -->
  <emma:group id="toplevel">
    <!-- The first utterance in the sequence. -->
    <emma:group id="utterance_1">
      <emma:info>
        <!-- The ms:audio element tells the tool where to find the WAV file. In this
             example, the WAV file is located in a subfolder beneath the tool. The
             type attribute indicates that the input is in WAV format (and is not
             an emulation). -->
        <ms:audio ref="waves/utt_1.wav" type="audio/x-wav" />
        <!-- The transcript makes it possible to compare the recognition results
             later with the words that are known to be in the WAV file. -->
        <ms:transcript>
          <ms:original>sports</ms:original>
        </ms:transcript>
        <!-- The ms:active-grammars element tells the recognizer which grammars to
             activate for this utterance. Each emma:grammar-ref value matches the
             id of an emma:grammar element declared above. -->
        <ms:active-grammars>
          <ms:grammar emma:grammar-ref="grammar0" />
          <ms:grammar emma:grammar-ref="grammar1" />
        </ms:active-grammars>
      </emma:info>
    </emma:group>
    <!-- The second utterance in the sequence. -->
    <emma:group id="utterance_2">
      <!-- Information about the second utterance... -->
    </emma:group>
    <!-- The third utterance in the sequence. -->
    <emma:group id="utterance_3">
      <!-- Information about the third utterance... -->
    </emma:group>
    <!-- Potentially, information about additional utterances to be recognized. -->
  </emma:group>
</emma:emma>
Example 2: EMMA Output File
Simulator takes the EMMA input file from Example 1 and produces the following EMMA output file, which you can use as input to Simulator Results Analyzer. You can find this example file under the Samples folder where you installed the Microsoft Speech Platform SDK 11. The audio files and grammars referenced in this document are in the Waves and Grammars folders, respectively.
<?xml version="1.0" encoding="utf-8"?>
<emma:emma version="1.0"
           xmlns:ms="https://www.microsoft.com/xmlns/webreco"
           xmlns:emma="http://www.w3.org/2003/04/emma"
           xmlns="http://www.example.com/example">
  <!-- These emma:grammar elements correspond to the grammars used for the
       utterances. The recognition results themselves appear further down, inside
       each emma:group element. -->
  <emma:grammar id="grammar0" ref="grammars\menu_choices.grxml" />
  <emma:grammar id="grammar1" ref="grammars\help_choices.grxml" />
  <!-- Depending on the recognizer, more emma:grammar elements carrying additional
       id information may appear here. -->
  <emma:group id="toplevel">
    <emma:group id="utterance_1">
      <!-- The results for an utterance are divided between the emma:info element
           and its sibling emma:one-of element. emma:info holds the emulation
           information, the active grammars, and the request ids used by
           server-based recognizers. The sibling emma:one-of holds the
           interpretation, based on the semantics specified in the grammar. -->
      <emma:info ms:status="OK">
        <ms:audio ref="waves/utt_1.wav" type="audio/x-wav" />
        <!-- A transcript value was supplied for this utterance, so the content of
             ms:transcript is the emulation result. Because "sports" was given as
             the transcript, the recognizer reports here that the text "sports" is
             indeed recognized by the grammar named in emma:grammar-ref. -->
        <ms:transcript ms:status="OK">
          <ms:original>sports</ms:original>
          <emma:one-of id="utterance_1-transcript-reco-nbest-list"
                       disjunction-type="understanding"
                       emma:medium="acoustic"
                       emma:mode="voice">
            <emma:interpretation emma:time-ref-uri="#reco-nbest-list"
                                 emma:tokens="sports"
                                 id="utterance_1-transcript-reco-nbest-1"
                                 emma:offset-to-start="0"
                                 emma:confidence="1"
                                 ms:typespace="ECMA-262"
                                 ms:dataType="object"
                                 emma:grammar-ref="utterance_0.0.0.0-transcript-grammar-0"
                                 emma:duration="0"
                                 emma:lang="en-us">
              <application emma:confidence="1"
                           ms:actualConfidence="1"
                           ms:dataType="string"
                           ms:valueType="string" >SPORTS</application>
              <grammar emma:confidence="1"
                       ms:actualConfidence="1"
                       ms:dataType="string"
                       ms:valueType="string" >menu_choices.grxml</grammar>
            </emma:interpretation>
          </emma:one-of>
        </ms:transcript>
        <ms:active-grammars>
          <ms:grammar emma:grammar-ref="grammar0" />
          <ms:grammar emma:grammar-ref="grammar1" />
        </ms:active-grammars>
        <ms:requestid>2c25a256-19df-47b9-8b0c2e7e232</ms:requestid>
        <ms:sessionid>a2b68525-e8c6-49b6-9b1e0667cf6</ms:sessionid>
      </emma:info>
      <!-- The recognition result for the audio is in this emma:one-of element. -->
      <emma:one-of id="utterance_0.0.0.0-reco-nbest-list"
                   emma:start="1262633376270"
                   disjunction-type="understanding"
                   emma:medium="acoustic"
                   emma:mode="voice">
        <emma:interpretation emma:time-ref-uri="#reco-nbest-list"
                             emma:tokens="sports"
                             id="utterance_1-reco-nbest-1"
                             emma:offset-to-start="300"
                             emma:confidence="0.9641953"
                             ms:typespace="ECMA-262"
                             ms:dataType="object"
                             emma:grammar-ref="utterance_0.0.0.0-grammar-0"
                             emma:duration="850"
                             emma:lang="en-us">
          <emma:derived-from resource="#utterance_1-rule-tree-1" composite="false" />
          <application emma:confidence="0.9445539"
                       ms:actualConfidence="1"
                       ms:dataType="string"
                       ms:valueType="string" >SPORTS</application>
          <grammar emma:confidence="0.9445539"
                   ms:actualConfidence="1"
                   ms:dataType="string"
                   ms:valueType="string" >menu_choices.grxml</grammar>
        </emma:interpretation>
      </emma:one-of>
    </emma:group>
    <emma:group id="utterance_2">
      <!-- Information about the second utterance... -->
    </emma:group>
    <emma:group id="utterance_3">
      <!-- Information about the third utterance... -->
    </emma:group>
  </emma:group>
  <!-- The emma:derivation element holds the rich recognition result information,
       including duration, confidence, lattice, and phoneme structure. -->
  <emma:derivation>
    <emma:one-of id="utterance_1-word-list" disjunction-type="recognition">
      <emma:interpretation id="utterance_1-word-list-1">
        <emma:lattice emma:time-ref-uri="#reco-nbest-1" initial="1" final="2">
          <emma:arc emma:offset-to-start="0"
                    from="1"
                    emma:confidence="0.9641953"
                    emma:duration="770"
                    to="2">sports<emma:info><ms:sapiPhraseElement
                      ms:displayAttributes="2"
                      ms:lexicalForm="sports"
                      ms:actualConfidence="1"
                      ms:pronunciation="S P AO RA T S" /></emma:info></emma:arc>
        </emma:lattice>
      </emma:interpretation>
    </emma:one-of>
    <emma:one-of id="utterance_1-rule-tree-list" disjunction-type="recognition">
      <emma:interpretation emma:time-ref-uri="#reco-nbest-1"
                           id="utterance_1-rule-tree-1"
                           emma:confidence="0.9641953">
        <emma:derived-from resource="#utterance_1-word-list-1" composite="false" />
        <top emma:offset-to-start="0"
             emma:confidence="0.9641953"
             ms:actualConfidence="1"
             emma:duration="770" >sports</top>
      </emma:interpretation>
    </emma:one-of>
    <emma:one-of id="utterance_2-word-list" disjunction-type="recognition">
      <!-- Information about the second utterance... -->
    </emma:one-of>
    <emma:one-of id="utterance_2-rule-tree-list" disjunction-type="recognition">
      <!-- Information about the second utterance... -->
    </emma:one-of>
    <emma:one-of id="utterance_3-word-list" disjunction-type="recognition">
      <!-- Information about the third utterance... -->
    </emma:one-of>
    <emma:one-of id="utterance_3-rule-tree-list" disjunction-type="recognition">
      <!-- Information about the third utterance... -->
    </emma:one-of>
  </emma:derivation>
</emma:emma>