Note
Please see Azure Cognitive Services for Speech documentation for the latest supported speech solutions.
Microsoft Speech Platform
Create Grammars using ISpGrammarBuilder
You can use the ISpGrammarBuilder interface to programmatically create and modify recognition grammars. Although ISpGrammarBuilder offers fine-grained control over the constituent parts of a grammar, you may find that using the XML format defined by the Speech Recognition Grammar Specification Version 1.0 (SRGS) to author new grammars is more efficient and provides ample tools for specifying grammar functionality. SRGS grammars that have been loaded dynamically can be modified at runtime using the ISpGrammarBuilder interface.
Example
The code example below illustrates three approaches to implement a travel grammar, using the ISpGrammarBuilder interface. An approximation of the XML form is included for each of the following three grammar authoring approaches.
`
// Declare local identifiers: HRESULT hr = S_OK; CComPtr<ISpGrammarBuilder> cpGrammarBuilder; SPSTATEHANDLE hStateTravel; SPSTATEHANDLE hStateTravel_Second; SPSTATEHANDLE hStateMethod; SPSTATEHANDLE hStateDest;`// Create (if rule does not already exist) // top-level Rule, defaulting to Active. hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel;);
// Approach 1: List all possible phrases. // This is the most intuitive approach, and it does not sacrifice efficiency // because the grammar builder will merge shared sub-phrases when possible. // There is only one root state, hStateTravel, and the terminal NULL state, // and there are six unique transitions between root state and NULL state.
/* XML Approximation: <rule id="Travel"> <item> fly to Seattle </item> <item> fly to New York </item> <item> fly to Washington DC </item> <item> drive to Seattle </item> <item> drive to New York </item> <item> drive to Washington DC </item> </rule> */
// Create set of peer phrases, each containing complete phrase. // Note: the word delimiter is set as " ", so that the text we // attach to the transition can be multiple words (for example, // "fly to Seattle" is implicitly "fly" + "to" + "Seattle"): if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL); }
Figure 1: States and Transitions of Approach 1
// Approach 2: Construct the directed-graph using intermediate states.
// This approach gives you more control of the grammar layout, and may be
// easier to implement when you have some combinations.
// Using this approach, there is one root state (hStateTravel), one interim state
// (hStateTravel_Second), and the final terminal NULL state. There are three
// unique transitions between the root state and the interim state. And there are
// three more unique transitions between the interim state and the final NULL state.
// Note that the graph includes only 2-transition paths. The grammar will not recognize
// only the first transition or the second transition (that is, "fly to" will not be
// recognized, nor will "Seattle", but "fly to Seattle" will be recognized).

/* XML Approximation:
    <rule id="Travel" >
        <one-of>
            <item> fly to </item>
            <item> drive to </item>
            <item> take train to </item>
        </one-of>
        <one-of>
            <item> Seattle </item>
            <item> New York </item>
            <item> Washington DC </item>
        </one-of>
    </rule>
*/

// Create the interim (second) state within the rule identified by
// the root state; transitions from the root will end at this state.
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->CreateNewState(hStateTravel, &hStateTravel_Second);
}

// Attach the first part of the phrase to the first transition
// (root state -> interim state):
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel, hStateTravel_Second, L"fly to", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel, hStateTravel_Second, L"drive to", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel, hStateTravel_Second, L"take train to", L" ", SPWT_LEXICAL, 1, NULL);
}

// Attach the second and final part of the phrase to
// the last transition (ending with the NULL state):
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel_Second, NULL, L"Seattle", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel_Second, NULL, L"New York", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateTravel_Second, NULL, L"Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
}
Figure 2: States and Transitions of Approach 2
// Approach 3: Use rule references.
// This approach lets you structure the grammars and is useful when building large grammars,
// since it allows for reusable rules (see ruleref Element (Microsoft.Speech)).
// Note that forward-declarations are allowed, since the grammar validation is not performed
// until the XML is compiled or the GrammarBuilder instance is 'Commit'ted.
// The main difference between Approach 2 and Approach 3 is the creation of separate
// rules for the method of travel and the list of destinations, and a root rule that
// references both the new rules. This also facilitates reuse of the newly created rules
// by other rules (for example, create a second rule called "Geography" which combines the phrase
// "where is" with the "Dest" rule, allowing the user to say "where is New York"), without
// requiring the grammar author/designer to place the same phrase text in multiple places
// of the grammar, which leads to grammar maintenance problems.

/* XML Approximation:
    <rule id="Travel">
        <ruleref uri="#Method"/>
        <ruleref uri="#Dest"/>
    </rule>
    <rule id="Method">
        <one-of>
            <item> fly to </item>
            <item> drive to </item>
            <item> take train to </item>
        </one-of>
    </rule>
    <rule id="Dest" sapi:dynamic="true">
        <one-of>
            <item> Seattle </item>
            <item> New York </item>
            <item> Washington DC </item>
        </one-of>
    </rule>
*/

if (SUCCEEDED(hr))
{
    // Note the two new rules ("Method" and "Dest") are NOT marked Top-level, since they are
    // reused by other top-level rules, and are not by themselves recognizable phrases:
    hr = cpGrammarBuilder->GetRule(L"Method", 0, 0, TRUE, &hStateMethod);
}
if (SUCCEEDED(hr))
{
    // Adding the "Dest" rule as Dynamic allows the programmatic grammar author to
    // update only the "Dest" rule after the initial ::Commit (for example, to add more travel
    // destinations depending on user history, preferences, or geographic data):
    hr = cpGrammarBuilder->GetRule(L"Dest", 0, SPRAF_Dynamic, TRUE, &hStateDest);
}
if (SUCCEEDED(hr))
{
    // Create an interim state (same as Approach 2).
    hr = cpGrammarBuilder->CreateNewState(hStateTravel, &hStateTravel_Second);
}

// Then attach rules to the transitions from Root->Interim and Interim->NULL state:
// This is analogous to using the ruleref element in an SRGS grammar.
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddRuleTransition(hStateTravel, hStateTravel_Second, hStateMethod, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddRuleTransition(hStateTravel_Second, NULL, hStateDest, 1, NULL);
}

// Add the set of sibling transitions for travel "method":
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateMethod, NULL, L"fly to", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateMethod, NULL, L"drive to", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateMethod, NULL, L"take train to", L" ", SPWT_LEXICAL, 1, NULL);
}

// Add the set of sibling transitions for travel "destinations":
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateDest, NULL, L"Seattle", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateDest, NULL, L"New York", L" ", SPWT_LEXICAL, 1, NULL);
}
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->AddWordTransition(hStateDest, NULL, L"Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
}

// Must Commit the grammar changes before using the grammar.
if (SUCCEEDED(hr))
{
    hr = cpGrammarBuilder->Commit(0);
}
if (SUCCEEDED(hr))
{
    // Do some more stuff here.
}
Figure 3: States and Transitions of Approach 3