Amazon Polly · JSON Structure
Amazon Polly Structure
Schema defining the structure of Amazon Polly speech synthesis resources, including voice configurations, lexicons, and synthesis task parameters for converting text to lifelike speech.
Type: object
Properties: 9
Required: 3
AI · Machine Learning · Speech Synthesis · Text-To-Speech · TTS · Voice · SSML · Neural Engine · Generative AI
Amazon Polly Speech Synthesis Definition is a JSON Structure definition published by Amazon Polly, describing 9 properties, of which 3 are required. It conforms to the https://json-structure.org/meta/core/v0/# meta-schema.
Properties
Engine
LanguageCode
LexiconNames
OutputFormat
SampleRate
SpeechMarkTypes
Text
TextType
VoiceId
Meta-schema: https://json-structure.org/meta/core/v0/#
JSON Structure
{
"$schema": "https://json-structure.org/meta/core/v0/#",
"$id": "https://schemas.apievangelist.com/amazon-polly/speech-synthesis-definition",
"name": "Amazon Polly Speech Synthesis Definition",
"description": "Schema defining the structure of Amazon Polly speech synthesis resources, including voice configurations, lexicons, and synthesis task parameters for converting text to lifelike speech.",
"type": "object",
"required": [
"OutputFormat",
"Text",
"VoiceId"
],
"properties": {
"Engine": {
"type": "string",
"enum": [
"standard",
"neural",
"long-form",
"generative"
],
"description": "Specifies the engine for Amazon Polly to use when processing input text."
},
"LanguageCode": {
"type": "string",
"description": "The language identification tag for the voice."
},
"LexiconNames": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[0-9A-Za-z]{1,20}$"
},
"maxItems": 5,
"description": "List of one or more pronunciation lexicon names to apply during synthesis."
},
"OutputFormat": {
"type": "string",
"enum": [
"json",
"mp3",
"ogg_vorbis",
"pcm"
],
"description": "The format in which the returned output will be encoded."
},
"SampleRate": {
"type": "string",
"description": "The audio frequency specified in Hz.",
"enum": [
"8000",
"16000",
"22050",
"24000"
]
},
"SpeechMarkTypes": {
"type": "array",
"items": {
"type": "string",
"enum": [
"sentence",
"ssml",
"viseme",
"word"
]
},
"description": "The types of speech marks returned for the input text."
},
"Text": {
"type": "string",
"description": "The input text to synthesize.",
"maxLength": 3000
},
"TextType": {
"type": "string",
"enum": [
"ssml",
"text"
],
"description": "Specifies whether the input text is plain text or SSML."
},
"VoiceId": {
"type": "string",
"description": "The voice ID to use for the synthesis."
}
},
"$defs": {
"Voice": {
"type": "object",
"description": "Description of an Amazon Polly voice.",
"properties": {
"Gender": {
"type": "string",
"enum": [
"Female",
"Male"
],
"description": "Gender of the voice."
},
"Id": {
"type": "string",
"description": "Amazon Polly assigned voice ID."
},
"LanguageCode": {
"type": "string",
"description": "Language code of the voice."
},
"LanguageName": {
"type": "string",
"description": "Human-readable name of the language."
},
"Name": {
"type": "string",
"description": "Name of the voice."
},
"AdditionalLanguageCodes": {
"type": "array",
"items": {
"type": "string"
},
"description": "Additional language codes the voice supports."
},
"SupportedEngines": {
"type": "array",
"items": {
"type": "string",
"enum": [
"standard",
"neural",
"long-form",
"generative"
]
},
"description": "Specifies which engines are supported by the voice."
}
}
},
"Lexicon": {
"type": "object",
"description": "A pronunciation lexicon stored in an AWS Region.",
"properties": {
"Name": {
"type": "string",
"description": "Name of the lexicon.",
"pattern": "^[0-9A-Za-z]{1,20}$"
},
"Content": {
"type": "string",
"description": "Lexicon content in PLS format."
},
"LexiconArn": {
"type": "string",
"description": "The ARN of the lexicon."
},
"LexemesCount": {
"type": "int32",
"description": "Number of lexemes in the lexicon."
},
"Size": {
"type": "int32",
"description": "Total size of the lexicon in characters."
},
"Alphabet": {
"type": "string",
"description": "Phonetic alphabet used in the lexicon.",
"enum": [
"ipa",
"x-sampa"
]
},
"LanguageCode": {
"type": "string",
"description": "Language code the lexicon applies to."
},
"LastModified": {
"type": "datetime",
"description": "Timestamp when the lexicon was last modified."
}
}
},
"SynthesisTask": {
"type": "object",
"description": "An asynchronous speech synthesis task.",
"properties": {
"TaskId": {
"type": "string",
"description": "The unique identifier of the synthesis task."
},
"TaskStatus": {
"type": "string",
"enum": [
"scheduled",
"inProgress",
"completed",
"failed"
],
"description": "Current status of the synthesis task."
},
"TaskStatusReason": {
"type": "string",
"description": "Reason for the current status of the task."
},
"OutputUri": {
"type": "uri",
"description": "URI of the output speech file."
},
"OutputFormat": {
"type": "string",
"enum": [
"json",
"mp3",
"ogg_vorbis",
"pcm"
]
},
"Engine": {
"type": "string",
"enum": [
"standard",
"neural",
"long-form",
"generative"
]
},
"VoiceId": {
"type": "string"
},
"LanguageCode": {
"type": "string"
},
"CreationTime": {
"type": "datetime"
},
"RequestCharacters": {
"type": "int32",
"description": "Number of billable characters in the request."
}
}
}
}
}