Add a text-to-speech demo (#744)

Co-authored-by: Hugo Locurcio <hugo.locurcio@hugo.pro>
This commit is contained in:
bruvzg
2023-02-24 19:11:55 +02:00
committed by GitHub
parent fa3c247bd5
commit 78dffe0d04
8 changed files with 449 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
# Text-to-Speech Demo
This is a demo showing text-to-speech support.
Language: GDScript
Renderer: Compatibility
## How does it work?
It uses `tts_*()` methods of the [`DisplayServer`](https://docs.godotengine.org/en/latest/classes/class_displayserver.html) singleton
to enumerate voice information, send utterances to the OS TTS API, and receive callback signals.
## Screenshots
![Screenshot](screenshots/text_to_speech.webp)

View File

@@ -0,0 +1,244 @@
[gd_scene load_steps=2 format=3 uid="uid://u5emvyeyodyh"]
[ext_resource type="Script" path="res://voice_list.gd" id="1_0bwjs"]
[node name="Control" type="Control"]
layout_mode = 3
anchors_preset = 8
anchor_left = 0.5
anchor_top = 0.5
anchor_right = 0.5
anchor_bottom = 0.5
offset_left = -576.0
offset_top = -312.0
offset_right = -576.0
offset_bottom = -312.0
grow_horizontal = 2
grow_vertical = 2
size_flags_horizontal = 4
size_flags_vertical = 4
script = ExtResource("1_0bwjs")
[node name="LineEditFilterLang" type="LineEdit" parent="."]
layout_mode = 0
offset_left = 416.0
offset_top = 304.0
offset_right = 704.0
offset_bottom = 337.0
theme_override_font_sizes/font_size = 16
placeholder_text = "Language"
[node name="LineEditFilterName" type="LineEdit" parent="."]
layout_mode = 0
offset_left = 96.0
offset_top = 304.0
offset_right = 408.0
offset_bottom = 337.0
theme_override_font_sizes/font_size = 16
placeholder_text = "Name"
[node name="Label" type="Label" parent="LineEditFilterName"]
layout_mode = 0
offset_left = -76.0
offset_top = 3.0
offset_right = -20.0
offset_bottom = 35.0
text = "Filter:"
[node name="Tree" type="Tree" parent="."]
layout_mode = 0
offset_left = 16.0
offset_top = 56.0
offset_right = 704.0
offset_bottom = 296.0
columns = 2
[node name="Utterance" type="TextEdit" parent="."]
layout_mode = 0
offset_left = 264.0
offset_top = 472.0
offset_right = 704.0
offset_bottom = 584.0
theme_override_font_sizes/font_size = 16
text = "Beware the Jabberwock, my son! The jaws that bite, the claws that catch!"
wrap_mode = 1
draw_spaces = true
[node name="ButtonSpeak" type="Button" parent="."]
layout_mode = 0
offset_left = 16.0
offset_top = 472.0
offset_right = 128.0
offset_bottom = 504.0
text = "Speak"
[node name="ButtonIntSpeak" type="Button" parent="."]
layout_mode = 0
offset_left = 144.0
offset_top = 472.0
offset_right = 256.0
offset_bottom = 504.0
text = "Interrupt"
[node name="ButtonStop" type="Button" parent="."]
layout_mode = 0
offset_left = 16.0
offset_top = 512.0
offset_right = 128.0
offset_bottom = 544.0
text = "Stop"
[node name="ButtonPause" type="Button" parent="."]
layout_mode = 0
offset_left = 144.0
offset_top = 512.0
offset_right = 256.0
offset_bottom = 544.0
toggle_mode = true
text = "Pause"
[node name="HSliderRate" type="HSlider" parent="."]
layout_mode = 0
offset_left = 96.0
offset_top = 352.0
offset_right = 440.0
offset_bottom = 368.0
min_value = 0.1
max_value = 10.0
step = 0.05
value = 1.0
exp_edit = true
[node name="Label" type="Label" parent="HSliderRate"]
layout_mode = 0
offset_left = -76.0
offset_top = -5.0
offset_right = -20.0
offset_bottom = 27.0
text = "Rate:"
[node name="Value" type="Label" parent="HSliderRate"]
layout_mode = 0
offset_left = 352.0
offset_top = -8.0
offset_right = 416.0
offset_bottom = 24.0
text = "1.00x"
[node name="HSliderPitch" type="HSlider" parent="."]
layout_mode = 0
offset_left = 96.0
offset_top = 392.0
offset_right = 440.0
offset_bottom = 408.0
max_value = 2.0
step = 0.05
value = 1.0
[node name="Label" type="Label" parent="HSliderPitch"]
layout_mode = 0
offset_left = -76.0
offset_top = -5.0
offset_right = -28.0
offset_bottom = 27.0
text = "Pitch:"
[node name="Value" type="Label" parent="HSliderPitch"]
layout_mode = 0
offset_left = 352.0
offset_top = -8.0
offset_right = 416.0
offset_bottom = 24.0
text = "1.00x"
[node name="HSliderVolume" type="HSlider" parent="."]
layout_mode = 0
offset_left = 96.0
offset_top = 432.0
offset_right = 440.0
offset_bottom = 448.0
min_value = 1.0
value = 50.0
[node name="Label" type="Label" parent="HSliderVolume"]
layout_mode = 0
offset_left = -76.0
offset_top = -5.0
offset_right = -12.0
offset_bottom = 27.0
text = "Volume:"
[node name="Value" type="Label" parent="HSliderVolume"]
layout_mode = 0
offset_left = 352.0
offset_top = -8.0
offset_right = 416.0
offset_bottom = 24.0
text = "50%"
[node name="ColorRect" type="ColorRect" parent="."]
layout_mode = 0
offset_left = 16.0
offset_top = 16.0
offset_right = 144.0
offset_bottom = 40.0
[node name="Label" type="Label" parent="ColorRect"]
layout_mode = 0
offset_right = 128.0
offset_bottom = 32.0
theme_override_font_sizes/font_size = 16
text = "Speaking..."
[node name="Log" type="TextEdit" parent="."]
layout_mode = 0
offset_left = 712.0
offset_top = 56.0
offset_right = 1138.0
offset_bottom = 584.0
editable = false
context_menu_enabled = false
shortcut_keys_enabled = false
virtual_keyboard_enabled = false
middle_mouse_paste_enabled = false
[node name="ButtonClearLog" type="Button" parent="Log"]
layout_mode = 0
offset_left = 346.0
offset_top = 8.0
offset_right = 418.0
offset_bottom = 39.0
theme_override_font_sizes/font_size = 16
text = "Clear"
[node name="RichTextLabel" type="RichTextLabel" parent="."]
layout_mode = 0
offset_left = 152.0
offset_top = 16.0
offset_right = 1008.0
offset_bottom = 40.0
theme_override_font_sizes/normal_font_size = 16
bbcode_enabled = true
scroll_active = false
[node name="ButtonDemo" type="Button" parent="."]
layout_mode = 0
offset_left = 16.0
offset_top = 552.0
offset_right = 256.0
offset_bottom = 581.0
theme_override_font_sizes/font_size = 16
text = "Demo"
[connection signal="text_changed" from="LineEditFilterLang" to="." method="_on_LineEditFilterName_text_changed"]
[connection signal="text_changed" from="LineEditFilterName" to="." method="_on_LineEditFilterName_text_changed"]
[connection signal="item_activated" from="Tree" to="." method="_on_ItemList_item_activated"]
[connection signal="pressed" from="ButtonSpeak" to="." method="_on_ButtonSpeak_pressed"]
[connection signal="pressed" from="ButtonIntSpeak" to="." method="_on_ButtonIntSpeak_pressed"]
[connection signal="pressed" from="ButtonStop" to="." method="_on_ButtonStop_pressed"]
[connection signal="pressed" from="ButtonPause" to="." method="_on_ButtonPause_pressed"]
[connection signal="value_changed" from="HSliderRate" to="." method="_on_HSliderRate_value_changed"]
[connection signal="value_changed" from="HSliderPitch" to="." method="_on_HSliderPitch_value_changed"]
[connection signal="value_changed" from="HSliderVolume" to="." method="_on_HSliderVolume_value_changed"]
[connection signal="pressed" from="Log/ButtonClearLog" to="." method="_on_ButtonClearLog_pressed"]
[connection signal="pressed" from="ButtonDemo" to="." method="_on_Button_pressed"]

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

View File

@@ -0,0 +1,34 @@
[remap]
importer="texture"
type="CompressedTexture2D"
uid="uid://53lrswe56fov"
path="res://.godot/imported/icon.png-487276ed1e3a0c39cad0279d744ee560.ctex"
metadata={
"vram_texture": false
}
[deps]
source_file="res://icon.png"
dest_files=["res://.godot/imported/icon.png-487276ed1e3a0c39cad0279d744ee560.ctex"]
[params]
compress/mode=0
compress/high_quality=false
compress/lossy_quality=0.7
compress/hdr_compression=1
compress/normal_map=0
compress/channel_pack=0
mipmaps/generate=false
mipmaps/limit=-1
roughness/mode=0
roughness/src_normal=""
process/fix_alpha_border=true
process/premult_alpha=false
process/normal_map_invert_y=false
process/hdr_as_srgb=false
process/hdr_clamp_exposure=false
process/size_limit=0
detect_3d/compress_to=1

View File

@@ -0,0 +1,28 @@
; Engine configuration file.
; It's best edited using the editor UI and not directly,
; since the parameters that go here are not all obvious.
;
; Format:
; [section] ; section goes between []
; param=value ; assign values to parameters
config_version=5
[application]
config/name="Text-to-speech demo"
config/description="This is a demo showing text-to-speech support."
run/main_scene="res://control.tscn"
config/features=PackedStringArray("4.0")
config/icon="res://icon.png"
[display]
window/stretch/mode="canvas_items"
window/stretch/aspect="expand"
[rendering]
renderer/rendering_method="gl_compatibility"
renderer/rendering_method.mobile="gl_compatibility"
environment/defaults/default_clear_color=Color(0.2, 0.2, 0.2, 1)

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

View File

@@ -0,0 +1,127 @@
extends Control
# Next utterance ID passed to DisplayServer.tts_speak(); incremented after every request.
var id = 0
# Maps utterance ID -> text that was sent for it. Entries are erased when the
# utterance ends or is canceled.
var ut_map = {}
# Voice list returned by DisplayServer.tts_get_voices(); assigned in _ready().
var vs
# Fills the voice tree, writes the voice count to the log, and registers the
# TTS utterance callbacks on DisplayServer.
func _ready():
	# Fetch the voices reported by DisplayServer.
	vs = DisplayServer.tts_get_voices()

	var tree = $Tree
	var root = tree.create_item()
	tree.hide_root = true
	tree.set_column_title(0, "Name")
	tree.set_column_title(1, "Language")
	tree.column_titles_visible = true

	# One row per voice; the voice ID is stored as metadata on column 0.
	for voice in vs:
		var row = tree.create_item(root)
		row.set_text(0, voice["name"])
		row.set_metadata(0, voice["id"])
		row.set_text(1, voice["language"])

	var log_box = $Log
	log_box.text += "%d voices available\n" % [vs.size()]
	log_box.text += "=======\n"

	# Route utterance events to this script's handlers.
	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_STARTED, _on_utterance_start)
	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_ENDED, _on_utterance_end)
	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_CANCELED, _on_utterance_error)
	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_BOUNDARY, _on_utterance_boundary)
	set_process(true)
# Every frame: mirror the TTS paused state onto the Pause toggle and tint the
# indicator rectangle red while speaking, white otherwise.
func _process(delta):
	$ButtonPause.button_pressed = DisplayServer.tts_is_paused()
	$ColorRect.color = Color(1, 0, 0) if DisplayServer.tts_is_speaking() else Color(1, 1, 1)
# TTS boundary callback: highlights the part of the utterance before `pos`
# and shows the remainder unstyled.
#
# pos: character offset into the utterance text.
# id:  utterance ID (note: this parameter shadows the `id` member variable).
func _on_utterance_boundary(pos, id):
	# The entry is erased when the utterance ends or is canceled; indexing a
	# missing key would raise a runtime error, so ignore unknown IDs.
	if not ut_map.has(id):
		return
	var text = ut_map[id]
	# The label has BBCode enabled, so escape "[" in the user-provided text to
	# keep it from being parsed as markup. Escaping is done after splitting so
	# that `pos` still refers to the unescaped text.
	var spoken = text.substr(0, pos).replace("[", "[lb]")
	var pending = text.substr(pos, -1).replace("[", "[lb]")
	$RichTextLabel.text = "[bgcolor=yellow][color=black]" + spoken + "[/color][/bgcolor]" + pending
# TTS started callback: appends a "started" line for the utterance to the log.
# `id` is the utterance ID (it shadows the `id` member variable).
func _on_utterance_start(id):
	var log_box = $Log
	log_box.text += "utterance %d started\n" % [id]
# TTS ended callback: shows the whole utterance highlighted, logs the event,
# and forgets the utterance text.
#
# id: utterance ID (note: this parameter shadows the `id` member variable).
func _on_utterance_end(id):
	# Indexing a missing key would raise a runtime error; only update the
	# label when the utterance text is still known.
	if ut_map.has(id):
		# The label has BBCode enabled, so escape "[" in the user-provided text.
		var text = ut_map[id].replace("[", "[lb]")
		$RichTextLabel.text = "[bgcolor=yellow][color=black]" + text + "[/color][/bgcolor]"
	$Log.text += "utterance %d ended\n" % [id]
	ut_map.erase(id)
# TTS canceled callback: clears the highlighted text, logs the event, and
# drops the stored utterance text. `id` is the utterance ID (it shadows the
# `id` member variable).
func _on_utterance_error(id):
	var message = "utterance %d canceled/failed\n" % [id]
	$RichTextLabel.text = ""
	$Log.text += message
	ut_map.erase(id)
# "Stop" button handler: asks DisplayServer to stop text-to-speech.
func _on_ButtonStop_pressed():
	DisplayServer.tts_stop()
# "Pause" toggle handler: pauses TTS when the button is toggled on and
# resumes it when toggled off.
func _on_ButtonPause_pressed():
	# `button_pressed` is the toggle state. `pressed` is the button's signal in
	# Godot 4, so testing it was always truthy and resume was never reached.
	if $ButtonPause.button_pressed:
		DisplayServer.tts_pause()
	else:
		DisplayServer.tts_resume()
# "Speak" button handler: queues the utterance text with the selected voice.
func _on_ButtonSpeak_pressed():
	_speak_with_selected_voice(false, "queued", "Speak")


# "Interrupt" button handler: speaks the utterance text with the selected
# voice, passing `true` as the interrupt argument of tts_speak().
func _on_ButtonIntSpeak_pressed():
	_speak_with_selected_voice(true, "interrupt", "Interrupt")


# Handler for the Tree's `item_activated` signal. The scene connects that
# signal to this method name; without a definition the connection would
# target a missing method. Activating a voice behaves like pressing "Speak".
func _on_ItemList_item_activated():
	_on_ButtonSpeak_pressed()


# Shared implementation for the Speak/Interrupt buttons.
#
# interrupt:   value passed as the interrupt argument of tts_speak().
# log_verb:    word written to the log after the utterance ID.
# button_name: button label used in the "no voice selected" alert.
func _speak_with_selected_voice(interrupt, log_verb, button_name):
	var selected = $Tree.get_selected()
	if not selected:
		OS.alert("No voice selected.\nSelect a voice in the list, then try using %s again." % [button_name])
		return
	var text = $Utterance.text
	$Log.text += "utterance %d %s\n" % [id, log_verb]
	# Remember the text so the utterance callbacks can display it.
	ut_map[id] = text
	# Column 0 metadata holds the voice ID.
	DisplayServer.tts_speak(text, selected.get_metadata(0), $HSliderVolume.value, $HSliderPitch.value, $HSliderRate.value, id, interrupt)
	id += 1
# "Clear" button handler: empties the log text box.
func _on_ButtonClearLog_pressed():
	var log_box = $Log
	log_box.text = ""
# Rate slider handler: shows the new value with two decimals and an "x" suffix.
func _on_HSliderRate_value_changed(value):
	var caption = "%.2fx" % [value]
	$HSliderRate/Value.text = caption
# Pitch slider handler: shows the new value with two decimals and an "x" suffix.
func _on_HSliderPitch_value_changed(value):
	var caption = "%.2fx" % [value]
	$HSliderPitch/Value.text = caption
# Volume slider handler: shows the new value as an integer followed by "%".
func _on_HSliderVolume_value_changed(value):
	var caption = "%d%%" % [value]
	$HSliderVolume/Value.text = caption
# "Demo" button handler: for each demo language that has at least one voice,
# speaks two fixed lines with the first voice found for that language.
func _on_Button_pressed():
	# Processed in this order: en, es, ru.
	var demos = [
		{
			"language": "en",
			"lines": [
				"Beware the Jabberwock, my son!",
				"The jaws that bite, the claws that catch!",
			],
		},
		{
			"language": "es",
			"lines": [
				"¡Cuidado, hijo, con el Fablistanón!",
				"¡Con sus dientes y garras, muerde, apresa!",
			],
		},
		{
			"language": "ru",
			"lines": [
				"О, бойся Бармаглота, сын!",
				"Он так свирлеп и дик!",
			],
		},
	]
	for demo in demos:
		var voices = DisplayServer.tts_get_voices_for_language(demo["language"])
		if voices.is_empty():
			continue
		var lines = demo["lines"]
		# Register every line before speaking, so the callbacks can look them up.
		for offset in lines.size():
			ut_map[id + offset] = lines[offset]
		for offset in lines.size():
			DisplayServer.tts_speak(lines[offset], voices[0], 50, 1, 1, id + offset)
		id += lines.size()
# Filter handler (the scene connects both the name and the language LineEdit
# to it): rebuilds the voice tree with only the voices whose name and language
# contain the respective filter text, compared case-insensitively. An empty
# filter matches everything. `new_text` is unused; both fields are read directly.
func _on_LineEditFilterName_text_changed(new_text):
	var name_filter = $LineEditFilterName.text
	var lang_filter = $LineEditFilterLang.text
	var name_needle = name_filter.to_lower()
	var lang_needle = lang_filter.to_lower()

	var tree = $Tree
	tree.clear()
	var root = tree.create_item()
	for voice in vs:
		if not name_filter.is_empty() and not (name_needle in voice["name"].to_lower()):
			continue
		if not lang_filter.is_empty() and not (lang_needle in voice["language"].to_lower()):
			continue
		var row = tree.create_item(root)
		row.set_text(0, voice["name"])
		# Column 0 metadata carries the voice ID.
		row.set_metadata(0, voice["id"])
		row.set_text(1, voice["language"])