From 4b4dee8fd21b21f550251e8c4856ab218a1277fa Mon Sep 17 00:00:00 2001
From: Andrey Volk <andywolk@gmail.com>
Date: Fri, 27 Dec 2024 22:02:30 +0300
Subject: [PATCH] [mod_dptools] Move tts format from mod_ssml.

---
 .../applications/mod_dptools/mod_dptools.c    | 130 ++++++++++++++++++
 src/mod/formats/mod_ssml/mod_ssml.c           | 124 -----------------
 2 files changed, 130 insertions(+), 124 deletions(-)

diff --git a/src/mod/applications/mod_dptools/mod_dptools.c b/src/mod/applications/mod_dptools/mod_dptools.c
index 58fce5f5d3..d134576fb9 100644
--- a/src/mod/applications/mod_dptools/mod_dptools.c
+++ b/src/mod/applications/mod_dptools/mod_dptools.c
@@ -5322,6 +5322,130 @@ static switch_status_t file_url_file_write(switch_file_handle_t *handle, void *d
 
 /* Registration */
 
+/**
+ * TTS playback state
+ */
+struct tts_context {
+	/** handle to TTS engine */
+	switch_speech_handle_t sh;
+	/** TTS flags */
+	switch_speech_flag_t flags;
+	/** maximum number of samples to read at a time */
+	int max_frame_size;
+	/** done flag */
+	int done;
+};
+
+/**
+ * Do TTS as file format
+ * @param handle
+ * @param path the inline SSML
+ * @return SWITCH_STATUS_SUCCESS if opened
+ */
+static switch_status_t tts_file_open(switch_file_handle_t *handle, const char *path)
+{
+	switch_status_t status = SWITCH_STATUS_SUCCESS;
+	struct tts_context *context = switch_core_alloc(handle->memory_pool, sizeof(*context));
+	char *arg_string = switch_core_strdup(handle->memory_pool, path);
+	char *args[3] = { 0 };
+	int argc = switch_separate_string(arg_string, '|', args, (sizeof(args) / sizeof(args[0])));
+	char *module;
+	char *voice;
+	char *document;
+
+	/* path is module:(optional)profile|voice|{param1=val1,param2=val2}TTS document */
+	if (argc != 3) {
+		return SWITCH_STATUS_FALSE;
+	}
+
+	module = args[0];
+	voice = args[1];
+	document = args[2];
+
+	memset(context, 0, sizeof(*context));
+	context->flags = SWITCH_SPEECH_FLAG_NONE;
+	if ((status = switch_core_speech_open(&context->sh, module, voice, handle->samplerate, handle->interval, handle->channels, &context->flags, NULL)) == SWITCH_STATUS_SUCCESS) {
+		if (handle->params) {
+			const char *channel_uuid = switch_event_get_header(handle->params, "channel-uuid");
+
+			if (!zstr(channel_uuid)) {
+				switch_core_speech_text_param_tts(&context->sh, "channel-uuid", channel_uuid);
+			}
+		}
+
+		if ((status = switch_core_speech_feed_tts(&context->sh, document, &context->flags)) == SWITCH_STATUS_SUCCESS) {
+			handle->channels = 1;
+			handle->samples = 0;
+			handle->format = 0;
+			handle->sections = 0;
+			handle->seekable = 0;
+			handle->speed = 0;
+			context->max_frame_size = handle->samplerate / 1000 * SWITCH_MAX_INTERVAL;
+
+			if ((context->sh.flags & SWITCH_SPEECH_FLAG_MULTI)) {
+				switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_DONE;
+				switch_core_speech_feed_tts(&context->sh, "DONE", &flags);
+			}
+		} else {
+			switch_core_speech_close(&context->sh, &context->flags);
+		}
+	}
+
+	handle->private_info = context;
+
+	return status;
+}
+
+/**
+ * Read audio from TTS engine
+ * @param handle
+ * @param data
+ * @param len
+ * @return
+ */
+static switch_status_t tts_file_read(switch_file_handle_t *handle, void *data, size_t *len)
+{
+	switch_status_t status = SWITCH_STATUS_SUCCESS;
+	struct tts_context *context = (struct tts_context *)handle->private_info;
+	switch_size_t rlen;
+
+	if (*len > context->max_frame_size) {
+		*len = context->max_frame_size;
+	}
+
+	rlen = *len * 2; /* rlen (bytes) = len (samples) * 2 */
+
+	if (!context->done) {
+		context->flags = SWITCH_SPEECH_FLAG_BLOCKING;
+		if ((status = switch_core_speech_read_tts(&context->sh, data, &rlen, &context->flags))) {
+			context->done = 1;
+		}
+	} else {
+		switch_core_speech_flush_tts(&context->sh);
+		memset(data, 0, rlen);
+		status = SWITCH_STATUS_FALSE;
+	}
+
+	*len = rlen / 2; /* len (samples) = rlen (bytes) / 2 */
+
+	return status;
+}
+
+/**
+ * Close TTS engine
+ * @param handle
+ * @return SWITCH_STATUS_SUCCESS
+ */
+static switch_status_t tts_file_close(switch_file_handle_t *handle)
+{
+	struct tts_context *context = (struct tts_context *)handle->private_info;
+
+	switch_core_speech_close(&context->sh, &context->flags);
+
+	return SWITCH_STATUS_SUCCESS;
+}
+
+static char *tts_supported_formats[] = { "tts", NULL };
 static char *file_string_supported_formats[SWITCH_MAX_CODECS] = { 0 };
 static char *file_url_supported_formats[SWITCH_MAX_CODECS] = { 0 };
 
@@ -6464,6 +6588,12 @@ SWITCH_MODULE_LOAD_FUNCTION(mod_dptools_load)
 	file_interface->file_write = file_url_file_write;
 	file_interface->file_seek = file_url_file_seek;
 
+	file_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_FILE_INTERFACE);
+	file_interface->interface_name = modname;
+	file_interface->extens = tts_supported_formats;
+	file_interface->file_open = tts_file_open;
+	file_interface->file_close = tts_file_close;
+	file_interface->file_read = tts_file_read;
 
 	error_endpoint_interface = (switch_endpoint_interface_t *) switch_loadable_module_create_interface(*module_interface, SWITCH_ENDPOINT_INTERFACE);
 	error_endpoint_interface->interface_name = "error";
diff --git a/src/mod/formats/mod_ssml/mod_ssml.c b/src/mod/formats/mod_ssml/mod_ssml.c
index 4fe73d247c..0301162297 100644
--- a/src/mod/formats/mod_ssml/mod_ssml.c
+++ b/src/mod/formats/mod_ssml/mod_ssml.c
@@ -893,119 +893,6 @@ static switch_status_t ssml_file_seek(switch_file_handle_t *handle, unsigned int
 	return switch_core_file_seek(&context->fh, cur_sample, samples, whence);
 }
 
-/**
- * TTS playback state
- */
-struct tts_context {
-	/** handle to TTS engine */
-	switch_speech_handle_t sh;
-	/** TTS flags */
-	switch_speech_flag_t flags;
-	/** maximum number of samples to read at a time */
-	int max_frame_size;
-	/** done flag */
-	int done;
-};
-
-/**
- * Do TTS as file format
- * @param handle
- * @param path the inline SSML
- * @return SWITCH_STATUS_SUCCESS if opened
- */
-static switch_status_t tts_file_open(switch_file_handle_t *handle, const char *path)
-{
-	switch_status_t status = SWITCH_STATUS_SUCCESS;
-	struct tts_context *context = switch_core_alloc(handle->memory_pool, sizeof(*context));
-	char *arg_string = switch_core_strdup(handle->memory_pool, path);
-	char *args[3] = { 0 };
-	int argc = switch_separate_string(arg_string, '|', args, (sizeof(args) / sizeof(args[0])));
-	char *module;
-	char *voice;
-	char *document;
-
-	/* path is module:(optional)profile|voice|{param1=val1,param2=val2}TTS document */
-	if (argc != 3) {
-		return SWITCH_STATUS_FALSE;
-	}
-	module = args[0];
-	voice = args[1];
-	document = args[2];
-
-	memset(context, 0, sizeof(*context));
-	context->flags = SWITCH_SPEECH_FLAG_NONE;
-	if ((status = switch_core_speech_open(&context->sh, module, voice, handle->samplerate, handle->interval, handle->channels, &context->flags, NULL)) == SWITCH_STATUS_SUCCESS) {
-		if (handle->params) {
-			const char *channel_uuid = switch_event_get_header(handle->params, "channel-uuid");
-			if (!zstr(channel_uuid)) {
-				switch_core_speech_text_param_tts(&context->sh, "channel-uuid", channel_uuid);
-			}
-		}
-		if ((status = switch_core_speech_feed_tts(&context->sh, document, &context->flags)) == SWITCH_STATUS_SUCCESS) {
-			handle->channels = 1;
-			handle->samples = 0;
-			handle->format = 0;
-			handle->sections = 0;
-			handle->seekable = 0;
-			handle->speed = 0;
-			context->max_frame_size = handle->samplerate / 1000 * SWITCH_MAX_INTERVAL;
-
-			if ((context->sh.flags & SWITCH_SPEECH_FLAG_MULTI)) {
-				switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_DONE;
-				switch_core_speech_feed_tts(&context->sh, "DONE", &flags);
-			}
-		} else {
-			switch_core_speech_close(&context->sh, &context->flags);
-		}
-	}
-	handle->private_info = context;
-	return status;
-}
-
-/**
- * Read audio from TTS engine
- * @param handle
- * @param data
- * @param len
- * @return
- */
-static switch_status_t tts_file_read(switch_file_handle_t *handle, void *data, size_t *len)
-{
-	switch_status_t status = SWITCH_STATUS_SUCCESS;
-	struct tts_context *context = (struct tts_context *)handle->private_info;
-	switch_size_t rlen;
-
-	if (*len > context->max_frame_size) {
-		*len = context->max_frame_size;
-	}
-	rlen = *len * 2; /* rlen (bytes) = len (samples) * 2 */
-
-	if (!context->done) {
-		context->flags = SWITCH_SPEECH_FLAG_BLOCKING;
-		if ((status = switch_core_speech_read_tts(&context->sh, data, &rlen, &context->flags))) {
-			context->done = 1;
-		}
-	} else {
-		switch_core_speech_flush_tts(&context->sh);
-		memset(data, 0, rlen);
-		status = SWITCH_STATUS_FALSE;
-	}
-	*len = rlen / 2; /* len (samples) = rlen (bytes) / 2 */
-	return status;
-}
-
-/**
- * Close TTS engine
- * @param handle
- * @return SWITCH_STATUS_SUCCESS
- */
-static switch_status_t tts_file_close(switch_file_handle_t *handle)
-{
-	struct tts_context *context = (struct tts_context *)handle->private_info;
-	switch_core_speech_close(&context->sh, &context->flags);
-	return SWITCH_STATUS_SUCCESS;
-}
-
 /**
  * Configure voices
  * @param pool memory pool to use
@@ -1168,7 +1055,6 @@ static switch_status_t do_config(switch_memory_pool_t *pool)
 }
 
 static char *ssml_supported_formats[] = { "ssml", NULL };
-static char *tts_supported_formats[] = { "tts", NULL };
 
 SWITCH_MODULE_LOAD_FUNCTION(mod_ssml_load)
 {
@@ -1183,16 +1069,6 @@ SWITCH_MODULE_LOAD_FUNCTION(mod_ssml_load)
 	file_interface->file_read = ssml_file_read;
 	file_interface->file_seek = ssml_file_seek;
 
-	file_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_FILE_INTERFACE);
-	file_interface->interface_name = modname;
-	file_interface->extens = tts_supported_formats;
-	file_interface->file_open = tts_file_open;
-	file_interface->file_close = tts_file_close;
-	file_interface->file_read = tts_file_read;
-	/* TODO allow skip ahead if TTS supports it
-	 * file_interface->file_seek = tts_file_seek;
-	 */
-
 	globals.pool = pool;
 	switch_core_hash_init(&globals.voice_cache);
 	switch_core_hash_init(&globals.tts_voice_map);