# MassageRobot_Dobot/Language/Speech_processor/scripts/Synthesizer.py

import tempfile
import os
import json
import nls
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
import time
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
from tools.log import CustomLogger

import os
# Locate this file, then climb three directory levels to reach ``Language_Path``
# and one more level to reach ``MassageRobot_Dobot_Path``.  The latter is put on
# ``sys.path`` (presumably so the ``VortXDB`` import that follows can be
# resolved -- confirm against the repository layout).
current_file_path = os.path.abspath(__file__)
Language_Path = current_file_path
for _ in range(3):
    Language_Path = os.path.dirname(Language_Path)
MassageRobot_Dobot_Path = os.path.dirname(Language_Path)
print("MassageRobot_Dobot_Path:", MassageRobot_Dobot_Path)
sys.path.append(MassageRobot_Dobot_Path)
from VortXDB.client import VTXClient

# Aliyun speech synthesis (text-to-speech)
class SpeechSynthesizer:
    """Text-to-speech client built on ``nls.NlsSpeechSynthesizer``.

    Connection settings (``url``, ``appkey``, ``api_id``, ``api_key``) are read
    from VortXDB under ``Language.Speech_processor.aliyun_synthesize_config``.
    An access token is requested when the instance is created and is renewed
    by :meth:`speech_synthesize` once it is missing or about to expire.

    Attributes:
        phoneme: Last message delivered to :meth:`on_metainfo` (phoneme-level
            timestamps), or ``None`` if nothing has been received yet.
        token: Access token passed to the NLS synthesizer.
        token_expire_time: ``ExpireTime`` reported with the token, or ``None``
            when unknown.  NOTE(review): assumed to be a Unix timestamp in
            seconds, as described by Aliyun's CreateToken docs -- confirm.
    """

    # Class-level defaults keep these attributes readable before the first
    # synthesis / token request has populated them.
    phoneme = None
    token = None
    token_expire_time = None
    __file = None

    # Renew the token this many seconds before its reported expiry.
    _TOKEN_REFRESH_MARGIN_S = 60

    def __init__(self):
        """Load the synthesizer configuration and request an access token.

        Raises:
            Exception: Whatever the token request raises (see
                :meth:`get_Text_To_Speech_token`).
        """
        vtxdb = VTXClient()
        self.url = vtxdb.get("robot_config", "Language.Speech_processor.aliyun_synthesize_config.url")
        self.appkey = vtxdb.get("robot_config", "Language.Speech_processor.aliyun_synthesize_config.appkey")
        self.api_id = vtxdb.get("robot_config", "Language.Speech_processor.aliyun_synthesize_config.api_id")
        self.api_key = vtxdb.get("robot_config", "Language.Speech_processor.aliyun_synthesize_config.api_key")
        self.logger = CustomLogger()
        self.token = self.get_Text_To_Speech_token()

    def on_metainfo(self, message, *args):
        """Synthesizer callback: keep the meta-info message.

        The message carries the phoneme-level timestamps requested through
        ``enable_phoneme_timestamp`` in :meth:`speech_synthesize`.
        """
        self.phoneme = message

    def on_error(self, message, *args):
        """Synthesizer callback: log the reported error message and extra args."""
        self.logger.log_error("on_error message=>{} args=>{}".format(message, args))

    def on_close(self, *args):
        """Synthesizer callback: close the current output file, if any."""
        audio_file = self.__file
        if audio_file is None:
            return
        try:
            audio_file.close()
        except Exception as e:
            self.logger.log_error(f"close failed: {e}")

    def on_data(self, data, *args):
        """Synthesizer callback: append a chunk of audio bytes to the output file."""
        try:
            self.__file.write(data)
        except Exception as e:
            self.logger.log_error(f"write data failed: {e}")

    def on_completed(self, message, *args):
        """Synthesizer callback: nothing to do when synthesis completes."""
        return

    def _token_needs_refresh(self):
        """Return True when no token is held or it is within the refresh margin of expiry."""
        if not self.token:
            return True
        expire_at = self.token_expire_time
        if expire_at is None:
            # Expiry unknown: keep using the current token.
            return False
        return time.time() >= expire_at - self._TOKEN_REFRESH_MARGIN_S

    def speech_synthesize(self, text, speech_rate = -250, output_file=None):
        """Synthesize ``text`` to an MP3 file and return the file's path.

        Args:
            text: Text to speak.
            speech_rate: Speech rate forwarded to the synthesizer.
            output_file: Destination path.  When falsy, a temporary ``.mp3``
                file is created and the caller owns (and should delete) it.

        Returns:
            The path of the written file, or ``None`` if anything failed; the
            failure is logged and a temporary file created here is removed.
        """
        target_path = output_file
        created_temp_file = False
        try:
            if self._token_needs_refresh():
                refreshed = self.get_Text_To_Speech_token()
                if refreshed:
                    self.token = refreshed

            if not target_path:
                fd, target_path = tempfile.mkstemp(suffix='.mp3')
                os.close(fd)  # only the path is needed; it is reopened below
                created_temp_file = True

            self.__text = text
            self.__speech_rate = speech_rate
            self.__voice = "zhimiao_emo"
            # Ask for subtitles and phoneme-level timestamps (see on_metainfo).
            self.ex = {'enable_subtitle':True,"enable_phoneme_timestamp":True}

            audio_file = open(target_path, "wb")
            self.__file = audio_file  # the callbacks write to / close this handle
            try:
                tts = nls.NlsSpeechSynthesizer(
                    url=self.url,
                    token=self.token,
                    appkey=self.appkey,
                    on_metainfo=self.on_metainfo,
                    on_data=self.on_data,
                    on_completed=self.on_completed,
                    on_error=self.on_error,
                    on_close=self.on_close
                )
                tts.start(self.__text, voice=self.__voice, speech_rate=self.__speech_rate, aformat="mp3",ex=self.ex)
            finally:
                # Always release the handle, including when start() raises.
                # Closing again after on_close already did so is harmless.
                audio_file.close()
            return target_path
        except Exception as e:
            self.logger.log_error(f"Failed to synthesize speech: {e}")
            if created_temp_file:
                try:
                    os.remove(target_path)
                except OSError:
                    # Best-effort cleanup; the synthesis failure is already logged.
                    pass
            return None

    # Obtain the speech-synthesis token (Aliyun)
    def get_Text_To_Speech_token(self):
        """Request a new access token via the ``CreateToken`` action.

        On success the reported ``ExpireTime`` (when numeric) is stored in
        ``self.token_expire_time``.

        Returns:
            The token id, or ``None`` when the response carries no
            ``Token.Id`` (an error is logged in that case).

        Raises:
            Exception: Any error raised while sending the request or decoding
                the response is logged and re-raised.
        """
        client = AcsClient(
            self.api_id,
            self.api_key,
            "cn-shanghai"
        )
        # Build the request and set its parameters.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = client.do_action_with_exception(request)
            token_info = json.loads(response).get('Token') or {}
            if 'Id' in token_info:
                expire_time = token_info.get('ExpireTime')
                # Only keep an expiry that can be compared against time.time().
                if isinstance(expire_time, (int, float)):
                    self.token_expire_time = expire_time
                else:
                    self.token_expire_time = None
                self.logger.log_info("Successfully get Synthesizer token!!!")
                return token_info['Id']
            self.logger.log_error("Synthesizer token response did not contain Token.Id")
            return None
        except Exception as e:
            self.logger.log_error(f"{e}")
            raise


if __name__ == '__main__':
    # Manual smoke test: synthesize one sentence and play it back.
    import argparse
    import sys
    from Audio_player import AudioPlayer

    def parse_args():
        """Parse the demo's command-line arguments."""
        parser = argparse.ArgumentParser(description='Speech processor')
        # Still accepted so existing invocations keep working, but no longer
        # read: SpeechSynthesizer takes no config argument and loads its
        # settings itself.
        parser.add_argument(
            '--synthesizer_config_path',
            type=str,
            default='/home/jsfb/jsfb_ws/MassageRobot_Dobot/Language/Speech_processor/config/aliyun_synthesize_config.yaml',
            help='Ignored; kept for backward compatibility.',
        )
        return parser.parse_args()

    args = parse_args()
    synthesizer = SpeechSynthesizer()
    player = AudioPlayer()
    # Default destination is a temporary file; pass output_file="xxx.mp3" to
    # speech_synthesize to write to a specific path instead.
    audio_path = synthesizer.speech_synthesize("现在使用砭石手法为您。")
    if audio_path is None:
        # speech_synthesize returns None on failure; there is nothing to play.
        sys.exit("Speech synthesis failed; nothing to play.")
    player.play(audio_path, True)