# digitalhumantalk/thirdparty/aliyun/aliyuntts.py

# -*- coding: UTF-8 -*-
# Python 2.x imported the httplib module:
# import httplib
# Python 3.x would import http.client; this module uses the requests library instead.
import requests
# Python 2.x imported the urllib module:
# import urllib
# Python 3.x imports the urllib.parse module:
import urllib.parse
import json
import os
from .aliyuntokenaccess import get_token
import asyncio
from io import BytesIO
from urllib.parse import urlencode, urlunparse, urljoin

# Supported voices, keyed by display name and mapped to the identifier that is
# sent to the service as the `voice` request parameter.
# Full voice list: https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis
SUPPORTED_VOICES = {
    '阿斌': 'abin',
    '知小白': 'zhixiaobai',
    '知小夏': 'zhixiaoxia',
    '知小妹': 'zhixiaomei',
    '知柜': 'zhigui',
    '知硕': 'zhishuo',
    '艾夏': 'aixia',
    'Cally': 'cally',
    '知锋_多情感': 'zhifeng_emo',
    '知冰_多情感': 'zhibing_emo',
    '知妙_多情感': 'zhimiao_emo',
    '知米_多情感': 'zhimi_emo',
    '知燕_多情感': 'zhiyan_emo',
    '知贝_多情感': 'zhibei_emo',
    '知甜_多情感': 'zhitian_emo',
    '小云': 'xiaoyun',
    '小刚': 'xiaogang',
    '若兮': 'ruoxi',
    '思琪': 'siqi',
    '思佳': 'sijia',
    '思诚': 'sicheng',
    '艾琪': 'aiqi',
    '艾佳': 'aijia',
    '艾诚': 'aicheng',
    '艾达': 'aida',
    '宁儿': 'ninger',
    '瑞琳': 'ruilin',
    '思悦': 'siyue',
    '艾雅': 'aiya',
    '艾美': 'aimei',
    '艾雨': 'aiyu',
    '艾悦': 'aiyue',
    '艾婧': 'aijing',
    '小美': 'xiaomei',
    '艾娜': 'aina',
    '伊娜': 'yina',
    '思婧': 'sijing',
    '思彤': 'sitong',
    '小北': 'xiaobei',
    '艾彤': 'aitong',
    '艾薇': 'aiwei',
    '艾宝': 'aibao',
    'Harry': 'harry',
    'Abby': 'abby',
    'Andy': 'andy',
    'Eric': 'eric',
    'Emily': 'emily',
    'Luna': 'luna',
    'Luca': 'luca',
    'Wendy': 'wendy',
    'William': 'william',
    'Olivia': 'olivia',
    '姗姗': 'shanshan',
    '小玥': 'chuangirl',
    'Lydia': 'lydia',
    '艾硕': 'aishuo',
    '青青': 'qingqing',
    '翠姐': 'cuijie',
    '小泽': 'xiaoze',
    '智香': 'tomoka',
    '智也': 'tomoya',
    'Annie': 'annie',
    '佳佳': 'jiajia',
    'Indah': 'indah',
    '桃子': 'taozi',
    '柜姐': 'guijie',
    'Stella': 'stella',
    'Stanley': 'stanley',
    'Kenny': 'kenny',
    'Rosa': 'rosa',
    'Farah': 'farah',
    '马树': 'mashu',
    '小仙': 'xiaoxian',
    '悦儿': 'yuer',
    '猫小美': 'maoxiaomei',
    '艾飞': 'aifei',
    '亚群': 'yaqun',
    '巧薇': 'qiaowei',
    '大虎': 'dahu',
    'ava': 'ava',
    '艾伦': 'ailun',
    '杰力豆': 'jielidou',
    '老铁': 'laotie',
    '老妹': 'laomei',
    '艾侃': 'aikan',
    'Talat': 'talat',
    'Tien': 'tien',
    'Becca': 'becca',
    'Kyong': 'kyong',
    'masha': 'masha',
    'camila': 'camila',
    'perla': 'perla',
    '知猫': 'zhimao',
    '知媛': 'zhiyuan',
    '知雅': 'zhiya',
    '知悦': 'zhiyue',
    '知达': 'zhida',
    '知莎': 'zhistella',
    'Kelly': 'kelly',
    'clara': 'clara',
    'hanna': 'hanna',
    'waan': 'waan',
    'betty': 'betty',
    'beth': 'beth',
    'cindy': 'cindy',
    'donna': 'donna',
    'eva': 'eva',
    'brian': 'brian'
}

import logging
# Configure the root logger when this module is imported: DEBUG level, with
# timestamp, level name and message in every record.
# NOTE(review): this targets the root logger, so it applies to the whole
# importing process, not only to this module.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def processGETRequest(host, appKey, token, text, audioSaveFile, format='wav', sampleRate=16000, voice="xiaoyun", volume=50, speech_rate=0, stream=True, timeout=30):
    """Request speech synthesis from the TTS endpoint with an HTTP GET.

    Args:
        host: Host name of the TTS service (no scheme).
        appKey: Application key, sent as the ``appkey`` parameter.
        token: Access token, sent as the ``token`` parameter.
        text: Text to synthesize. It is placed in the query string verbatim,
            so the caller must pass it already URL-encoded.
        audioSaveFile: Path the audio is written to; only used when
            ``stream`` is false.
        format: Audio format, sent as the ``format`` parameter.
        sampleRate: Sample rate, sent as the ``sample_rate`` parameter.
        voice: Voice identifier, sent as the ``voice`` parameter.
        volume: Volume, sent as the ``volume`` parameter.
        speech_rate: Speech rate, sent as the ``speech_rate`` parameter.
        stream: When true, return the audio bytes instead of writing a file.
        timeout: Seconds to wait for the server (passed to ``requests.get``);
            ``None`` disables the timeout.

    Returns:
        The audio as ``bytes`` when ``stream`` is true; the endpoint URL
        (without query string) after the audio was written to
        ``audioSaveFile`` when ``stream`` is false; ``None`` when the HTTP
        request failed or the response was not ``audio/mpeg``.
    """
    endpoint_path = '/stream/v1/tts'
    base_url = f'https://{host}{endpoint_path}'

    params = {
        'appkey': appKey,
        'token': token,
        'text': text,
        'format': format,
        'sample_rate': str(sampleRate),
        'voice': voice,
        'volume': str(volume),
        'speech_rate': str(speech_rate)
    }

    def build_url(fields):
        # Values are joined verbatim (no URL encoding): `text` arrives
        # pre-encoded and must not be encoded a second time.
        query = '&'.join(f"{key}={value}" for key, value in fields.items())
        return urlunparse(('https', host, endpoint_path, '', query, ''))

    # Keep the access token out of the logs.
    redacted_params = dict(params, token='***')
    logging.debug(f"Parameters: {redacted_params}")

    url = build_url(params)
    logging.debug(f"Constructed URL: {build_url(redacted_params)}")

    try:
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(url, timeout=timeout)
        body = response.content
    except requests.exceptions.RequestException:
        logging.exception("An error occurred during the HTTP request.")
        return None

    logging.debug(f"Response status and reason: {response.status_code} - {response.reason}")

    # A missing header must count as a failed request, not raise KeyError.
    content_type = response.headers.get('Content-Type', '')
    logging.debug(f"Content Type: {content_type}")

    # Compare only the media type: ignore parameters such as charset, and case.
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type != 'audio/mpeg':
        logging.error(f"The GET request failed: Content Type is not audio/mpeg. Response content: {response.text}")
        return None

    logging.info("Received audio data successfully.")
    if stream:
        logging.debug("Returning audio data as bytes.")
        return body

    # Make sure the target directory exists; a bare file name has no
    # directory part and os.makedirs('') would raise.
    target_dir = os.path.dirname(audioSaveFile)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(audioSaveFile, mode='wb') as f:
        f.write(body)
    logging.info(f"Audio saved to {audioSaveFile}")
    return base_url


async def aliyun_text_to_speech(text, voice, rate=0, volume=0):
    """Synthesize ``text`` and save the audio to ``output/tmp/temp.wav``.

    If the first request fails, the global token is refreshed and the request
    is retried once.

    Args:
        text: Plain (not URL-encoded) text to synthesize.
        voice: Display name of the voice; must be a key of
            ``SUPPORTED_VOICES``.
        rate: Speech rate; multiplied by 5 and limited to -500..500 before
            being sent (so -100..100 covers the full range).
        volume: Volume; ``(volume + 100) / 2`` limited to 0..100 is sent
            (so -100..100 covers the full range).

    Returns:
        A tuple ``(url, audioSaveFile)``. ``url`` is the value returned by
        ``processGETRequest`` and is ``None`` when both attempts failed.

    Raises:
        KeyError: If ``voice`` is not a key of ``SUPPORTED_VOICES``.
    """
    # NOTE(review): processGETRequest performs a synchronous HTTP request, so
    # this coroutine blocks the event loop until the request completes.
    logging.debug(f"text : {text}")
    audioSaveFile = 'output/tmp/temp.wav'
    audio_format = 'wav'
    sample_rate = 16000
    # Percent-encode the text; processGETRequest inserts it into the URL as is.
    text_encoded = urllib.parse.quote_plus(text).replace("+", "%20").replace("*", "%2A").replace("%7E", "~")
    voice_id = SUPPORTED_VOICES[voice]

    # Map the -100..100 inputs onto the API ranges and clamp out-of-range
    # values so an invalid parameter is never sent.
    transformed_rate = max(-500, min(500, int(rate * 5)))
    transformed_volume = max(0, min(100, int((volume + 100) / 2)))

    request_args = (text_encoded, audioSaveFile, audio_format, sample_rate,
                    voice_id, transformed_volume, transformed_rate)

    url = processGETRequest(host, appKey, token, *request_args, stream=False)
    if url is None:
        logging.info("First attempt failed. Updating token and retrying...")
        update_global_token()
        # `token` is looked up again here, so the retry uses the refreshed value.
        url = processGETRequest(host, appKey, token, *request_args, stream=False)
    return url, audioSaveFile

async def aliyun_text_to_speech_stream(text, voice, rate=0, volume=0):
    """Synthesize ``text`` and return the audio as an in-memory stream.

    If the first request fails, the global token is refreshed and the request
    is retried once.

    Args:
        text: Plain (not URL-encoded) text to synthesize.
        voice: Display name of the voice; must be a key of
            ``SUPPORTED_VOICES``.
        rate: Speech rate; multiplied by 5 and limited to -500..500 before
            being sent (so -100..100 covers the full range).
        volume: Volume; ``(volume + 100) / 2`` limited to 0..100 is sent
            (so -100..100 covers the full range).

    Returns:
        A ``BytesIO`` holding the audio bytes; it is empty when both attempts
        failed.

    Raises:
        KeyError: If ``voice`` is not a key of ``SUPPORTED_VOICES``.
    """
    # NOTE(review): processGETRequest performs a synchronous HTTP request, so
    # this coroutine blocks the event loop until the request completes.
    logging.debug(f"text : {text}")
    audio_format = 'wav'
    # Passed through for signature compatibility; nothing is written to it
    # because processGETRequest is called with its default stream=True.
    audioSaveFile = 'output/tmp/temp.wav'
    sample_rate = 16000
    # Percent-encode the text; processGETRequest inserts it into the URL as is.
    text_encoded = urllib.parse.quote_plus(text).replace("+", "%20").replace("*", "%2A").replace("%7E", "~")
    voice_id = SUPPORTED_VOICES[voice]

    # Map the -100..100 inputs onto the API ranges and clamp out-of-range
    # values so an invalid parameter is never sent.
    transformed_rate = max(-500, min(500, int(rate * 5)))
    transformed_volume = max(0, min(100, int((volume + 100) / 2)))

    request_args = (text_encoded, audioSaveFile, audio_format, sample_rate,
                    voice_id, transformed_volume, transformed_rate)

    audio_data = processGETRequest(host, appKey, token, *request_args)
    if audio_data is None:
        logging.info("First attempt failed. Updating token and retrying...")
        update_global_token()
        # `token` is looked up again here, so the retry uses the refreshed value.
        audio_data = processGETRequest(host, appKey, token, *request_args)

    if audio_data is None:
        # Same result as BytesIO(None), but the failure is made visible.
        logging.error("Speech synthesis failed after retrying; returning an empty stream.")
        return BytesIO()
    return BytesIO(audio_data)

# Global configuration, read once when the module is imported.
def _require_env(name):
    """Return the stripped value of environment variable ``name``.

    Raises:
        RuntimeError: If the variable is not set, so that a missing setting
            is reported by name instead of as an AttributeError on None.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value.strip()


host = _require_env('ALIYUN_TTS_HOST')
TOKEN = os.getenv('ALIYUN_TOKEN')
if not TOKEN:
    # No token configured: request one from the token service.
    TOKEN = get_token()
    if TOKEN:
        # Do not write the token itself to the logs.
        logging.info('Token obtained from get_token().')
    else:
        # Keep the module importable; the synthesis functions refresh the
        # token after a failed request.
        logging.warning('No access token available at import time.')
        TOKEN = ''
token = TOKEN.strip()
appKey = _require_env('ALIYUN_APP_KEY')

def update_global_token():
    """Refresh the module-level ``token`` with a new one from ``get_token()``.

    The global is replaced only when ``get_token()`` returns a truthy value;
    otherwise the previous token is kept and a warning is logged.
    """
    global token  # rebind the module-level variable, not a local
    new_token = get_token()
    if not new_token:
        logging.warning("Token refresh failed; keeping the previous token.")
        return
    # Strip like the initial module-level token; never log the secret itself.
    token = new_token.strip()
    logging.info("Global token updated.")

if __name__ == "__main__":
    # Manual run: synthesize a mixed Chinese/English sentence to the temp file.
    text = '你好呀hello how are you'
    # update_global_token()
    # GET request variant:
    # processGETRequest(host, appKey, token, textUrlencode, audioSaveFile, format, sampleRate)
    asyncio.run(aliyun_text_to_speech(text, voice="知锋_多情感"))