digitalhumantalk/thirdparty/aliyun/aliyuntts.py
2024-12-10 17:05:37 +08:00

245 lines
8.5 KiB
Python

# -*- coding: UTF-8 -*-
# Python 2.x: import the httplib module.
# import httplib
# Python 3.x: import the http.client module (this file uses requests instead).
import requests
# Python 2.x: import the urllib module.
# import urllib
# Python 3.x: import the urllib.parse module.
import urllib.parse
import json
import os
from .aliyuntokenaccess import get_token
import asyncio
from io import BytesIO
from urllib.parse import urlencode, urlunparse, urljoin
# Supported voices, keyed by display name and mapped to the voice identifier
# sent to the API as the `voice` request parameter.
# Reference list: https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis
SUPPORTED_VOICES = {
    '阿斌': 'abin',
    '知小白': 'zhixiaobai',
    '知小夏': 'zhixiaoxia',
    '知小妹': 'zhixiaomei',
    '知柜': 'zhigui',
    '知硕': 'zhishuo',
    '艾夏': 'aixia',
    'Cally': 'cally',
    '知锋_多情感': 'zhifeng_emo',
    '知冰_多情感': 'zhibing_emo',
    '知妙_多情感': 'zhimiao_emo',
    '知米_多情感': 'zhimi_emo',
    '知燕_多情感': 'zhiyan_emo',
    '知贝_多情感': 'zhibei_emo',
    '知甜_多情感': 'zhitian_emo',
    '小云': 'xiaoyun',
    '小刚': 'xiaogang',
    '若兮': 'ruoxi',
    '思琪': 'siqi',
    '思佳': 'sijia',
    '思诚': 'sicheng',
    '艾琪': 'aiqi',
    '艾佳': 'aijia',
    '艾诚': 'aicheng',
    '艾达': 'aida',
    '宁儿': 'ninger',
    '瑞琳': 'ruilin',
    '思悦': 'siyue',
    '艾雅': 'aiya',
    '艾美': 'aimei',
    '艾雨': 'aiyu',
    '艾悦': 'aiyue',
    '艾婧': 'aijing',
    '小美': 'xiaomei',
    '艾娜': 'aina',
    '伊娜': 'yina',
    '思婧': 'sijing',
    '思彤': 'sitong',
    '小北': 'xiaobei',
    '艾彤': 'aitong',
    '艾薇': 'aiwei',
    '艾宝': 'aibao',
    'Harry': 'harry',
    'Abby': 'abby',
    'Andy': 'andy',
    'Eric': 'eric',
    'Emily': 'emily',
    'Luna': 'luna',
    'Luca': 'luca',
    'Wendy': 'wendy',
    'William': 'william',
    'Olivia': 'olivia',
    '姗姗': 'shanshan',
    '小玥': 'chuangirl',
    'Lydia': 'lydia',
    '艾硕': 'aishuo',
    '青青': 'qingqing',
    '翠姐': 'cuijie',
    '小泽': 'xiaoze',
    '智香': 'tomoka',
    '智也': 'tomoya',
    'Annie': 'annie',
    '佳佳': 'jiajia',
    'Indah': 'indah',
    '桃子': 'taozi',
    '柜姐': 'guijie',
    'Stella': 'stella',
    'Stanley': 'stanley',
    'Kenny': 'kenny',
    'Rosa': 'rosa',
    'Farah': 'farah',
    '马树': 'mashu',
    '小仙': 'xiaoxian',
    '悦儿': 'yuer',
    '猫小美': 'maoxiaomei',
    '艾飞': 'aifei',
    '亚群': 'yaqun',
    '巧薇': 'qiaowei',
    '大虎': 'dahu',
    'ava': 'ava',
    '艾伦': 'ailun',
    '杰力豆': 'jielidou',
    '老铁': 'laotie',
    '老妹': 'laomei',
    '艾侃': 'aikan',
    'Talat': 'talat',
    'Tien': 'tien',
    'Becca': 'becca',
    'Kyong': 'kyong',
    'masha': 'masha',
    'camila': 'camila',
    'perla': 'perla',
    '知猫': 'zhimao',
    '知媛': 'zhiyuan',
    '知雅': 'zhiya',
    '知悦': 'zhiyue',
    '知达': 'zhida',
    '知莎': 'zhistella',
    'Kelly': 'kelly',
    'clara': 'clara',
    'hanna': 'hanna',
    'waan': 'waan',
    'betty': 'betty',
    'beth': 'beth',
    'cindy': 'cindy',
    'donna': 'donna',
    'eva': 'eva',
    'brian': 'brian',
}
import logging
# Root-logger setup: emit DEBUG and above, each record formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
def processGETRequest(host, appKey, token, text, audioSaveFile, format='wav', sampleRate=16000,
                      voice="xiaoyun", volume=50, speech_rate=0, stream=True, timeout=30):
    """Request speech synthesis from the TTS endpoint with an HTTP GET.

    Args:
        host: Host name of the TTS gateway; the request goes to
            ``https://<host>/stream/v1/tts``.
        appKey: Application key, sent as the ``appkey`` query parameter.
        token: Access token, sent as the ``token`` query parameter.
        text: Text to synthesize. It must ALREADY be percent-encoded by the
            caller; it is inserted into the query string verbatim.
        audioSaveFile: Path the audio is written to when ``stream`` is false.
        format: Audio format requested from the service. (The name shadows
            the builtin; it is kept for backward compatibility.)
        sampleRate: Sample rate requested from the service.
        voice: Voice identifier requested from the service.
        volume: Volume value forwarded to the service.
        speech_rate: Speech-rate value forwarded to the service.
        stream: When true, return the audio bytes; otherwise write them to
            ``audioSaveFile``.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            connection cannot block the caller forever.

    Returns:
        * ``bytes`` with the audio payload when ``stream`` is true;
        * the endpoint URL (without query string) when ``stream`` is false
          and the file was written;
        * ``None`` when the HTTP request failed or the response is not audio.

    Raises:
        OSError: If the output directory or file cannot be written.
    """
    # Set URL request parameters
    params = {
        'appkey': appKey,
        'token': token,
        'text': text,
        'format': format,
        'sample_rate': str(sampleRate),
        'voice': voice,
        'volume': str(volume),
        'speech_rate': str(speech_rate)
    }
    # Credentials must never reach the logs, so mask them before dumping.
    loggable_params = {
        key: ('***' if key in ('appkey', 'token') else value)
        for key, value in params.items()
    }
    logging.debug(f"Parameters: {loggable_params}")

    def _encode(key, value):
        # `text` arrives pre-encoded; encoding it again would double-encode it.
        # Every other value is percent-encoded so it cannot break the query.
        if key == 'text':
            return value
        return urllib.parse.quote(str(value), safe='')

    query_string = '&'.join(f"{key}={_encode(key, value)}" for key, value in params.items())
    base_url = f'https://{host}/stream/v1/tts'
    # Construct the full URL
    url = urlunparse(('https', host, '/stream/v1/tts', '', query_string, ''))
    # The full URL embeds the token, so only the endpoint is logged.
    logging.debug(f"Request URL: {base_url} (query string omitted: it carries credentials)")

    try:
        # Send GET request
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        logging.exception("An error occurred during the HTTP request.")
        return None

    logging.debug(f"Response status and reason: {response.status_code} - {response.reason}")
    # A missing header is treated as a failed request instead of raising KeyError.
    contentType = response.headers.get('Content-Type', '')
    logging.debug(f"Content Type: {contentType}")

    # Compare only the media type so "audio/mpeg; ..." with parameters still matches.
    media_type = contentType.split(';', 1)[0].strip().lower()
    if media_type != 'audio/mpeg':
        logging.error(f"The GET request failed: Content Type is not audio/mpeg. Response content: {response.text}")
        return None

    logging.info("Received audio data successfully.")
    body = response.content
    if stream:
        logging.debug("Returning audio data as bytes.")
        return body

    # Make sure the target directory exists; a bare file name has no
    # directory part and os.makedirs('') would raise.
    save_dir = os.path.dirname(audioSaveFile)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(audioSaveFile, mode='wb') as f:
        f.write(body)
    logging.info(f"Audio saved to {audioSaveFile}")
    return base_url  # Non-None marks success; the audio itself is in audioSaveFile
async def aliyun_text_to_speech(text, voice, rate=0, volume=0):
    """Synthesize *text* and save the audio to ``output/tmp/temp.wav``.

    Args:
        text: Plain (not URL-encoded) text to synthesize.
        voice: Display name of the voice; must be a key of SUPPORTED_VOICES.
        rate: Speaking-rate adjustment; multiplied by 5 before being sent
            (assumes -100..100 maps to -500..500, as in the original code).
        volume: Volume adjustment; mapped with ``(volume + 100) / 2``
            (assumes -100..100 maps to 0..100, as in the original code).

    Returns:
        Tuple ``(url, audioSaveFile)``. ``url`` is whatever processGETRequest
        returned, and is ``None`` when both attempts failed.

    Raises:
        KeyError: If *voice* is not a key of SUPPORTED_VOICES.

    NOTE: processGETRequest is called synchronously, so the event loop is
    blocked for the duration of the request.
    """
    # Use the logging setup shared by the rest of the module instead of print().
    logging.debug(f"text : {text}")
    audioSaveFile = 'output/tmp/temp.wav'
    format = 'wav'
    sampleRate = 16000
    text_encoded = urllib.parse.quote_plus(text).replace("+", "%20").replace("*", "%2A").replace("%7E", "~")
    voice = SUPPORTED_VOICES[voice]
    # Transform rate and volume to the ranges assumed by the original mapping
    # and clamp them, so an out-of-range value cannot fail the request and be
    # mistaken for an expired token.
    transformed_rate = max(-500, min(500, int(rate * 5)))
    transformed_volume = max(0, min(100, int((volume + 100) / 2)))

    def _synthesize():
        # `token` is read at call time so the retry sees the refreshed value.
        return processGETRequest(host, appKey, token, text_encoded, audioSaveFile, format, sampleRate,
                                 voice, transformed_volume, transformed_rate, stream=False)

    url = _synthesize()
    if url is None:
        logging.info("First attempt failed. Updating token and retrying...")
        update_global_token()
        url = _synthesize()
        if url is None:
            logging.error("Speech synthesis failed again after refreshing the token.")
    return url, audioSaveFile
async def aliyun_text_to_speech_stream(text, voice, rate=0, volume=0):
    """Synthesize *text* and return the audio as an in-memory stream.

    Args:
        text: Plain (not URL-encoded) text to synthesize.
        voice: Display name of the voice; must be a key of SUPPORTED_VOICES.
        rate: Speaking-rate adjustment; multiplied by 5 before being sent
            (assumes -100..100 maps to -500..500, as in the original code).
        volume: Volume adjustment; mapped with ``(volume + 100) / 2``
            (assumes -100..100 maps to 0..100, as in the original code).

    Returns:
        A ``BytesIO`` holding the audio bytes. It is empty when both the
        first attempt and the retry after a token refresh failed.

    Raises:
        KeyError: If *voice* is not a key of SUPPORTED_VOICES.

    NOTE: processGETRequest is called synchronously, so the event loop is
    blocked for the duration of the request.
    """
    # Use the logging setup shared by the rest of the module instead of print().
    logging.debug(f"text : {text}")
    format = 'wav'
    # Passed through because processGETRequest requires it positionally.
    audioSaveFile = 'output/tmp/temp.wav'
    sampleRate = 16000
    text_encoded = urllib.parse.quote_plus(text).replace("+", "%20").replace("*", "%2A").replace("%7E", "~")
    voice = SUPPORTED_VOICES[voice]
    # Transform rate and volume to the ranges assumed by the original mapping
    # and clamp them, so an out-of-range value cannot fail the request and be
    # mistaken for an expired token.
    transformed_rate = max(-500, min(500, int(rate * 5)))
    transformed_volume = max(0, min(100, int((volume + 100) / 2)))

    def _synthesize():
        # `token` is read at call time so the retry sees the refreshed value.
        return processGETRequest(host, appKey, token, text_encoded, audioSaveFile, format, sampleRate,
                                 voice, transformed_volume, transformed_rate)

    audio_data = _synthesize()
    if audio_data is None:
        logging.info("First attempt failed. Updating token and retrying...")
        update_global_token()
        audio_data = _synthesize()
    if audio_data is None:
        # Make the failure explicit: callers receive an empty stream.
        logging.error("Speech synthesis failed again after refreshing the token; returning an empty stream.")
        audio_data = b""
    return BytesIO(audio_data)
# Global configuration, resolved once at import time from environment variables.
def _env_or_fail(name):
    """Return environment variable *name* stripped of surrounding whitespace.

    Raises RuntimeError naming the variable when it is not set, instead of
    the opaque AttributeError that ``os.getenv(name).strip()`` produces.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value.strip()


host = _env_or_fail('ALIYUN_TTS_HOST')
TOKEN = os.getenv('ALIYUN_TOKEN')
if TOKEN is None or len(TOKEN) == 0:
    # No token configured: obtain one from the token service.
    TOKEN = get_token()
if TOKEN is None:
    raise RuntimeError("No access token available: ALIYUN_TOKEN is not set and get_token() returned None")
# The token is a credential, so only the fact that it was loaded is logged.
logging.info("Access token loaded (value not logged).")
token = TOKEN.strip()
appKey = _env_or_fail('ALIYUN_APP_KEY')
def update_global_token():
    """Fetch a new access token and store it in the module-level ``token``.

    When ``get_token()`` returns nothing, the existing token is kept and a
    warning is logged. Returns ``None`` in both cases.
    """
    global token  # Rebinds the module-level variable read by the request helpers
    new_token = get_token()
    if not new_token:
        logging.warning("Token refresh failed: get_token() returned no token; keeping the current one.")
        return
    # Strip the refreshed token the same way the initial token is stripped.
    token = new_token.strip()
    # The token is a credential, so its value is deliberately not logged.
    logging.info("Global token updated.")
if __name__ == "__main__":
    # Manual smoke test: synthesize one mixed Chinese/English sentence to the
    # temp file using the file-saving entry point.
    sample_text = '你好呀hello how are you'
    synthesis = aliyun_text_to_speech(sample_text, voice="知锋_多情感")
    asyncio.run(synthesis)