From b408575f7e9b846710ff8f3116b9a1a9e94ddfbf Mon Sep 17 00:00:00 2001 From: ahmedrazakhan12 Date: Mon, 8 Jun 2026 21:51:55 +0500 Subject: [PATCH 1/5] refactor: replace live LLM and TTS service calls with static mock responses to minimize costs --- llm.py | 96 +++++++++++++++++++++++++----------------------- tts/doubao.py | 99 ++++++++++++++++++++++++++------------------------ tts/tencent.py | 75 ++++++++++++++++++++------------------ 3 files changed, 143 insertions(+), 127 deletions(-) diff --git a/llm.py b/llm.py index 9be5e19f..4f5f2569 100644 --- a/llm.py +++ b/llm.py @@ -8,51 +8,57 @@ def llm_response(message,avatar_session:'BaseAvatar',datainfo:dict={}): try: opt = avatar_session.opt - start = time.perf_counter() - from openai import OpenAI - client = OpenAI( - # 如果您没有配置环境变量,请在此处用您的API Key进行替换 - api_key=os.getenv("DASHSCOPE_API_KEY"), - # 填写DashScope SDK的base_url - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", - ) - end = time.perf_counter() - logger.info(f"llm Time init: {end-start}s,{message}") - completion = client.chat.completions.create( - model="qwen-plus", - messages=[{'role': 'system', 'content': '你是一个知识助手,尽量以简短、口语化的方式输出'}, - {'role': 'user', 'content': message}], - stream=True, - # 通过以下设置,在流式输出的最后一行展示token使用信息 - stream_options={"include_usage": True} - ) - result="" - first = True - for chunk in completion: - if len(chunk.choices)>0: - #print(chunk.choices[0].delta.content) - if first: - end = time.perf_counter() - logger.info(f"llm Time to first chunk: {end-start}s") - first = False - msg = chunk.choices[0].delta.content - if msg is None: - continue - lastpos=0 - #msglist = re.split('[,.!;:,。!?]',msg) - for i, char in enumerate(msg): - if char in ",.!;:,。!?:;" : - result = result+msg[lastpos:i+1] - lastpos = i+1 - if len(result)>10: - logger.info(result) - avatar_session.put_msg_txt(result,datainfo) - result="" - result = result+msg[lastpos:] - end = time.perf_counter() - logger.info(f"llm Time to last chunk: {end-start}s") - if result: - avatar_session.put_msg_txt(result,datainfo) + # Static response to avoid using paid third-party services + static_response = f"收到,这是本地静态测试回复。你发送的消息是:{message}" + logger.info(f"Static LLM response: {static_response}") + avatar_session.put_msg_txt(static_response, datainfo) + return + + # start = time.perf_counter() + # from openai import OpenAI + # client = OpenAI( + # # 如果您没有配置环境变量,请在此处用您的API Key进行替换 + # api_key=os.getenv("DASHSCOPE_API_KEY"), + # # 填写DashScope SDK的base_url + # base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + # ) + # end = time.perf_counter() + # logger.info(f"llm Time init: {end-start}s,{message}") + # completion = client.chat.completions.create( + # model="qwen-plus", + # messages=[{'role': 'system', 'content': '你是一个知识助手,尽量以简短、口语化的方式输出'}, + # {'role': 'user', 'content': message}], + # stream=True, + # # 通过以下设置,在流式输出的最后一行展示token使用信息 + # stream_options={"include_usage": True} + # ) + # result="" + # first = True + # for chunk in completion: + # if len(chunk.choices)>0: + # #print(chunk.choices[0].delta.content) + # if first: + # end = time.perf_counter() + # logger.info(f"llm Time to first chunk: {end-start}s") + # first = False + # msg = chunk.choices[0].delta.content + # if msg is None: + # continue + # lastpos=0 + # #msglist = re.split('[,.!;:,。!?]',msg) + # for i, char in enumerate(msg): + # if char in ",.!;:,。!?:;" : + # result = result+msg[lastpos:i+1] + # lastpos = i+1 + # if len(result)>10: + # logger.info(result) + # avatar_session.put_msg_txt(result,datainfo) + # result="" + # result = result+msg[lastpos:] + # end = time.perf_counter() + # logger.info(f"llm Time to last chunk: {end-start}s") + # if result: + # avatar_session.put_msg_txt(result,datainfo) except Exception as e: logger.exception('llm exceptiopn:') diff --git a/tts/doubao.py b/tts/doubao.py index 6c1b84e5..a5305878 100644 --- a/tts/doubao.py +++ b/tts/doubao.py @@ -49,54 +49,59 @@ def __init__(self, opt, parent): } async def doubao_voice(self, text, ref_file): # -> Iterator[bytes]: - start = time.perf_counter() - voice_type = ref_file #self.opt.REF_FILE + # Mock/static return to avoid using paid Doubao service + logger.info(f"Mock Doubao TTS voice synthesis for text: {text}") + yield b'\x00' * 51200 + return - try: - # 创建请求对象 - default_header = bytearray(b'\x11\x10\x11\x00') - submit_request_json = copy.deepcopy(self.request_json) - submit_request_json["user"]["uid"] = self.parent.sessionid - submit_request_json["audio"]["voice_type"] = voice_type - submit_request_json["request"]["text"] = text - submit_request_json["request"]["reqid"] = str(uuid.uuid4()) - submit_request_json["request"]["operation"] = "submit" - payload_bytes = str.encode(json.dumps(submit_request_json)) - payload_bytes = gzip.compress(payload_bytes) # if no compression, comment this line - full_client_request = bytearray(default_header) - full_client_request.extend((len(payload_bytes)).to_bytes(4, 'big')) # payload size(4 bytes) - full_client_request.extend(payload_bytes) # payload - - header = {"Authorization": f"Bearer; {self.token}"} - first = True - async with websockets.connect(self.api_url, extra_headers=header, ping_interval=None) as ws: - await ws.send(full_client_request) - while True: - res = await ws.recv() - header_size = res[0] & 0x0f - message_type = res[1] >> 4 - message_type_specific_flags = res[1] & 0x0f - payload = res[header_size*4:] - - if message_type == 0xb: # audio-only server response - if message_type_specific_flags == 0: # no sequence number as ACK - #print(" Payload size: 0") - continue - else: - if first: - end = time.perf_counter() - logger.info(f"doubao tts Time to first chunk: {end-start}s") - first = False - sequence_number = int.from_bytes(payload[:4], "big", signed=True) - payload_size = int.from_bytes(payload[4:8], "big", signed=False) - payload = payload[8:] - yield payload - if sequence_number < 0: - break - else: - break - except Exception as e: - logger.exception('doubao') + # start = time.perf_counter() + # voice_type = ref_file #self.opt.REF_FILE + # + # try: + # # 创建请求对象 + # default_header = bytearray(b'\x11\x10\x11\x00') + # submit_request_json = copy.deepcopy(self.request_json) + # submit_request_json["user"]["uid"] = self.parent.sessionid + # submit_request_json["audio"]["voice_type"] = voice_type + # submit_request_json["request"]["text"] = text + # submit_request_json["request"]["reqid"] = str(uuid.uuid4()) + # submit_request_json["request"]["operation"] = "submit" + # payload_bytes = str.encode(json.dumps(submit_request_json)) + # payload_bytes = gzip.compress(payload_bytes) # if no compression, comment this line + # full_client_request = bytearray(default_header) + # full_client_request.extend((len(payload_bytes)).to_bytes(4, 'big')) # payload size(4 bytes) + # full_client_request.extend(payload_bytes) # payload + # + # header = {"Authorization": f"Bearer; {self.token}"} + # first = True + # async with websockets.connect(self.api_url, extra_headers=header, ping_interval=None) as ws: + # await ws.send(full_client_request) + # while True: + # res = await ws.recv() + # header_size = res[0] & 0x0f + # message_type = res[1] >> 4 + # message_type_specific_flags = res[1] & 0x0f + # payload = res[header_size*4:] + # + # if message_type == 0xb: # audio-only server response + # if message_type_specific_flags == 0: # no sequence number as ACK + # #print(" Payload size: 0") + # continue + # else: + # if first: + # end = time.perf_counter() + # logger.info(f"doubao tts Time to first chunk: {end-start}s") + # first = False + # sequence_number = int.from_bytes(payload[:4], "big", signed=True) + # payload_size = int.from_bytes(payload[4:8], "big", signed=False) + # payload = payload[8:] + # yield payload + # if sequence_number < 0: + # break + # else: + # break + # except Exception as e: + # logger.exception('doubao') # # 检查响应状态码 # if response.status_code == 200: # # 处理响应数据 diff --git a/tts/tencent.py b/tts/tencent.py index b283596f..4584cd1c 100644 --- a/tts/tencent.py +++ b/tts/tencent.py @@ -78,41 +78,46 @@ def txt_to_audio(self,msg:tuple[str, dict]): ) def tencent_voice(self, text, reffile, reftext,language, server_url) -> Iterator[bytes]: - start = time.perf_counter() - session_id = str(uuid.uuid1()) - params = self.__gen_params(session_id, text, reffile) - signature = self.__gen_signature(params) - headers = { - "Content-Type": "application/json", - "Authorization": str(signature) - } - url = _PROTOCOL + _HOST + _PATH - try: - res = requests.post(url, headers=headers, - data=json.dumps(params), stream=True) - - end = time.perf_counter() - logger.info(f"tencent Time to make POST: {end-start}s") - - first = True - - for chunk in res.iter_content(chunk_size=6400): # 640 16K*20ms*2 - #logger.info('chunk len:%d',len(chunk)) - if first: - try: - rsp = json.loads(chunk) - #response["Code"] = rsp["Response"]["Error"]["Code"] - #response["Message"] = rsp["Response"]["Error"]["Message"] - logger.error("tencent tts:%s",rsp["Response"]["Error"]["Message"]) - return - except: - end = time.perf_counter() - logger.info(f"tencent Time to first chunk: {end-start}s") - first = False - if chunk and self.state==State.RUNNING: - yield chunk - except Exception as e: - logger.exception('tencent') + # Mock/static return to avoid using paid Tencent service + logger.info(f"Mock Tencent TTS voice synthesis for text: {text}") + yield b'\x00' * 51200 + return + + # start = time.perf_counter() + # session_id = str(uuid.uuid1()) + # params = self.__gen_params(session_id, text, reffile) + # signature = self.__gen_signature(params) + # headers = { + # "Content-Type": "application/json", + # "Authorization": str(signature) + # } + # url = _PROTOCOL + _HOST + _PATH + # try: + # res = requests.post(url, headers=headers, + # data=json.dumps(params), stream=True) + # + # end = time.perf_counter() + # logger.info(f"tencent Time to make POST: {end-start}s") + # + # first = True + # + # for chunk in res.iter_content(chunk_size=6400): # 640 16K*20ms*2 + # #logger.info('chunk len:%d',len(chunk)) + # if first: + # try: + # rsp = json.loads(chunk) + # #response["Code"] = rsp["Response"]["Error"]["Code"] + # #response["Message"] = rsp["Response"]["Error"]["Message"] + # logger.error("tencent tts:%s",rsp["Response"]["Error"]["Message"]) + # return + # except: + # end = time.perf_counter() + # logger.info(f"tencent Time to first chunk: {end-start}s") + # first = False + # if chunk and self.state==State.RUNNING: + # yield chunk + # except Exception as e: + # logger.exception('tencent') def stream_tts(self,audio_stream,msg:tuple[str, dict]): text,textevent = msg From 6460a12d5b810e204891689e9df9e13f0fbefc54 Mon Sep 17 00:00:00 2001 From: ahmedrazakhan12 Date: Mon, 8 Jun 2026 22:04:09 +0500 Subject: [PATCH 2/5] refactor: replace QwenTTS remote API implementation with mock audio synthesis for testing --- tts/qwentts.py | 113 ++++++++++--------------------------------------- 1 file changed, 22 insertions(+), 91 deletions(-) diff --git a/tts/qwentts.py b/tts/qwentts.py index 1b69edaa..25cb00f6 100644 --- a/tts/qwentts.py +++ b/tts/qwentts.py @@ -45,16 +45,6 @@ def __init__(self, opt, parent): self.voice = opt.REF_FILE if opt.REF_FILE else 'Cherry' # 模型名 self.model = getattr(opt, 'qwen_tts_model', 'qwen3-tts-flash-realtime') - # WebSocket URL - self.ws_url = getattr(opt, 'qwen_tts_url', - 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime') - - # 设置 DashScope API Key - api_key = getattr(opt, 'dashscope_api_key', None) or os.environ.get('DASHSCOPE_API_KEY') - if api_key: - dashscope.api_key = api_key - else: - logger.warning("QwenTTS: DASHSCOPE_API_KEY 未设置,请设置环境变量或通过参数传入") # ---------- 内部状态 ---------- self._remainder = np.array([], dtype=np.float32) # 上次重采样后不足一 chunk 的 16kHz 样本 @@ -63,94 +53,35 @@ def __init__(self, opt, parent): self._current_text = '' self._current_textevent = {} - # ---------- 回调类 ---------- - tts_ref = self - - class _Callback(QwenTtsRealtimeCallback): - def on_open(self) -> None: - logger.info("QwenTTS WebSocket 连接已建立") - - def on_close(self, close_status_code, close_msg) -> None: - logger.info(f"QwenTTS WebSocket 关闭: code={close_status_code}, msg={close_msg}") - tts_ref._response_event.set() - - def on_event(self, response: dict) -> None: - try: - event_type = response.get('type', '') - - if event_type == 'session.created': - logger.info(f"QwenTTS session: {response.get('session', {}).get('id', '')}") - - elif event_type == 'response.audio.delta': - audio_b64 = response.get('delta', '') - if audio_b64: - pcm_data = base64.b64decode(audio_b64) - tts_ref._on_audio_data(pcm_data) - - elif event_type == 'response.done': - logger.info("QwenTTS response done") - tts_ref._flush_remainder() - tts_ref._response_event.set() - - elif event_type == 'error': - logger.error(f"QwenTTS 错误: {response}") - tts_ref._response_event.set() - - except Exception as e: - logger.exception(f"QwenTTS 回调处理异常: {e}") - - # ---------- 建立唯一连接 ---------- - self._callback = _Callback() - self._tts_client = QwenTtsRealtime( - model=self.model, - callback=self._callback, - url=self.ws_url, - ) - self._tts_client.connect() - self._tts_client.update_session( - voice=self.voice, - response_format=AudioFormat.PCM_24000HZ_MONO_16BIT, # Qwen TTS 只支持 24kHz 输出 - sample_rate=16000, - mode='commit', - ) - logger.info(f"QwenTTS 初始化完成: model={self.model}, voice={self.voice}") + logger.info("Mock QwenTTS initialized (no remote API connection established)") # ========================== 核心方法 ========================== def txt_to_audio(self, msg: tuple[str, dict]): - text, textevent = msg - t_start = time.perf_counter() - - ref_file = textevent.get('tts', {}).get('ref_file',self.opt.REF_FILE) - - # 重置状态 - self._remainder = np.array([], dtype=np.float32) - self._first_chunk = True - self._current_text = text - self._current_textevent = textevent - self._response_event.clear() - try: - #logger.info(f"QwenTTS 发送文本: {text[:80]}...") - if ref_file != self.voice: - logger.info(f'ref_file:{ref_file},self.voice:{self.voice}') - self.voice=ref_file - self._tts_client.close() - self._tts_client.connect() - self._tts_client.update_session( - voice=self.voice, - response_format=AudioFormat.PCM_24000HZ_MONO_16BIT, # Qwen TTS 只支持 24kHz 输出 - sample_rate=16000, - mode='commit', - ) - self._tts_client.append_text(text) - self._tts_client.commit() - - # 等待 response.done(音频在回调中流式处理) - self._response_event.wait(timeout=60) + text, textevent = msg + t_start = time.perf_counter() + + logger.info(f"Mock QwenTTS synthesis for text: {text}") + + # Output start frame + eventpoint_start = {'status': 'start', 'text': text} + eventpoint_start.update(**textevent) + self.parent.put_audio_frame(np.zeros(self.chunk, np.float32), eventpoint_start) + + # Output mock silence + for _ in range(10): + if self.state != State.RUNNING: + break + self.parent.put_audio_frame(np.zeros(self.chunk, np.float32), textevent) + + # Output end frame + eventpoint_end = {'status': 'end', 'text': text} + eventpoint_end.update(**textevent) + self.parent.put_audio_frame(np.zeros(self.chunk, np.float32), eventpoint_end) t_end = time.perf_counter() - logger.info(f"QwenTTS 合成完成,耗时: {t_end - t_start:.2f}s") + logger.info(f"Mock QwenTTS synthesis completed, time: {t_end - t_start:.2f}s") except Exception as e: logger.exception(f"QwenTTS txt_to_audio 异常: {e}") From d487ffaa854d130a70e4095f30eaf9be42cfd509 Mon Sep 17 00:00:00 2001 From: ahmedrazakhan12 Date: Tue, 9 Jun 2026 17:06:25 +0500 Subject: [PATCH 3/5] chore: internationalize web UI components and consolidate CSS styling --- app.py | 2 +- web/admin.html | 96 ++-- web/asr/index.html | 178 +++--- web/asr/main.js | 1247 ++++++++++++++++++++-------------------- web/asr/wsconnecter.js | 236 ++++---- web/avatar.html | 56 +- web/client.js | 8 +- web/dashboard.html | 117 ++-- web/index.html | 96 ++-- web/tts/index.html | 320 +++++------ web/webrtcapi-asr.html | 14 +- web/webrtcapi.html | 14 +- 12 files changed, 1154 insertions(+), 1230 deletions(-) diff --git a/app.py b/app.py index ce496206..3a287cff 100644 --- a/app.py +++ b/app.py @@ -194,7 +194,7 @@ def main(): elif opt.transport=='rtcpush': pagename='rtcpushapi.html' logger.info('start http server; http://:'+str(opt.listenport)+'/'+pagename) - # logger.info('如果使用webrtc,推荐访问webrtc集成前端: http://:'+str(opt.listenport)+'/dashboard.html') + # logger.info('If using WebRTC, it is recommended to access the WebRTC integrated frontend: http://:'+str(opt.listenport)+'/dashboard.html') def run_server(runner): loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) diff --git a/web/admin.html b/web/admin.html index 534ae389..6ebd8406 100644 --- a/web/admin.html +++ b/web/admin.html @@ -1,10 +1,10 @@ - + - 后台管理系统 - LiveTalking + Admin Console - LiveTalking @@ -119,17 +119,9 @@ } @keyframes pulse { - 0% { - box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); - } - - 70% { - box-shadow: 0 0 0 6px rgba(239, 68, 68, 0); - } - - 100% { - box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); - } + 0% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); } + 70% { box-shadow: 0 0 0 6px rgba(239, 68, 68, 0); } + 100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); } } .session-detail-item { @@ -159,7 +151,7 @@ } .skeleton { - background: linear-gradient(90deg, #f0f0f0 25%, #e0e0e0 50%, #f0f0f0 75%); + background: linear-gradient(90deg, #f0f0f0 25%, #e0eafc 50%, #f0f0f0 75%); background-size: 200% 100%; animation: loading 1.5s infinite; border-radius: 4px; @@ -167,13 +159,8 @@ } @keyframes loading { - 0% { - background-position: 200% 0; - } - - 100% { - background-position: -200% 0; - } + 0% { background-position: 200% 0; } + 100% { background-position: -200% 0; } } .empty-state { @@ -194,9 +181,9 @@

- LiveTalking 控制台 + LiveTalking Console

@@ -205,22 +192,18 @@

- 全局配置 + Global Config

- - + + - - + +
加载中... -
-
Loading...
加载中... -
-
Loading...
@@ -232,7 +215,7 @@

- 活跃会话 + Active Sessions 0

@@ -253,15 +236,15 @@

diff --git a/web/asr/index.html b/web/asr/index.html index 902518c8..c68e50d2 100644 --- a/web/asr/index.html +++ b/web/asr/index.html @@ -1,89 +1,89 @@ - - - - - - 语音识别 - - - - - - - - -
- -
- asr服务器地址(必填): -
- -
- -
-
-
- 选择录音模式:
- -    - - -
- -
-
- 选择asr模型模式:
- -    -    - - -
- - -
-
- 逆文本标准化(ITN):
-    - -
-
-
- 热词设置(一行一个关键字,空格隔开权重,如"阿里巴巴 20"): -
- - - -
- -
- 语音识别结果显示: -
- - -
-
请点击开始
-
- - - - -
- - -
-
- - - - - - - - - + + + + + + Speech Recognition (ASR) + + + + + + + + +
+ +
+ ASR Server Address (Required): +
+ +
+ +
+
+
+ Select Recording Mode:
+ +    + + +
+ +
+
+ Select ASR Model Mode:
+ +    +    + + +
+ + +
+
+ Inverse Text Normalization (ITN):
+    + +
+
+
+ Hotword Settings (one keyword per line, space separated weight, e.g., "Alibaba 20"): +
+ + + +
+ +
+ Speech Recognition Results: +
+ + +
+
Please click Start
+
+ + + + +
+ + +
+
+ + + + + + + + + diff --git a/web/asr/main.js b/web/asr/main.js index 3f312079..2e0ef180 100644 --- a/web/asr/main.js +++ b/web/asr/main.js @@ -1,625 +1,622 @@ -/** - * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights - * Reserved. MIT License (https://opensource.org/licenses/MIT) - */ -/* 2022-2023 by zhaoming,mali aihealthx.com */ - - -// 连接; 定义socket连接类对象与语音对象 -var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState}); -var audioBlob; - -// 录音; 定义录音对象,wav格式 -var rec = Recorder({ - type:"pcm", - bitRate:16, - sampleRate:16000, - onProcess:recProcess -}); - - - - -var sampleBuf=new Int16Array(); -// 定义按钮响应事件 -var btnStart = document.getElementById('btnStart'); -btnStart.onclick = record; -var btnStop = document.getElementById('btnStop'); -btnStop.onclick = stop; -btnStop.disabled = true; -btnStart.disabled = true; - -btnConnect= document.getElementById('btnConnect'); -btnConnect.onclick = start; - -var awsslink= document.getElementById('wsslink'); - - -var rec_text=""; // for online rec asr result -var offline_text=""; // for offline rec asr result -var info_div = document.getElementById('info_div'); - -var upfile = document.getElementById('upfile'); - - - -var isfilemode=false; // if it is in file mode -var file_ext=""; -var file_sample_rate=16000; //for wav file sample rate -var file_data_array; // array to save file data - -var totalsend=0; - - -// var now_ipaddress=window.location.href; -// now_ipaddress=now_ipaddress.replace("https://","wss://"); -// now_ipaddress=now_ipaddress.replace("static/index.html",""); -// var localport=window.location.port; -// now_ipaddress=now_ipaddress.replace(localport,"10095"); -// document.getElementById('wssip').value=now_ipaddress; -addresschange(); -function addresschange() -{ - - var Uri = document.getElementById('wssip').value; - document.getElementById('info_wslink').innerHTML="点此处手工授权(IOS手机)"; - Uri=Uri.replace(/wss/g,"https"); - console.log("addresschange uri=",Uri); - - awsslink.onclick=function(){ - window.open(Uri, '_blank'); - } - -} - -upfile.onclick=function() -{ - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=false; - -} - -// from https://github.com/xiangyuecn/Recorder/tree/master -var readWavInfo=function(bytes){ - //读取wav文件头,统一成44字节的头 - if(bytes.byteLength<44){ - return null; - }; - var wavView=bytes; - var eq=function(p,s){ - for(var i=0;i=chunk_size){ - - sendBuf=sampleBuf.slice(0,chunk_size); - totalsend=totalsend+sampleBuf.length; - sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length); - wsconnecter.wsSend(sendBuf); - - - } - - stop(); - - - -} - - -function on_recoder_mode_change() -{ - var item = null; - var obj = document.getElementsByName("recoder_mode"); - for (var i = 0; i < obj.length; i++) { //遍历Radio - if (obj[i].checked) { - item = obj[i].value; - break; - } - - - } - if(item=="mic") - { - document.getElementById("mic_mode_div").style.display = 'block'; - document.getElementById("rec_mode_div").style.display = 'none'; - - - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=false; - isfilemode=false; - } - else - { - document.getElementById("mic_mode_div").style.display = 'none'; - document.getElementById("rec_mode_div").style.display = 'block'; - - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=true; - isfilemode=true; - info_div.innerHTML='请点击选择文件'; - - - } -} - - -function getHotwords(){ - - var obj = document.getElementById("varHot"); - - if(typeof(obj) == 'undefined' || obj==null || obj.value.length<=0){ - return null; - } - let val = obj.value.toString(); - - console.log("hotwords="+val); - let items = val.split(/[(\r\n)\r\n]+/); //split by \r\n - var jsonresult = {}; - const regexNum = /^[0-9]*$/; // test number - for (item of items) { - - let result = item.split(" "); - if(result.length>=2 && regexNum.test(result[result.length-1])) - { - var wordstr=""; - for(var i=0;i new Promise((resolve) => setTimeout(resolve, delay)) -async function is_speaking() { - const response = await fetch('/is_speaking', { - body: JSON.stringify({ - sessionid: String(parent.document.getElementById('sessionid').value), - }), - headers: { - 'Content-Type': 'application/json' - }, - method: 'POST' - }); - const data = await response.json(); - console.log('is_speaking res:',data) - return data.data -} - -async function waitSpeakingEnd() { - rec.stop() //关闭录音 - for(let i=0;i<10;i++) { //等待数字人开始讲话,最长等待10s - bspeak = await is_speaking() - if(bspeak) { - break - } - await sleep(1000) - } - - while(true) { //等待数字人讲话结束 - bspeak = await is_speaking() - if(!bspeak) { - break - } - await sleep(1000) - } - await sleep(2000) - rec.start() -} -// 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中 -function getJsonMessage( jsonMsg ) { - //console.log(jsonMsg); - console.log( "message: " + JSON.parse(jsonMsg.data)['text'] ); - var rectxt=""+JSON.parse(jsonMsg.data)['text']; - var asrmodel=JSON.parse(jsonMsg.data)['mode']; - var is_final=JSON.parse(jsonMsg.data)['is_final']; - var timestamp=JSON.parse(jsonMsg.data)['timestamp']; - if(asrmodel=="2pass-offline" || asrmodel=="offline") - { - offline_text=offline_text+rectxt.replace(/ +/g,"")+'\n'; //handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,""); - rec_text=offline_text; - fetch('/human', { - body: JSON.stringify({ - text: rectxt.replace(/ +/g,""), - type: 'chat', - sessionid: String(parent.document.getElementById('sessionid').value), - }), - headers: { - 'Content-Type': 'application/json' - }, - method: 'POST' - }); - - waitSpeakingEnd(); - } - else - { - rec_text=rec_text+rectxt; //.replace(/ +/g,""); - } - var varArea=document.getElementById('varArea'); - - varArea.value=rec_text; - console.log( "offline_text: " + asrmodel+","+offline_text); - console.log( "rec_text: " + rec_text); - if (isfilemode==true && is_final==true){ - console.log("call stop ws!"); - play_file(); - wsconnecter.wsStop(); - - info_div.innerHTML="请点击连接"; - - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=false; - } - - - -} - -// 连接状态响应 -function getConnState( connState ) { - if ( connState === 0 ) { //on open - - - info_div.innerHTML='连接成功!请点击开始'; - if (isfilemode==true){ - info_div.innerHTML='请耐心等待,大文件等待时间更长'; - start_file_send(); - } - else - { - btnStart.disabled = false; - btnStop.disabled = true; - btnConnect.disabled=true; - } - } else if ( connState === 1 ) { - //stop(); - } else if ( connState === 2 ) { - stop(); - console.log( 'connecttion error' ); - - alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口。或试试界面上手动授权,再连接。"); - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=false; - - - info_div.innerHTML='请点击连接'; - } -} - -function record() -{ - - rec.open( function(){ - rec.start(); - console.log("开始"); - btnStart.disabled = true; - btnStop.disabled = false; - btnConnect.disabled=true; - }); - -} - - - -// 识别启动、停止、清空操作 -function start() { - - // 清除显示 - clear(); - //控件状态更新 - console.log("isfilemode"+isfilemode); - - //启动连接 - var ret=wsconnecter.wsStart(); - // 1 is ok, 0 is error - if(ret==1){ - info_div.innerHTML="正在连接asr服务器,请等待..."; - isRec = true; - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=true; - - return 1; - } - else - { - info_div.innerHTML="请点击开始"; - btnStart.disabled = true; - btnStop.disabled = true; - btnConnect.disabled=false; - - return 0; - } -} - - -function stop() { - var chunk_size = new Array( 5, 10, 5 ); - var request = { - "chunk_size": chunk_size, - "wav_name": "h5", - "is_speaking": false, - "chunk_interval":10, - "mode":getAsrMode(), - }; - console.log(request); - if(sampleBuf.length>0){ - wsconnecter.wsSend(sampleBuf); - console.log("sampleBuf.length"+sampleBuf.length); - sampleBuf=new Int16Array(); - } - wsconnecter.wsSend( JSON.stringify(request) ); - - - - - - - // 控件状态更新 - - isRec = false; - info_div.innerHTML="发送完数据,请等候,正在识别..."; - - if(isfilemode==false){ - btnStop.disabled = true; - btnStart.disabled = true; - btnConnect.disabled=true; - //wait 3s for asr result - setTimeout(function(){ - console.log("call stop ws!"); - wsconnecter.wsStop(); - btnConnect.disabled=false; - info_div.innerHTML="请点击连接";}, 3000 ); - - - - rec.stop(function(blob,duration){ - - console.log(blob); - var audioBlob = Recorder.pcm2wav(data = {sampleRate:16000, bitRate:16, blob:blob}, - function(theblob,duration){ - console.log(theblob); - var audio_record = document.getElementById('audio_record'); - audio_record.src = (window.URL||webkitURL).createObjectURL(theblob); - audio_record.controls=true; - //audio_record.play(); - - - } ,function(msg){ - console.log(msg); - } - ); - - - - },function(errMsg){ - console.log("errMsg: " + errMsg); - }); - } - // 停止连接 - - - -} - -function clear() { - - var varArea=document.getElementById('varArea'); - - varArea.value=""; - rec_text=""; - offline_text=""; - -} - - -function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) { - if ( isRec === true ) { - var data_48k = buffer[buffer.length-1]; - - var array_48k = new Array(data_48k); - var data_16k=Recorder.SampleData(array_48k,bufferSampleRate,16000).data; - - sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]); - var chunk_size=960; // for asr chunk_size [5, 10, 5] - info_div.innerHTML=""+bufferDuration/1000+"s"; - while(sampleBuf.length>=chunk_size){ - sendBuf=sampleBuf.slice(0,chunk_size); - sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length); - wsconnecter.wsSend(sendBuf); - - - - } - - - - } -} - -function getUseITN() { - var obj = document.getElementsByName("use_itn"); - for (var i = 0; i < obj.length; i++) { - if (obj[i].checked) { - return obj[i].value === "true"; - } - } - return false; -} +/** + * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights + * Reserved. MIT License (https://opensource.org/licenses/MIT) + */ +/* 2022-2023 by zhaoming,mali aihealthx.com */ + + +// Connection; Define socket connection and audio objects +var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState}); +var audioBlob; + +// Recording; Define recording object, wav format +var rec = Recorder({ + type:"pcm", + bitRate:16, + sampleRate:16000, + onProcess:recProcess +}); + + + + +var sampleBuf=new Int16Array(); +// Define button click handlers +var btnStart = document.getElementById('btnStart'); +btnStart.onclick = record; +var btnStop = document.getElementById('btnStop'); +btnStop.onclick = stop; +btnStop.disabled = true; +btnStart.disabled = true; + +btnConnect= document.getElementById('btnConnect'); +btnConnect.onclick = start; + +var awsslink= document.getElementById('wsslink'); + + +var rec_text=""; // for online rec asr result +var offline_text=""; // for offline rec asr result +var info_div = document.getElementById('info_div'); + +var upfile = document.getElementById('upfile'); + + + +var isfilemode=false; // if it is in file mode +var file_ext=""; +var file_sample_rate=16000; //for wav file sample rate +var file_data_array; // array to save file data + +var totalsend=0; + + +// var now_ipaddress=window.location.href; +// now_ipaddress=now_ipaddress.replace("https://","wss://"); +// now_ipaddress=now_ipaddress.replace("static/index.html",""); +// var localport=window.location.port; +// now_ipaddress=now_ipaddress.replace(localport,"10095"); +// document.getElementById('wssip').value=now_ipaddress; +addresschange(); +function addresschange() +{ + + var Uri = document.getElementById('wssip').value; + document.getElementById('info_wslink').innerHTML="Click here to manually authorize (iOS)"; + Uri=Uri.replace(/wss/g,"https"); + console.log("addresschange uri=",Uri); + + awsslink.onclick=function(){ + window.open(Uri, '_blank'); + } + +} + +upfile.onclick=function() +{ + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=false; + +} + +// from https://github.com/xiangyuecn/Recorder/tree/master +var readWavInfo=function(bytes){ + // Read wav header, uniform to 44-byte header + if(bytes.byteLength<44){ + return null; + }; + var wavView=bytes; + var eq=function(p,s){ + for(var i=0;i=chunk_size){ + + sendBuf=sampleBuf.slice(0,chunk_size); + totalsend=totalsend+sampleBuf.length; + sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length); + wsconnecter.wsSend(sendBuf); + + + } + + stop(); + + + +} + + +function on_recoder_mode_change() +{ + var item = null; + var obj = document.getElementsByName("recoder_mode"); + for (var i = 0; i < obj.length; i++) { // Iterate over radios + if (obj[i].checked) { + item = obj[i].value; + break; + } + + + } + if(item=="mic") + { + document.getElementById("mic_mode_div").style.display = 'block'; + document.getElementById("rec_mode_div").style.display = 'none'; + + + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=false; + isfilemode=false; + } + else + { + document.getElementById("mic_mode_div").style.display = 'none'; + document.getElementById("rec_mode_div").style.display = 'block'; + + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=true; + isfilemode=true; + info_div.innerHTML='Please click to select a file'; + + + } +} + + +function getHotwords(){ + + var obj = document.getElementById("varHot"); + + if(typeof(obj) == 'undefined' || obj==null || obj.value.length<=0){ + return null; + } + let val = obj.value.toString(); + + console.log("hotwords="+val); + let items = val.split(/[(\r\n)\r\n]+/); //split by \r\n + var jsonresult = {}; + const regexNum = /^[0-9]*$/; // test number + for (item of items) { + + let result = item.split(" "); + if(result.length>=2 && regexNum.test(result[result.length-1])) + { + var wordstr=""; + for(var i=0;i new Promise((resolve) => setTimeout(resolve, delay)) +async function is_speaking() { + const response = await fetch('/is_speaking', { + body: JSON.stringify({ + sessionid: String(parent.document.getElementById('sessionid').value), + }), + headers: { + 'Content-Type': 'application/json' + }, + method: 'POST' + }); + const data = await response.json(); + console.log('is_speaking res:',data) + return data.data +} + +async function waitSpeakingEnd() { + rec.stop() // Stop recording + for(let i=0;i<10;i++) { // Wait for avatar to start speaking, max 10s + bspeak = await is_speaking() + if(bspeak) { + break + } + await sleep(1000) + } + + while(true) { // Wait for avatar to finish speaking + bspeak = await is_speaking() + if(!bspeak) { + break + } + await sleep(1000) + } + await sleep(2000) + rec.start() +} +// ASR result; Parse jsonMsg data and append to text area +function getJsonMessage( jsonMsg ) { + //console.log(jsonMsg); + console.log( "message: " + JSON.parse(jsonMsg.data)['text'] ); + var rectxt=""+JSON.parse(jsonMsg.data)['text']; + var asrmodel=JSON.parse(jsonMsg.data)['mode']; + var is_final=JSON.parse(jsonMsg.data)['is_final']; + var timestamp=JSON.parse(jsonMsg.data)['timestamp']; + if(asrmodel=="2pass-offline" || asrmodel=="offline") + { + offline_text=offline_text+rectxt.replace(/ +/g,"")+'\n'; //handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,""); + rec_text=offline_text; + fetch('/human', { + body: JSON.stringify({ + text: rectxt.replace(/ +/g,""), + type: 'chat', + sessionid: String(parent.document.getElementById('sessionid').value), + }), + headers: { + 'Content-Type': 'application/json' + }, + method: 'POST' + }); + + waitSpeakingEnd(); + } + else + { + rec_text=rec_text+rectxt; //.replace(/ +/g,""); + } + var varArea=document.getElementById('varArea'); + + varArea.value=rec_text; + console.log( "offline_text: " + asrmodel+","+offline_text); + console.log( "rec_text: " + rec_text); + if (isfilemode==true && is_final==true){ + console.log("call stop ws!"); + play_file(); + wsconnecter.wsStop(); + + info_div.innerHTML="Please click Connect"; + + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=false; + } + + + +} + +// Connection status handler +function getConnState( connState ) { + if ( connState === 0 ) { //on open + + + info_div.innerHTML='Connected successfully! Please click Start'; + if (isfilemode==true){ + info_div.innerHTML='Please wait patiently, larger files take longer'; + start_file_send(); + } + else + { + btnStart.disabled = false; + btnStop.disabled = true; + btnConnect.disabled=true; + } + } else if ( connState === 1 ) { + //stop(); + } else if ( connState === 2 ) { + stop(); + console.log( 'connecttion error' ); + + alert("Connection to " + document.getElementById('wssip').value + " failed. Please check the ASR address and port, or try manually authorizing first."); + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=false; + + + info_div.innerHTML='Please click Connect'; + } +} + +function record() +{ + + rec.open( function(){ + rec.start(); + console.log("Start"); + btnStart.disabled = true; + btnStop.disabled = false; + btnConnect.disabled=true; + }); + +} + + + +// Recognition start, stop, clear operations +function start() { + + // Clear display + clear(); + // Control state update + console.log("isfilemode"+isfilemode); + + // Start connection + var ret=wsconnecter.wsStart(); + // 1 is ok, 0 is error + if(ret==1){ + info_div.innerHTML="Connecting to ASR server, please wait..."; + isRec = true; + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=true; + + return 1; + } + else + { + info_div.innerHTML="Please click Start"; + btnStart.disabled = true; + btnStop.disabled = true; + btnConnect.disabled=false; + + return 0; + } +} + + +function stop() { + var chunk_size = new Array( 5, 10, 5 ); + var request = { + "chunk_size": chunk_size, + "wav_name": "h5", + "is_speaking": false, + "chunk_interval":10, + "mode":getAsrMode(), + }; + console.log(request); + if(sampleBuf.length>0){ + wsconnecter.wsSend(sampleBuf); + console.log("sampleBuf.length"+sampleBuf.length); + sampleBuf=new Int16Array(); + } + wsconnecter.wsSend( JSON.stringify(request) ); + + + + + + + // Control state update + + isRec = false; + info_div.innerHTML="Data sent, please wait, recognizing..."; + + if(isfilemode==false){ + btnStop.disabled = true; + btnStart.disabled = true; + btnConnect.disabled=true; + //wait 3s for asr result + setTimeout(function(){ + console.log("call stop ws!"); + wsconnecter.wsStop(); + btnConnect.disabled=false; + info_div.innerHTML="Please click Connect";}, 3000 ); + + + + rec.stop(function(blob,duration){ + + console.log(blob); + var audioBlob = Recorder.pcm2wav(data = {sampleRate:16000, bitRate:16, blob:blob}, + function(theblob,duration){ + console.log(theblob); + var audio_record = document.getElementById('audio_record'); + audio_record.src = (window.URL||webkitURL).createObjectURL(theblob); + audio_record.controls=true; + //audio_record.play(); + + + } ,function(msg){ + console.log(msg); + } + ); + + + + + },function(errMsg){ + console.log("errMsg: " + errMsg); + }); + } + // Stop connection + + + +} + +function clear() { + + var varArea=document.getElementById('varArea'); + + varArea.value=""; + rec_text=""; + offline_text=""; + +} + + +function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) { + if ( isRec === true ) { + var data_48k = buffer[buffer.length-1]; + + var array_48k = new Array(data_48k); + var data_16k=Recorder.SampleData(array_48k,bufferSampleRate,16000).data; + + sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]); + var chunk_size=960; // for asr chunk_size [5, 10, 5] + info_div.innerHTML=""+bufferDuration/1000+"s"; + while(sampleBuf.length>=chunk_size){ + sendBuf=sampleBuf.slice(0,chunk_size); + sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length); + wsconnecter.wsSend(sendBuf); + + + + } + + + + } +} + +function getUseITN() { + var obj = document.getElementsByName("use_itn"); + for (var i = 0; i < obj.length; i++) { + if (obj[i].checked) { + return obj[i].value === "true"; + } + } + return false; +} diff --git a/web/asr/wsconnecter.js b/web/asr/wsconnecter.js index db140efc..7e2ee995 100644 --- a/web/asr/wsconnecter.js +++ b/web/asr/wsconnecter.js @@ -1,119 +1,119 @@ -/** - * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights - * Reserved. MIT License (https://opensource.org/licenses/MIT) - */ -/* 2021-2023 by zhaoming,mali aihealthx.com */ - -function WebSocketConnectMethod( config ) { //定义socket连接方法类 - - - var speechSokt; - var connKeeperID; - - var msgHandle = config.msgHandle; - var stateHandle = config.stateHandle; - - this.wsStart = function () { - var Uri = document.getElementById('wssip').value; //"wss://111.205.137.58:5821/wss/" //设置wss asr online接口地址 如 wss://X.X.X.X:port/wss/ - if(Uri.match(/wss:\S*|ws:\S*/)) - { - console.log("Uri"+Uri); - } - else - { - alert("请检查wss地址正确性"); - return 0; - } - - if ( 'WebSocket' in window ) { - speechSokt = new WebSocket( Uri ); // 定义socket连接对象 - speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数 - speechSokt.onclose = function(e){ - console.log("onclose ws!"); - //speechSokt.close(); - onClose(e); - }; - speechSokt.onmessage = function(e){onMessage(e);}; - speechSokt.onerror = function(e){onError(e);}; - return 1; - } - else { - alert('当前浏览器不支持 WebSocket'); - return 0; - } - }; - - // 定义停止与发送函数 - this.wsStop = function () { - if(speechSokt != undefined) { - console.log("stop ws!"); - speechSokt.close(); - } - }; - - this.wsSend = function ( oneData ) { - - if(speechSokt == undefined) return; - if ( speechSokt.readyState === 1 ) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED - - speechSokt.send( oneData ); - - - } - }; - - // SOCEKT连接中的消息与状态响应 - function onOpen( e ) { - // 发送json - var chunk_size = new Array( 5, 10, 5 ); - var request = { - "chunk_size": chunk_size, - "wav_name": "h5", - "is_speaking": true, - "chunk_interval":10, - "itn":getUseITN(), - "mode":getAsrMode(), - - }; - if(isfilemode) - { - request.wav_format=file_ext; - if(file_ext=="wav") - { - request.wav_format="PCM"; - request.audio_fs=file_sample_rate; - } - } - - var hotwords=getHotwords(); - - if(hotwords!=null ) - { - request.hotwords=hotwords; - } - console.log(JSON.stringify(request)); - speechSokt.send(JSON.stringify(request)); - console.log("连接成功"); - stateHandle(0); - - } - - function onClose( e ) { - stateHandle(1); - } - - function onMessage( e ) { - - msgHandle( e ); - } - - function onError( e ) { - - info_div.innerHTML="连接"+e; - console.log(e); - stateHandle(2); - - } - - +/** + * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights + * Reserved. MIT License (https://opensource.org/licenses/MIT) + */ +/* 2021-2023 by zhaoming,mali aihealthx.com */ + +function WebSocketConnectMethod( config ) { // Define socket connection method class + + + var speechSokt; + var connKeeperID; + + var msgHandle = config.msgHandle; + var stateHandle = config.stateHandle; + + this.wsStart = function () { + var Uri = document.getElementById('wssip').value; //"wss://111.205.137.58:5821/wss/" // Set wss asr online interface address e.g. wss://X.X.X.X:port/wss/ + if(Uri.match(/wss:\S*|ws:\S*/)) + { + console.log("Uri"+Uri); + } + else + { + alert("Please check that the WSS address is correct"); + return 0; + } + + if ( 'WebSocket' in window ) { + speechSokt = new WebSocket( Uri ); // Define socket connection object + speechSokt.onopen = function(e){onOpen(e);}; // Define handler functions + speechSokt.onclose = function(e){ + console.log("onclose ws!"); + //speechSokt.close(); + onClose(e); + }; + speechSokt.onmessage = function(e){onMessage(e);}; + speechSokt.onerror = function(e){onError(e);}; + return 1; + } + else { + alert('WebSocket is not supported by this browser'); + return 0; + } + }; + + // Define stop and send functions + this.wsStop = function () { + if(speechSokt != undefined) { + console.log("stop ws!"); + speechSokt.close(); + } + }; + + this.wsSend = function ( oneData ) { + + if(speechSokt == undefined) return; + if ( speechSokt.readyState === 1 ) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED + + speechSokt.send( oneData ); + + + } + }; + + // Message and status handlers for socket connection + function onOpen( e ) { + // Send JSON + var chunk_size = new Array( 5, 10, 5 ); + var request = { + "chunk_size": chunk_size, + "wav_name": "h5", + "is_speaking": true, + "chunk_interval":10, + "itn":getUseITN(), + "mode":getAsrMode(), + + }; + if(isfilemode) + { + request.wav_format=file_ext; + if(file_ext=="wav") + { + request.wav_format="PCM"; + request.audio_fs=file_sample_rate; + } + } + + var hotwords=getHotwords(); + + if(hotwords!=null ) + { + request.hotwords=hotwords; + } + console.log(JSON.stringify(request)); + speechSokt.send(JSON.stringify(request)); + console.log("Connection successful"); + stateHandle(0); + + } + + function onClose( e ) { + stateHandle(1); + } + + function onMessage( e ) { + + msgHandle( e ); + } + + function onError( e ) { + + info_div.innerHTML="Connection " + e; + console.log(e); + stateHandle(2); + + } + + } \ No newline at end of file diff --git a/web/avatar.html b/web/avatar.html index 0932bb6d..4ea3be3b 100644 --- a/web/avatar.html +++ b/web/avatar.html @@ -1,9 +1,9 @@ - + - Avatar 生成 - LiveTalking + Avatar Creator - LiveTalking