@@ -0,0 +1,408 @@
+package com.dt.ai.service;
+
+import cn.hutool.core.util.StrUtil;
+import com.baidu.aip.speech.AipSpeech;
+import com.baidu.aip.speech.TtsResponse;
+import com.baidu.aip.util.Util;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.json.JSONObject;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import javax.sound.sampled.*;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.HashMap;
+import java.util.concurrent.TimeUnit;
+
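+/**
+ * Voice-call service: records the user's speech, transcribes it with Baidu ASR,
+ * asks the Qianfan (ERNIE) chat API for a reply, synthesizes the reply with Baidu TTS,
+ * and plays it back with barge-in support (playback stops when the user starts talking).
+ *
+ * Typical call sequence (a sketch; the file paths are illustrative, not fixed by this class):
+ * <pre>{@code
+ * String wav = voiceCallService.recordVoice("voice/input.wav", 60);
+ * if (wav != null) {
+ *     String userText = voiceCallService.speechToText(wav);
+ *     String reply    = voiceCallService.getAiResponse(userText);
+ *     String mp3      = voiceCallService.textToSpeech(reply, "voice/reply.mp3");
+ *     voiceCallService.playVoice(mp3);
+ * }
+ * }</pre>
+ */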
+@Component
+public class VoiceCallService {
+    private final AipSpeech aipSpeech;
+    private final ObjectMapper objectMapper = new ObjectMapper();
+    private static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder()
+            .readTimeout(300, TimeUnit.SECONDS)
+            .build();
+
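+    // The @Value fields below expect configuration along these lines (the values are
+    // illustrative; adjust them to your Baidu console settings):
+    //
+    // baidu:
+    //   asr:
+    //     format: wav
+    //     rate: 16000
+    //   tts:
+    //     lang: zh
+    //     per: 0
+    //   wenxin:
+    //     api-key: <your-qianfan-api-key>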
+    @Value("${baidu.asr.format}")
+    private String asrFormat;
+    @Value("${baidu.asr.rate}")
+    private int asrRate;
+    @Value("${baidu.tts.lang}")
+    private String ttsLang;
+    @Value("${baidu.tts.per}")
+    private int ttsPer;
+    @Value("${baidu.wenxin.api-key}")
+    private String wenxinApiKey;
+
+    private static final double SILENCE_THRESHOLD = 100.0; // silence threshold (mean absolute amplitude)
+    private static final int SILENCE_DURATION = 3000; // silence duration threshold in milliseconds
+    private volatile boolean isListening = false; // whether recording is in progress
+    private volatile boolean isSpeaking = false; // whether playback is in progress
+
+    /**
+     * Checks whether playback finished normally (was not interrupted).
+     */
+    public boolean isPlaybackCompleted() {
+        return !isSpeaking;
+    }
+
+    /**
+     * Detects whether the audio buffer contains sound above the silence threshold.
+     */
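+    // Energy measure used below: samples are 16-bit signed little-endian PCM, so each byte
+    // pair is combined as (hi << 8) | (lo & 0xFF); the mean of |sample| is then compared with
+    // SILENCE_THRESHOLD (100 out of a 32767 full scale, i.e. roughly -50 dB), a value chosen
+    // here as a simple heuristic rather than a calibrated figure.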
+    private boolean hasSound(byte[] buffer, int bytesRead) {
+        if (bytesRead <= 0) return false;
+
+        // Compute the audio energy of the buffer
+        double sum = 0;
+        for (int i = 0; i < bytesRead; i += 2) {
+            if (i + 1 < bytesRead) {
+                // Combine two bytes into a 16-bit sample
+                short sample = (short) ((buffer[i + 1] << 8) | (buffer[i] & 0xFF));
+                sum += Math.abs(sample);
+            }
+        }
+        double average = sum / (bytesRead / 2);
+        return average > SILENCE_THRESHOLD;
+    }
+
+    public VoiceCallService(AipSpeech aipSpeech) {
+        this.aipSpeech = aipSpeech;
+    }
+
+    /**
+     * 1. Recording - stops automatically once the speaker goes silent.
+     */
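+    // Recording stops at whichever comes first: maxRecordSeconds elapses, isListening is
+    // cleared, or no buffer has crossed SILENCE_THRESHOLD for SILENCE_DURATION milliseconds.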
+    public String recordVoice(String savePath, int maxRecordSeconds) {
+        try {
+            // 16 kHz, 16-bit, mono PCM, as required by Baidu speech recognition
+            AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
+            DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+
+            if (!AudioSystem.isLineSupported(info)) {
+                System.err.println("不支持该音频格式!");
+                return null;
+            }
+
+            // Acquire and open the recording device
+            TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
+            line.open(format);
+            line.start();
+
+            System.out.println("\n=== 开始录音 ===");
+            System.out.println("请说话...(静音3秒后自动停止)");
+
+            // Temporary capture buffer
+            int bufferSize = 4096;
+            byte[] buffer = new byte[bufferSize];
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+            // Record
+            long startTime = System.currentTimeMillis();
+            long lastSoundTime = startTime;
+            isListening = true;
+
+            while (isListening && (System.currentTimeMillis() - startTime < maxRecordSeconds * 1000L)) {
+                int count = line.read(buffer, 0, buffer.length);
+                if (count > 0) {
+                    if (hasSound(buffer, count)) {
+                        lastSoundTime = System.currentTimeMillis();
+                    } else if (System.currentTimeMillis() - lastSoundTime > SILENCE_DURATION) {
+                        // Sustained silence detected, stop recording
+                        System.out.println("检测到停止说话");
+                        break;
+                    }
+                    out.write(buffer, 0, count);
+                }
+            }
+
+            isListening = false;
+
+            // Stop recording
+            line.stop();
+            line.close();
+
+            // Save the recording
+            byte[] audioData = out.toByteArray();
+            if (audioData.length == 0) {
+                System.out.println("未检测到声音");
+                return null;
+            }
+
+            File audioFile = new File(savePath);
+            File parentDir = audioFile.getParentFile();
+            if (parentDir != null && !parentDir.exists()) {
+                parentDir.mkdirs();
+            }
+
+            // Wrap the raw PCM data in an audio input stream
+            AudioInputStream ais = new AudioInputStream(
+                    new java.io.ByteArrayInputStream(audioData),
+                    format,
+                    audioData.length / format.getFrameSize()
+            );
+
+            // Write it out as a WAV file
+            AudioSystem.write(ais, AudioFileFormat.Type.WAVE, audioFile);
+
+            System.out.println("录音完成!");
+            System.out.println("文件已保存:" + audioFile.getAbsolutePath());
+            System.out.println("文件大小:" + audioFile.length() + " bytes");
+
+            return savePath;
+
+        } catch (Exception e) {
+            System.err.println("录音过程出错:" + e.getMessage());
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * 2. Speech recognition (Baidu ASR).
+     */
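+    // On success, Baidu ASR returns JSON of roughly this shape (abridged):
+    //   {"err_no":0, "err_msg":"success.", "result":["recognized text"]}
+    // which is why the code below checks err_no and then reads result[0].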
+    public String speechToText(String audioPath) {
+        try {
+            // Read the audio file
+            byte[] data = Util.readFileByBytes(audioPath);
+            if (data == null || data.length == 0) {
+                return "错误:音频文件为空";
+            }
+
+            // Call speech recognition
+            JSONObject result = aipSpeech.asr(data, asrFormat, asrRate, new HashMap<>());
+            System.out.println("语音识别结果:" + result.toString());
+
+            // Check the error code
+            if (!result.has("err_no")) {
+                return "识别失败:返回结果缺少错误码";
+            }
+
+            int errorCode = result.getInt("err_no");
+            if (errorCode != 0) {
+                return "识别失败:" + result.getString("err_msg");
+            }
+
+            // Check the recognition result
+            if (!result.has("result") || result.getJSONArray("result").length() == 0) {
+                return "识别失败:未返回识别结果";
+            }
+
+            // Return the first candidate
+            return result.getJSONArray("result").getString(0);
+
+        } catch (Exception e) {
+            System.err.println("语音识别异常:" + e.getMessage());
+            e.printStackTrace();
+            return "识别异常:" + e.getMessage();
+        }
+    }
+
+    /**
+     * 3. AI reply generation via the Baidu Qianfan (ERNIE) chat completions API.
+     */
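+    // The serialized request body built below looks roughly like:
+    //   {"model":"ernie-4.5-turbo-128k","messages":[{"role":"system","content":"..."},
+    //    {"role":"user","content":"..."}]}
+    // and the reply is read from choices[0].message.content, mirroring an OpenAI-style
+    // chat completions response.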
+    public String getAiResponse(String userText) {
+        if (StrUtil.isEmpty(userText)) {
+            return "请再说一遍,我没听清。";
+        }
+
+        try {
+            // Build the request body
+            HashMap<String, Object> requestMap = new HashMap<>();
+            requestMap.put("model", "ernie-4.5-turbo-128k");
+
+            // Build the message array
+            HashMap<String, String> systemMessage = new HashMap<>();
+            systemMessage.put("role", "system");
+            systemMessage.put("content", "你是一位专业的婴儿护理医生,拥有丰富的婴儿护理经验。你只回答与婴儿护理相关的问题,包括新生儿护理、婴儿喂养、婴儿健康、生长发育、疫苗接种、常见疾病预防等方面的咨询。如果遇到其他领域的问题,你会礼貌地表示:'抱歉,这个问题不在我的专业范围内。我是一名婴儿护理医生,只能为您解答婴儿护理相关的问题。'");
+
+            HashMap<String, String> userMessage = new HashMap<>();
+            userMessage.put("role", "user");
+            userMessage.put("content", userText);
+
+            requestMap.put("messages", new Object[]{systemMessage, userMessage});
+
+            // Serialize to JSON
+            String requestJson = objectMapper.writeValueAsString(requestMap);
+
+            // Build the HTTP request
+            MediaType mediaType = MediaType.parse("application/json");
+            RequestBody body = RequestBody.create(mediaType, requestJson);
+
+            Request request = new Request.Builder()
+                    .url("https://qianfan.baidubce.com/v2/chat/completions")
+                    .post(body)
+                    .addHeader("Content-Type", "application/json")
+                    .addHeader("Authorization", "Bearer " + wenxinApiKey)
+                    .build();
+
+            // Send the request and handle the response
+            try (Response response = HTTP_CLIENT.newCall(request).execute()) {
+                if (!response.isSuccessful()) {
+                    System.err.println("API调用失败: " + response.code());
+                    return "抱歉,我现在无法回答,请稍后再试。";
+                }
+
+                String responseBody = response.body().string();
+                JSONObject jsonResponse = new JSONObject(responseBody);
+
+                if (jsonResponse.has("choices") && jsonResponse.getJSONArray("choices").length() > 0) {
+                    return jsonResponse.getJSONArray("choices")
+                            .getJSONObject(0)
+                            .getJSONObject("message")
+                            .getString("content");
+                }
+
+                return "抱歉,我没有得到有效的回答。";
+            }
+        } catch (Exception e) {
+            System.err.println("调用文心一言API异常: " + e.getMessage());
+            e.printStackTrace();
+            return "系统出现了一点小问题,请稍后再试。";
+        }
+    }
+
+    /**
+     * 4. Text-to-speech synthesis (Baidu TTS).
+     */
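+    // Note: no "aue" (audio encoding) option is set below, and Baidu TTS returns MP3 audio in
+    // that default case, which is why playVoice() tries the MP3 player first and only then
+    // falls back to WAV playback.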
+    public String textToSpeech(String text, String savePath) {
+        try {
+            if (text == null || text.trim().isEmpty()) {
+                throw new IllegalArgumentException("合成文本不能为空");
+            }
+
+            // Call speech synthesis
+            HashMap<String, Object> options = new HashMap<>();
+            options.put("per", ttsPer);
+            options.put("spd", 5);
+            options.put("pit", 5);
+            options.put("vol", 5);
+
+            TtsResponse response = aipSpeech.synthesis(text, ttsLang, 1, options);
+
+            // Check the synthesis result
+            if (response == null) {
+                throw new RuntimeException("合成响应为空");
+            }
+
+            byte[] data = response.getData();
+            if (data == null || data.length == 0) {
+                JSONObject error = response.getResult();
+                if (error != null) {
+                    throw new RuntimeException("合成失败:" + error.toString());
+                } else {
+                    throw new RuntimeException("合成失败:返回数据为空");
+                }
+            }
+
+            // Save the audio file
+            File file = new File(savePath);
+            File parentDir = file.getParentFile();
+            if (parentDir != null && !parentDir.exists()) {
+                parentDir.mkdirs();
+            }
+
+            Util.writeBytesToFileSystem(data, savePath);
+            return savePath;
+
+        } catch (Exception e) {
+            System.err.println("语音合成异常:" + e.getMessage());
+            e.printStackTrace();
+            return "合成失败:" + e.getMessage();
+        }
+    }
+
+    /**
+     * 5. Playback - supports barge-in (can be interrupted when the user starts speaking).
+     */
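+    // Barge-in design: a monitor thread reads the microphone and clears isSpeaking as soon as
+    // it detects sound; both playback loops below check isSpeaking on every frame or buffer, so
+    // playback stops shortly after the user starts talking. On devices without echo cancellation
+    // the microphone may also pick up the speaker output itself.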
+    public void playVoice(String audioPath) {
+        if (audioPath == null || audioPath.startsWith("合成失败")) {
+            System.err.println("播放失败:" + audioPath);
+            return;
+        }
+
+        File audioFile = new File(audioPath);
+        if (!audioFile.exists()) {
+            System.err.println("播放失败:文件不存在 - " + audioPath);
+            return;
+        }
+
+        try {
+            isSpeaking = true;
+
+            // Start the microphone-monitoring thread
+            Thread monitorThread = new Thread(() -> {
+                try {
+                    // Same capture format as recording: 16 kHz, 16-bit, mono PCM
+                    AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
+                    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+                    TargetDataLine monitorLine = (TargetDataLine) AudioSystem.getLine(info);
+                    monitorLine.open(format);
+                    monitorLine.start();
+
+                    byte[] buffer = new byte[4096];
+                    while (isSpeaking) {
+                        int count = monitorLine.read(buffer, 0, buffer.length);
+                        if (hasSound(buffer, count)) {
+                            System.out.println("\n检测到用户说话,停止播放...");
+                            isSpeaking = false;
+                            break;
+                        }
+                    }
+                    monitorLine.stop();
+                    monitorLine.close();
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            });
+            monitorThread.start();
+
+            // Try to play the file as MP3 with the JavaZoom JLayer player first
+            try {
+                javazoom.jl.player.Player player = new javazoom.jl.player.Player(new FileInputStream(audioFile));
+                System.out.println("开始播放...");
+                while (!player.isComplete() && isSpeaking) {
+                    player.play(1); // play one frame at a time so interruption takes effect quickly
+                }
+                System.out.println(isSpeaking ? "播放结束" : "播放被打断");
+                return;
+            } catch (Exception e) {
+                // Not an MP3 stream; fall back to plain WAV playback
+                System.out.println("尝试作为WAV文件播放...");
+            }
+
+            // WAV playback path
+            AudioInputStream audioStream = AudioSystem.getAudioInputStream(audioFile);
+            AudioFormat format = audioStream.getFormat();
+            DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);
+
+            if (!AudioSystem.isLineSupported(info)) {
+                throw new LineUnavailableException("不支持的音频格式");
+            }
+
+            SourceDataLine line = (SourceDataLine) AudioSystem.getLine(info);
+            line.open(format);
+            line.start();
+
+            System.out.println("开始播放...");
+            byte[] buffer = new byte[4096];
+            int bytesRead = 0;
+
+            while ((bytesRead = audioStream.read(buffer)) != -1 && isSpeaking) {
+                line.write(buffer, 0, bytesRead);
+            }
+
+            line.drain();
+            line.stop();
+            line.close();
+            audioStream.close();
+
+            System.out.println(isSpeaking ? "播放结束" : "播放被打断");
+
+        } catch (Exception e) {
+            System.err.println("播放失败:" + e.getMessage());
+            e.printStackTrace();
+        } finally {
+            isSpeaking = false;
+        }
+    }
+}