注意事項
下載安裝
從Maven伺服器下載最新版本SDK。
<dependency>
<groupId>com.alibaba.nls</groupId>
<artifactId>nls-sdk-recognizer</artifactId>
<version>2.2.1</version>
</dependency>
解壓ZIP檔案,在pom目錄運行mvn package,會在target目錄產生可執行JAR:nls-example-recognizer-2.0.0-jar-with-dependencies.jar,將JAR包拷貝至目標伺服器,用於快速驗證及服務壓測。
服務驗證。
運行如下代碼,並按提示提供相應參數。運行後在命令執行目錄產生logs/nls.log。
java -cp nls-example-recognizer-2.0.0-jar-with-dependencies.jar com.alibaba.nls.client.SpeechRecognizerDemo
服務壓測。
運行如下代碼,並按提示提供相應參數。其中阿里雲服務URL參數為: wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1,語音檔案為16k採樣率PCM格式檔案,並發數根據您的購買情況進行選擇。
java -jar nls-example-recognizer-2.0.0-jar-with-dependencies.jar
關鍵介面
NlsClient:語音處理用戶端,利用該用戶端可以進行一句話識別、即時語音辨識和語音合成的語音處理任務。該用戶端是執行緒安全的,建議全域僅建立一個執行個體。
SpeechRecognizer:一句話識別處理類,通過該介面設定請求參數,發送請求及聲音資料。非執行緒安全。
SpeechRecognizerListener:識別結果監聽類,監聽識別結果。非執行緒安全。
更多介紹,請參見Java API介面說明。
重要
SDK調用注意事項:
NlsClient使用了Netty架構,NlsClient對象的建立會消耗一定時間和資源,一經建立可以重複使用。建議調用程式將NlsClient的建立和關閉與程式本身的生命週期結合。
SpeechRecognizer對象不可重複使用,一個識別任務對應一個SpeechRecognizer對象。例如,N個音頻檔案要進行N次識別任務,需要建立N個SpeechRecognizer對象。
SpeechRecognizerListener對象和SpeechRecognizer對象是一一對應的,不能將一個SpeechRecognizerListener對象設定到多個SpeechRecognizer對象中,否則不能將各識別任務區分開。
Java SDK依賴Netty網路程式庫,如果您的應用依賴Netty,其版本需更新至4.1.17.Final及以上。
程式碼範例
說明
下載nls-sample-16k.wav。
樣本中使用的音頻檔案為16000 Hz採樣率,請在管控台中將AppKey對應專案的模型設定為通用模型,以擷取準確的識別效果。如果使用其他音頻,請設定為支援該音頻情境的模型,關於模型設定,請參見管理專案。
樣本中使用SDK內建的預設一句話識別服務的外網訪問服務URL,如果您使用阿里雲上海ECS,且需要使用內網訪問服務URL,則在建立NlsClient對象時,設定內網訪問的URL:
client = new NlsClient("ws://nls-gateway-cn-shanghai-internal.aliyuncs.com/ws/v1", accessToken);
調用介面前,需配置環境變數,通過環境變數讀取存取憑證。Intelligent Speech Interaction的AccessKey ID、AccessKey Secret和AppKey的環境變數名:ALIYUN_AK_ID、ALIYUN_AK_SECRET、NLS_APP_KEY。
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizer;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerListener;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Sample program for one-sentence speech recognition with the NLS Java SDK.
 *
 * <p>The demo obtains an access token from an AccessKey pair, creates a single
 * {@link NlsClient}, streams one local audio file to the service through a
 * {@link SpeechRecognizer}, prints the callbacks it receives and finally shuts
 * the client down.
 *
 * <p>Usage notes taken from the SDK documentation that accompanies this sample:
 * one {@code NlsClient} should be created and reused for the lifetime of the
 * program, while every recognition task needs its own {@code SpeechRecognizer}
 * and its own {@code SpeechRecognizerListener}.
 */
public class SpeechRecognizerDemo {
    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class);

    /** Number of bytes read from the audio file and sent to the service per packet. */
    private static final int CHUNK_SIZE_BYTES = 3200;

    /** Bytes per audio sample assumed by {@link #getSleepDelta(int, int)} (16-bit PCM). */
    private static final long BYTES_PER_SAMPLE = 2L;

    /** Number of audio channels assumed by {@link #getSleepDelta(int, int)} (mono). */
    private static final long SOUND_CHANNELS = 1L;

    private final String appKey;

    /** Shared SDK client; stays {@code null} when the access token could not be obtained. */
    NlsClient client;

    /**
     * Requests an access token and creates the {@link NlsClient}.
     *
     * <p>If the token request fails with an {@link IOException} the failure is
     * reported and {@link #client} is left {@code null}; {@link #process} and
     * {@link #shutdown} then do nothing instead of failing with a
     * {@code NullPointerException}.
     *
     * @param appKey project AppKey that is set on every recognizer
     * @param id     AccessKey ID used to request the token
     * @param secret AccessKey Secret used to request the token
     * @param url    gateway URL; when {@code null} or empty the SDK's built-in
     *               default URL is used
     */
    public SpeechRecognizerDemo(String appKey, String id, String secret, String url) {
        this.appKey = appKey;
        AccessToken accessToken = new AccessToken(id, secret);
        try {
            accessToken.apply();
            // The token is a bearer credential, so only its expiry is printed.
            System.out.println("get token succeeded, expire time: " + accessToken.getExpireTime());
            if (url == null || url.isEmpty()) {
                client = new NlsClient(accessToken.getToken());
            } else {
                client = new NlsClient(url, accessToken.getToken());
            }
        } catch (IOException e) {
            logger.error("Failed to obtain access token; NlsClient was not created", e);
            System.err.println("Failed to obtain access token: " + e.getMessage());
        }
    }

    /**
     * Builds a listener that prints every callback of one recognition task.
     *
     * @param myOrder   caller-defined number echoed in the {@code onStarted} output
     * @param userParam caller-defined text echoed in the {@code onStarted} output
     * @return a new listener; per the SDK notes it must be used with one recognizer only
     */
    private static SpeechRecognizerListener getRecognizerListener(int myOrder, String userParam) {
        return new SpeechRecognizerListener() {
            /** Prints event name, status and the text recognized so far. */
            @Override
            public void onRecognitionResultChanged(SpeechRecognizerResponse response) {
                System.out.println("name: " + response.getName() + ", status: " + response.getStatus()
                        + ", result: " + response.getRecognizedText());
            }

            /** Prints event name, status and the recognized text of the completed task. */
            @Override
            public void onRecognitionCompleted(SpeechRecognizerResponse response) {
                System.out.println("name: " + response.getName() + ", status: " + response.getStatus()
                        + ", result: " + response.getRecognizedText());
            }

            /** Prints the caller-supplied values together with the task id. */
            @Override
            public void onStarted(SpeechRecognizerResponse response) {
                System.out.println("myOrder: " + myOrder + "; myParam: " + userParam
                        + "; task_id: " + response.getTaskId());
            }

            /** Prints task id, status code and status text of a failed task. */
            @Override
            public void onFail(SpeechRecognizerResponse response) {
                System.out.println("task_id: " + response.getTaskId() + ", status: " + response.getStatus()
                        + ", status_text: " + response.getStatusText());
            }
        };
    }

    /**
     * Returns the playback duration, in milliseconds, of {@code dataSize} bytes
     * of 16-bit mono PCM audio at the given sample rate.
     *
     * <p>The value equals {@code dataSize * 1000 / (sampleRate * 2)}, e.g. 3200
     * bytes at 16000 Hz last 100 ms. The arithmetic is done in {@code long} so
     * large packets no longer overflow, and the result is clamped to the
     * {@code int} range.
     *
     * @param dataSize   number of audio bytes
     * @param sampleRate sample rate in Hz
     * @return duration in milliseconds, truncated toward zero
     * @throws ArithmeticException if {@code sampleRate} is 0
     */
    public static int getSleepDelta(int dataSize, int sampleRate) {
        long bytesPerSecond = (long) sampleRate * BYTES_PER_SAMPLE * SOUND_CHANNELS;
        long millis = (dataSize * 1000L) / bytesPerSecond;
        return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, millis));
    }

    /**
     * Runs one recognition task on a local audio file.
     *
     * <p>The file is sent as PCM in packets of {@value #CHUNK_SIZE_BYTES} bytes.
     * After each packet the thread sleeps for that packet's playback duration,
     * so the file is uploaded at roughly real-time speed. Failures are reported
     * and not rethrown; the recognizer and the file are always closed.
     *
     * @param filepath   path of the audio file to send
     * @param sampleRate sample rate of the audio in Hz; only 16000 and 8000 are
     *                   mapped to a {@link SampleRateEnum}, any other value leaves
     *                   the recognizer's own setting untouched
     */
    public void process(String filepath, int sampleRate) {
        if (client == null) {
            logger.error("NlsClient is not available; skipping recognition of {}", filepath);
            System.err.println("NlsClient is not available; skipping recognition of " + filepath);
            return;
        }
        SpeechRecognizer recognizer = null;
        // Opening the file first makes a bad path fail before a connection is started,
        // and try-with-resources closes the stream on every exit path.
        try (FileInputStream fis = new FileInputStream(new File(filepath))) {
            // Caller-defined values handed to the listener to show how context can be passed.
            String myParam = "user-param";
            int myOrder = 1234;
            SpeechRecognizerListener listener = getRecognizerListener(myOrder, myParam);

            recognizer = new SpeechRecognizer(client, listener);
            recognizer.setAppKey(appKey);
            recognizer.setFormat(InputFormatEnum.PCM);
            if (sampleRate == 16000) {
                recognizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
            } else if (sampleRate == 8000) {
                recognizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K);
            } else {
                logger.warn("Unsupported sample rate {}; recognizer sample rate left unchanged", sampleRate);
            }
            recognizer.setEnableIntermediateResult(true);
            recognizer.addCustomedParam("enable_voice_detection", true);

            long now = System.currentTimeMillis();
            recognizer.start();
            logger.info("ASR start latency : {} ms", System.currentTimeMillis() - now);

            byte[] buffer = new byte[CHUNK_SIZE_BYTES];
            int len;
            while ((len = fis.read(buffer)) > 0) {
                logger.info("send data pack length: {}", len);
                recognizer.send(buffer, len);
                // Pace the upload: wait as long as the packet just sent would take to play.
                Thread.sleep(getSleepDelta(len, sampleRate));
            }

            now = System.currentTimeMillis();
            logger.info("ASR wait for complete");
            recognizer.stop();
            logger.info("ASR stop latency : {} ms", System.currentTimeMillis() - now);
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            logger.error("Recognition of {} was interrupted", filepath, e);
            System.err.println(e.getMessage());
        } catch (Exception e) {
            logger.error("Recognition of {} failed", filepath, e);
            System.err.println(e.getMessage());
        } finally {
            if (recognizer != null) {
                recognizer.close();
            }
        }
    }

    /** Shuts down the shared {@link NlsClient}; does nothing if it was never created. */
    public void shutdown() {
        if (client != null) {
            client.shutdown();
        }
    }

    /**
     * Reads a mandatory environment variable.
     *
     * @param name variable name
     * @return the non-empty value
     * @throws IllegalStateException if the variable is unset or empty
     */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null || value.isEmpty()) {
            throw new IllegalStateException("Environment variable " + name + " is not set");
        }
        return value;
    }

    /**
     * Entry point: recognizes {@code ./nls-sample-16k.wav} at 16000 Hz.
     *
     * <p>Credentials come from the environment variables {@code NLS_APP_KEY},
     * {@code ALIYUN_AK_ID} and {@code ALIYUN_AK_SECRET}; the gateway URL may be
     * overridden with {@code NLS_GATEWAY_URL}.
     *
     * @param args unused
     * @throws IllegalStateException if a required environment variable is missing
     */
    public static void main(String[] args) throws Exception {
        String appKey = requireEnv("NLS_APP_KEY");
        String id = requireEnv("ALIYUN_AK_ID");
        String secret = requireEnv("ALIYUN_AK_SECRET");
        String url = System.getenv().getOrDefault("NLS_GATEWAY_URL", "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1");

        SpeechRecognizerDemo demo = new SpeechRecognizerDemo(appKey, id, secret, url);
        try {
            demo.process("./nls-sample-16k.wav", 16000);
        } finally {
            // Always release the client's resources, even if process() throws unexpectedly.
            demo.shutdown();
        }
    }
}