/**
 * Runnable that takes raw audio chunks from an input queue, appends them to
 * {@code recordBytes}, and can write the accumulated bytes to a WAV file via
 * {@link #save(File)}. It also owns a Sphinx4 {@code StreamSpeechRecognizer}
 * that {@code getText} reads results from.
 *
 * NOTE(review): this listing is an excerpt. Block delimiters, field declarations
 * and several statements (for example the {@code try} headers that belong to the
 * visible {@code catch} clauses) are not shown, so the comments below describe
 * only what is visible.
 */
public class SoundModifier implements Runnable
// Resource locations of the Sphinx4 en-us acoustic model, dictionary, grammar
// directory and language model.
// NOTE(review): none of these constants is passed to `configuration` in the visible
// part of the constructor — presumably that happens in the elided code; confirm.
private static final String ACOUSTIC_MODEL = "resource:/edu/cmu/sphinx/models/en-us/en-us";
private static final String DICTIONARY_PATH = "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict";
private static final String GRAMMAR_PATH = "resource:/edu/cmu/sphinx/demo/dialog/";
private static final String LANGUAGE_MODEL = "resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin";
// other unrelated stuff
/**
 * Creates the Sphinx4 configuration and builds the recognizer from it.
 *
 * @param inputQueue  queue that {@link #run()} polls; its elements are cast to {@code byte[]}
 * @param outputQueue not used in the visible code — presumably receives processed data; confirm
 * @param saveFolder  prefix that {@link #run()} prepends to the generated WAV file name
 * @throws IOException presumably propagated from constructing the recognizer — confirm
 */
// NOTE(review): both queue parameters are raw types, which is why run() needs a cast.
public SoundModifier(ConcurrentLinkedQueue inputQueue, ConcurrentLinkedQueue outputQueue, String saveFolder) throws IOException
Configuration configuration = new Configuration();
recognizer = new StreamSpeechRecognizer(configuration);
// other unrelated stuff
/**
 * Drains the input queue: each polled chunk is copied, appended to
 * {@code recordBytes}, and paired with an "audio" tag; afterwards a timestamped
 * file name under {@code saveFolder} is built.
 */
public void run()
// Captured once, before the loop; used only for the file name further down.
var now = ZonedDateTime.now();
// NOTE(review): size() and poll() are two separate calls. If another thread can
// also remove elements from this queue, poll() may return null and the
// chunk.length access below would throw — confirm there is a single consumer.
while (inputQueue.size() > 0)
byte[] chunk = (byte[]) inputQueue.poll();
// Work on a private copy so later changes to the queued array do not affect it.
byte[] copy = Arrays.copyOf(chunk, chunk.length);
// NOTE(review): the try block this catch belongs to is not visible in the excerpt.
catch (IOException ex)
Logger.getLogger(SoundModifier.class.getName()).log(Level.SEVERE, null, ex);
// Append the chunk to the in-memory recording that save() later writes out.
recordBytes.write(copy, 0, copy.length);
// Two-part message: a tag followed by the payload. getBytes() is called without a
// charset argument. How `send` is consumed is not visible here.
byte[][] send = new byte[][]{"audio".getBytes(), copy };
// Date/time components are concatenated without zero-padding.
String time = now.getYear() + "-" + now.getMonthValue() + "-" + now.getDayOfMonth() + "--" + now.getHour() + "-" + now.getMinute() + "-" + now.getSecond();
// saveFolder is prepended as-is; no path separator is inserted here.
String filename = saveFolder + time + " SoundModifier.wav";
File file = new File(filename);
// NOTE(review): the matching try (presumably around a save(file) call) is not
// visible. This logger is named after SoundRecorder while the one above uses
// SoundModifier — possibly a copy/paste leftover; confirm.
catch (IOException ex)
Logger.getLogger(SoundRecorder.class.getName()).log(Level.WARNING, null, ex);
/**
 * Wraps {@code input} in a stream and prints the hypothesis of every result the
 * recognizer returns until it returns {@code null}.
 *
 * @param input audio bytes to wrap in a {@code ByteArrayInputStream}
 * @return the {@code utteredWords} list; in the visible code nothing is ever added
 *         to it (the {@code addAll} call is commented out), so it is returned empty
 * @throws IOException declared by the signature; the throwing call is not visible here
 */
private ArrayList<WordResult> getText(byte[] input) throws IOException
ArrayList<WordResult> utteredWords = new ArrayList<>();
// Assigned to a field declared elsewhere.
// NOTE(review): no call that hands `stream` to the recognizer (such as
// startRecognition) is visible in this excerpt — confirm it exists.
stream = new ByteArrayInputStream(input);
SpeechResult result;
// Keep pulling results until the recognizer reports none left (null).
while ((result = recognizer.getResult()) != null)
// var words = result.getWords();
// System.out.println("words: " + words);
// utteredWords.addAll(words);
System.out.format("Hypothesis: %s\n", result.getHypothesis());
return utteredWords;
/**
 * Writes everything accumulated in {@code recordBytes} to {@code wavFile} as a
 * WAVE file, using the {@code format} field for the audio parameters.
 *
 * @param wavFile destination file
 * @throws IOException declared by the signature; presumably from writing or closing
 *         the audio stream — confirm
 */
public void save(File wavFile) throws IOException
// Snapshot of the bytes recorded so far.
byte[] audioData = recordBytes.toByteArray();
ByteArrayInputStream bais = new ByteArrayInputStream(audioData);
// AudioInputStream takes its length in sample frames, hence bytes / frame size.
try (AudioInputStream audioInputStream = new AudioInputStream(bais, format, audioData.length / format.getFrameSize()))
AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, wavFile);
// NOTE(review): the message mentions closing recordBytes, but no
// recordBytes.close() call is visible in this excerpt.
LOGGER.log(Level.INFO, "recordBytes close");
11:23:33.703 INFO trieNgramModel LM Cache Size: 0 Hits: 0 Misses: 0
11:23:33.703 INFO speedTracker # ----------------------------- Timers----------------------------------------
11:23:33.703 INFO speedTracker # Name Count CurTime MinTime MaxTime AvgTime TotTime
11:23:33.703 INFO speedTracker Load Dictionary 46 0.0350s 0.0340s 0.0740s 0.0415s 1.9100s
11:23:33.703 INFO speedTracker Load AM 1 0.8700s 0.8700s 0.8700s 0.8700s 0.8700s
11:23:33.703 INFO speedTracker Frontend 184 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:33.703 INFO speedTracker Load LM 46 0.2640s 0.2320s 0.3450s 0.2699s 12.4150s
11:23:33.703 INFO speedTracker Score 184 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:33.703 INFO speedTracker Prune 460 0.0000s 0.0000s 0.0000s 0.0000s 0.0000s
11:23:33.703 INFO speedTracker Grow 644 0.0000s 0.0000s 0.0030s 0.0001s 0.0380s
11:23:33.703 INFO speedTracker Compile 46 0.3450s 0.2990s 0.6200s 0.3422s 15.7400s
11:23:33.703 INFO speedTracker Total Time Audio: 5.89s Proc: 0.03s 0.00 X real time
11:23:33.703 INFO memoryTracker Mem Total: 1186.00 Mb Free: 689.00 Mb
11:23:33.703 INFO memoryTracker Used: This: 497.00 Mb Avg: 657.31 Mb Max: 1468.03 Mb
11:23:33.703 INFO dictionary Loading dictionary from: jar:file:/C:/Users/???/.m2/repository/de/sciss/sphinx4-data/1.0.0/sphinx4-data-1.0.0.jar!/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict
11:23:33.743 INFO dictionary Loading filler dictionary from: jar:file:/C:/Users/???/.m2/repository/de/sciss/sphinx4-data/1.0.0/sphinx4-data-1.0.0.jar!/edu/cmu/sphinx/models/en-us/en-us/noisedict
11:23:33.743 INFO trieNgramModel Loading n-gram language model from: jar:file:/C:/Users/???/.m2/repository/de/sciss/sphinx4-data/1.0.0/sphinx4-data-1.0.0.jar!/edu/cmu/sphinx/models/en-us/en-us.lm.bin
11:23:33.902 INFO dictionary The dictionary is missing a phonetic transcription for the word '3-d'
11:23:33.903 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word '3-d'
11:23:33.903 INFO dictionary The dictionary is missing a phonetic transcription for the word 'adjustors'
11:23:33.904 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'adjustors'
11:23:33.904 INFO dictionary The dictionary is missing a phonetic transcription for the word 'adulyadej'
11:23:33.904 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'adulyadej'
11:23:33.915 INFO dictionary The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
11:23:33.915 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
11:23:33.925 INFO dictionary The dictionary is missing a phonetic transcription for the word 'déjà'
11:23:33.925 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'déjà'
11:23:33.940 INFO dictionary The dictionary is missing a phonetic transcription for the word 'iife'
11:23:33.940 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'iife'
11:23:33.952 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mm-hm'
11:23:33.952 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'mm-hm'
11:23:33.952 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mm-hmm'
11:23:33.952 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'mm-hmm'
11:23:33.952 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mmmm'
11:23:33.952 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'mmmm'
11:23:33.954 INFO dictionary The dictionary is missing a phonetic transcription for the word 'ngo's'
11:23:33.954 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'ngo's'
11:23:33.956 INFO dictionary The dictionary is missing a phonetic transcription for the word 'occured'
11:23:33.956 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'occured'
11:23:33.956 INFO dictionary The dictionary is missing a phonetic transcription for the word 'offical'
11:23:33.956 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'offical'
11:23:33.956 INFO dictionary The dictionary is missing a phonetic transcription for the word 'officals'
11:23:33.956 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'officals'
11:23:33.963 INFO dictionary The dictionary is missing a phonetic transcription for the word 'port_au_prince'
11:23:33.963 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'port_au_prince'
11:23:33.963 INFO dictionary The dictionary is missing a phonetic transcription for the word 'possiblity'
11:23:33.963 WARNING trieNgramModel The dictionary is missing a phonetic transcription for the word 'possiblity'
11:23:33.987 WARNING trieNgramModel Dictionary is missing 15 words that are contained in the language model.
11:23:34.080 INFO dictionary The dictionary is missing a phonetic transcription for the word 'offical'
11:23:34.080 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mm-hm'
11:23:34.081 INFO dictionary The dictionary is missing a phonetic transcription for the word 'adulyadej'
11:23:34.081 INFO dictionary The dictionary is missing a phonetic transcription for the word 'adjustors'
11:23:34.082 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mm-hmm'
11:23:34.082 INFO dictionary The dictionary is missing a phonetic transcription for the word 'ngo's'
11:23:34.082 INFO dictionary The dictionary is missing a phonetic transcription for the word 'officals'
11:23:34.083 INFO dictionary The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
11:23:34.083 INFO dictionary The dictionary is missing a phonetic transcription for the word '3-d'
11:23:34.084 INFO dictionary The dictionary is missing a phonetic transcription for the word 'déjà'
11:23:34.085 INFO dictionary The dictionary is missing a phonetic transcription for the word 'port_au_prince'
11:23:34.086 INFO dictionary The dictionary is missing a phonetic transcription for the word 'mmmm'
11:23:34.086 INFO dictionary The dictionary is missing a phonetic transcription for the word 'iife'
11:23:34.089 INFO dictionary The dictionary is missing a phonetic transcription for the word 'possiblity'
11:23:34.090 INFO dictionary The dictionary is missing a phonetic transcription for the word 'occured'
11:23:34.281 INFO lexTreeLinguist Max CI Units 43
11:23:34.281 INFO lexTreeLinguist Unit table size 79507
11:23:34.281 INFO speedTracker # ----------------------------- Timers----------------------------------------
11:23:34.281 INFO speedTracker # Name Count CurTime MinTime MaxTime AvgTime TotTime
11:23:34.281 INFO speedTracker Load Dictionary 47 0.0400s 0.0340s 0.0740s 0.0415s 1.9500s
11:23:34.281 INFO speedTracker Load AM 1 0.8700s 0.8700s 0.8700s 0.8700s 0.8700s
11:23:34.281 INFO speedTracker Frontend 184 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:34.281 INFO speedTracker Load LM 47 0.2440s 0.2320s 0.3450s 0.2693s 12.6590s
11:23:34.281 INFO speedTracker Score 184 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:34.281 INFO speedTracker Prune 460 0.0000s 0.0000s 0.0000s 0.0000s 0.0000s
11:23:34.281 INFO speedTracker Grow 644 0.0000s 0.0000s 0.0030s 0.0001s 0.0380s
11:23:34.281 INFO speedTracker Compile 47 0.2940s 0.2940s 0.6200s 0.3411s 16.0340s
11:23:34.282 INFO speedTracker This Time Audio: 0.13s Proc: 0.00s Speed: 0.00 X real time
11:23:34.282 INFO speedTracker Total Time Audio: 6.02s Proc: 0.03s 0.00 X real time
11:23:34.282 INFO memoryTracker Mem Total: 1186.00 Mb Free: 301.00 Mb
11:23:34.282 INFO memoryTracker Used: This: 885.00 Mb Avg: 659.76 Mb Max: 1468.03 Mb
11:23:34.282 INFO trieNgramModel LM Cache Size: 0 Hits: 0 Misses: 0
11:23:34.282 INFO trieNgramModel LM Cache Size: 0 Hits: 0 Misses: 0
11:23:34.282 INFO speedTracker # ----------------------------- Timers----------------------------------------
11:23:34.282 INFO speedTracker # Name Count CurTime MinTime MaxTime AvgTime TotTime
11:23:34.282 INFO speedTracker Load Dictionary 47 0.0400s 0.0340s 0.0740s 0.0415s 1.9500s
11:23:34.282 INFO speedTracker Load AM 1 0.8700s 0.8700s 0.8700s 0.8700s 0.8700s
11:23:34.282 INFO speedTracker Frontend 188 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:34.282 INFO speedTracker Load LM 47 0.2440s 0.2320s 0.3450s 0.2693s 12.6590s
11:23:34.282 INFO speedTracker Score 188 0.0000s 0.0000s 0.0030s 0.0000s 0.0090s
11:23:34.282 INFO speedTracker Prune 470 0.0000s 0.0000s 0.0000s 0.0000s 0.0000s
11:23:34.282 INFO speedTracker Grow 658 0.0000s 0.0000s 0.0030s 0.0001s 0.0380s
11:23:34.282 INFO speedTracker Compile 47 0.2940s 0.2940s 0.6200s 0.3411s 16.0340s
11:23:34.282 INFO speedTracker Total Time Audio: 6.02s Proc: 0.03s 0.00 X real time
11:23:34.282 INFO memoryTracker Mem Total: 1186.00 Mb Free: 301.00 Mb
11:23:34.282 INFO memoryTracker Used: This: 885.00 Mb Avg: 662.16 Mb Max: 1468.03 Mb
/**
 * Supplies the default audio format used to record.
 *
 * <p>All parameters are fixed compile-time constants, so every caller gets an
 * equivalent format: 16 kHz, 16-bit, mono, signed, little-endian.
 */
public class StaticAudioFormat {

    /** Samples per second. */
    private static final float SAMPLE_RATE = 16_000f;

    /** Bits per sample. */
    private static final int SAMPLE_SIZE_IN_BITS = 16;

    /** Number of channels (1 = mono). */
    private static final int CHANNELS = 1;

    /** Whether samples are signed. */
    private static final boolean SIGNED = true;

    /** Byte order of multi-byte samples; {@code false} means little-endian. */
    private static final boolean BIG_ENDIAN = false;

    /**
     * Defines a default audio format used to record.
     *
     * @return a newly created {@code AudioFormat} built from the constants above
     */
    static AudioFormat getAudioFormat() {
        return new AudioFormat(SAMPLE_RATE, SAMPLE_SIZE_IN_BITS, CHANNELS, SIGNED, BIG_ENDIAN);
    }
}
recognizer.startRecognition(new FileInputStream("???/2020-5-12--13-9-37 SoundModifier.wav"));
SpeechResult result = recognizer.getResult();
while ((result = recognizer.getResult()) != null) {