Система Microsoft Speech имеет хороший пример кода, но у меня есть проблема при добавлении обратной петли для записи того, что она играет, а не того, что идет через микрофон. Чтобы дать текстовое описание видео, например, не воспроизводя его на динамике. Кажется, что это - библиотека, которая делает это, но я получаю ошибки типа, выдвигающие его в аудиопоток распознавателя:
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
captureStream.Flush();
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
recognizer.SetInputToAudioStream(captureStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond, AudioBitsPerSample.Sixteen, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
Обновлено КакВы можете видеть в пересмотренном коде, что он компилируется по крайней мере сейчас, но не распознает никакой речи, внутренней или внешней. На самом деле он выдает:
384000
32
Так что, поскольку AudioBitsPerSample не имеет значения «тридцать два», возможно, я даже не могу использовать класс NAudio для получения системного звука ??
Обновление Кажется, это работает несколько иначе, основываясь на другом ответе, но он не очень улавливается, я думаю, что он отправляет замедленное или быстрое аудио, возможно?
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
//recognizer.SetInputToDefaultAudioDevice();
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond/4, AudioBitsPerSample.Eight, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
Обновление 3 - Попытка перекодировать аудиопоток в режим распознавания голоса: к сожалению, он не попадает в перезаписанный перекодированный звук, как вы можете видеть ...
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
//Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
var newFormat = new WaveFormat(8000, 16, 1);
//using (var conversionStream = new WaveFormatConversionStream(newFormat, capture)
var resampler = new MediaFoundationResampler(new NAudio.Wave.RawSourceWaveStream(captureStream,capture.WaveFormat), newFormat);
Stream captureConvertStream = new System.IO.MemoryStream();
resampler.ResamplerQuality = 60;
//WaveFileWriter.WriteWavFileToStream(captureConvertStream, resampler);
//recognizer.SetInputToDefaultAudioDevice();
Stream buffStream = new FakeStreamer(captureConvertStream);
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
8000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
capture.StartRecording();
var arr = new byte[128];
while (resampler.Read(arr, 0, arr.Length) > 0)
{
captureConvertStream.Write(arr, 0, arr.Length);
Console.WriteLine("Never getting here");
}
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}