如何使TTS立即停止讲话?

问题描述 投票:0回答:2

我正在使用ISpVoice来朗读输入的字符串。现在,即使我在Speak方法中使用了SPF_ASYNC和SPF_PURGEBEFORESPEAK标志,tts也不会在调用Pause时立即停止,而是会继续朗读,直到说完当前的单词。

我是这样做的:

void speakSentence()
{
    pVoice->Pause();
    pVoice->Speak(L"This is a sentence.", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
    pVoice->Resume();
}

每当我尝试在单词“sentence”的中间调用此函数时,tts不会暂停,而是继续说出该单词直到结束。

从microsoft文档:

ISpVoice::Pause 会在最近的警报边界(alert boundary)处暂停语音并关闭输出设备,从而允许其他语音的待处理朗读请求访问该设备。

我尝试通过以下方式更改警报边界:

// Ask the voice to use the phoneme event as its alert boundary, i.e. the
// point at which Pause() is documented to take effect.
pVoice->SetAlertBoundary(SPEI_PHONEME);

它不起作用。

有NVDA屏幕阅读器解决了这个问题,但我不知道他们是如何做到的。

有什么办法可以解决我的问题吗?

编辑:这是我的完整代码。我正在创建一个使用UIAutomation和MSAA的小屏幕阅读器程序。在比较UI对象时,程序可能有些不稳定,但大多数时候它都有效。

screenreader.h:

#ifndef _SCREENREADER_H_
#define _SCREENREADER_H_

#define WIN32_LEAN_AND_MEAN
#ifndef UNICODE
#define UNICODE
#endif

#include <windows.h>

#include <memory>

#include "speechsynthesis.h"
#include "uiautomator.h"

// Application object of the screen reader.
//
// Holds the main window handle, the speech wrapper and the UI Automation
// wrapper, registers for raw keyboard/mouse input and reacts to it in
// MessageHandler().
//
// Every data member carries a default initializer so that no member is ever
// read while indeterminate (currItem in particular is tested against NULL in
// MessageHandler() and in the destructor).
class ScreenReader
{
public:
    // Stores the WinMain arguments, creates the helpers and the main window.
    explicit ScreenReader(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR pScmdline, int iCmdShow);
    virtual ~ScreenReader();

    // Per-instance window procedure; the global WndProc forwards to it.
    LRESULT CALLBACK MessageHandler(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
    // Shows the window and runs the message loop; returns the exit code.
    int Exec();
private:
    void InitializeWindows();
    void InitRawInputDevices();
    // True when the cursor position differs from the last recorded one.
    bool IsMouseMove();
private:
    LPCWSTR m_applicationName = nullptr;
    HINSTANCE m_hInstance = nullptr;
    HINSTANCE m_hPrevInstance = nullptr;
    PSTR m_pScmdline = nullptr;
    int m_iCmdShow = 0;

    HWND m_hWnd = nullptr;

    // Owned; released in the destructor.
    SpeechSynthesis *m_pSpeech = nullptr;
    UIAutomator *m_pAutomator = nullptr;

    // [0] = keyboard, [1] = mouse (filled by InitRawInputDevices).
    RAWINPUTDEVICE rid[2] = {};

    // Last cursor position seen by IsMouseMove().
    LONG m_prevMouseX = 0;
    LONG m_prevMouseY = 0;

    // Name of the most recently seen UI item; owned BSTR, NULL until set.
    BSTR currItem = nullptr;
};

// Pointer to the single ScreenReader instance; WndProc forwards messages to it.
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit that includes this header its own separate copy of this
// pointer. Only the copy in the file that assigns it is ever non-null.
static ScreenReader *application;

// Window procedure registered for the main window class.
static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);

#endif

screenreader.cpp:在这部分代码中,我在消息循环里调用了ISpVoice,具体位置是ScreenReader::MessageHandler()中IsMouseMove()条件成立的那个分支。

#include "screenreader.h"



// Builds the application object: initializes COM, creates the speech and
// UI Automation helpers, records the current cursor position, announces that
// the program is loading, then creates the main window and registers for raw
// input.
//
// All members are given a defined value in the initializer list. In
// particular currItem must start out as NULL because MessageHandler() and the
// destructor both test it against NULL before using it.
ScreenReader::ScreenReader(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR pScmdline, int iCmdShow)
    : m_applicationName(NULL)
    , m_hInstance(hInstance)
    , m_hPrevInstance(hPrevInstance)
    , m_pScmdline(pScmdline)
    , m_iCmdShow(iCmdShow)
    , m_hWnd(NULL)
    , m_pSpeech(nullptr)
    , m_pAutomator(nullptr)
    , rid()
    , m_prevMouseX(0)
    , m_prevMouseY(0)
    , currItem(NULL)
{
    // COM has to be up before the helpers create their COM objects.
    CoInitialize(NULL);
    m_pSpeech = new SpeechSynthesis;
    m_pAutomator = new UIAutomator;

    // Get current Cursor position (left at 0,0 if the query fails).
    POINT pt = { 0, 0 };
    if (GetCursorPos(&pt))
    {
        m_prevMouseX = pt.x;
        m_prevMouseY = pt.y;
    }

    // Notify user the program is loading.
    m_pSpeech->Speak(L"Loading Rescan. Please wait.", SPF_DEFAULT, NULL);

    // WndProc reaches this instance through the global pointer, so it must be
    // set before the window is created.
    application = this;
    InitializeWindows();
    InitRawInputDevices();
}


// Tears down the helpers and the cached item name, then shuts COM down.
// The helpers are destroyed before CoUninitialize() is called.
ScreenReader::~ScreenReader()
{
    // Deleting a null pointer is a no-op, so no guard is needed.
    delete m_pSpeech;
    m_pSpeech = nullptr;

    delete m_pAutomator;
    m_pAutomator = nullptr;

    // SysFreeString simply returns when handed NULL.
    SysFreeString(currItem);
    currItem = NULL;

    CoUninitialize();
}

// Per-instance window procedure.
//
// Only WM_INPUT is handled; every other message goes to DefWindowProc.
// For raw mouse input that actually moved the cursor, the name of the UI item
// under the cursor is looked up and, when it differs from the previously seen
// item (or no item has been seen yet), the current utterance is stopped and
// the new name is spoken.
//
// Returns 0 for WM_INPUT, otherwise the DefWindowProc result.
LRESULT CALLBACK ScreenReader::MessageHandler(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    if (message != WM_INPUT)
        return DefWindowProc(hWnd, message, wParam, lParam);

    const HRAWINPUT hRawInput = reinterpret_cast<HRAWINPUT>(lParam);

    // First call only queries the required buffer size. With a NULL buffer
    // the function returns 0 on success and (UINT)-1 on failure.
    UINT dwSize = 0;
    if (GetRawInputData(hRawInput, RID_INPUT, NULL, &dwSize, sizeof(RAWINPUTHEADER)) != 0
        || dwSize < sizeof(RAWINPUTHEADER))
        return 0;

    // Second call fetches the data; do not touch the buffer if it failed.
    std::unique_ptr<BYTE[]> lpb(new BYTE[dwSize]);
    if (GetRawInputData(hRawInput, RID_INPUT, lpb.get(), &dwSize, sizeof(RAWINPUTHEADER)) != dwSize)
    {
        OutputDebugString(L"GetRawInputData does not return correct size!\n");
        return 0;
    }

    const RAWINPUT *raw = reinterpret_cast<const RAWINPUT*>(lpb.get());

    // Keyboard input (RIM_TYPEKEYBOARD) is registered for but not acted upon
    // yet; only mouse input is processed.
    if (raw->header.dwType != RIM_TYPEMOUSE)
        return 0;

    if (!IsMouseMove())
        return 0;

    // Start from NULL so a lookup that fails without writing the
    // out-parameter cannot leave us with an indeterminate pointer.
    BSTR item = NULL;
    const HRESULT hr = m_pAutomator->GetUIAutomationItemNameAtMousePoint(&item);
    if (FAILED(hr) || item == NULL)
    {
        SysFreeString(item);
        return 0;
    }

    // The very first item counts as a change too, so it is announced instead
    // of being cached silently.
    const bool changed = (currItem == NULL) || (wcscmp(currItem, item) != 0);
    if (changed)
    {
        m_pSpeech->Stop();
        m_pSpeech->Speak(item);

        // Take ownership of the freshly returned string instead of copying it.
        SysFreeString(currItem);
        currItem = item;
    }
    else
    {
        SysFreeString(item);
    }
    return 0;
}

int ScreenReader::Exec()
{
    MSG msg;

    ShowWindow(m_hWnd, m_iCmdShow);

    // Tell the user that the program is ready.
    m_pSpeech->Speak(L"Rescan ready.", SPF_PURGEBEFORESPEAK);

    // The message loop
    while (GetMessage(&msg, NULL, 0, 0))
    {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }

    return msg.wParam;
}

void ScreenReader::InitializeWindows()
{
    // Create Window class.
    WNDCLASSEX wc;

    m_applicationName = L"Rescan Screen Reader";

    wc.cbSize = sizeof(WNDCLASSEX);
    wc.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
    wc.lpfnWndProc = WndProc;
    wc.cbClsExtra = 0;
    wc.cbWndExtra = 0;
    wc.hInstance = m_hInstance;
    wc.hIcon = LoadIcon(NULL, IDI_APPLICATION);
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1);
    wc.lpszMenuName = NULL;
    wc.lpszClassName = m_applicationName;
    wc.hIconSm = wc.hIcon;

    // Register the window class.
    RegisterClassEx(&wc);

    m_hWnd = CreateWindowEx(
        WS_EX_OVERLAPPEDWINDOW,
        m_applicationName,
        L"Rescan Screen Reader",
        WS_CAPTION | WS_MINIMIZEBOX | WS_OVERLAPPED | WS_SYSMENU,
        (GetSystemMetrics(SM_CXSCREEN) - 500) / 2,
        (GetSystemMetrics(SM_CYSCREEN) - 300) / 2,
        500,
        300,
        NULL,
        NULL,
        m_hInstance,
        NULL
    );
}

void ScreenReader::InitRawInputDevices()
{
    // Initialize Keyboard
    rid[0].usUsagePage = 0x01;
    rid[0].usUsage = 0x06;
    rid[0].dwFlags = RIDEV_INPUTSINK;
    rid[0].hwndTarget = m_hWnd;
    // Initialize Mouse
    rid[1].usUsagePage = 0x01;
    rid[1].usUsage = 0x02;
    rid[1].dwFlags = RIDEV_INPUTSINK;
    rid[1].hwndTarget = m_hWnd;

    // Register RIDs
    RegisterRawInputDevices(rid, 2, sizeof(RAWINPUTDEVICE));
}

bool ScreenReader::IsMouseMove()
{
    POINT pt;
    GetCursorPos(&pt);
    bool result = !(m_prevMouseX == pt.x && m_prevMouseY == pt.y);
    m_prevMouseX = pt.x;
    m_prevMouseY = pt.y;
    return result;
}

// Global window procedure: WM_QUIT and WM_DESTROY post a quit message with
// exit code 0; everything else is forwarded to the ScreenReader instance.
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    const bool shuttingDown = (message == WM_QUIT) || (message == WM_DESTROY);
    if (!shuttingDown)
        return application->MessageHandler(hWnd, message, wParam, lParam);

    PostQuitMessage(0);
    return 0;
}

我将ISpVoice包装到SpeechSynthesis类中。

speechsynthesis.h:

#ifndef _SPEECHSYNTHESIS_H_
#define _SPEECHSYNTHESIS_H_

#pragma warning(disable :  4996)

#define SPCAT_VOICES_ONECORE L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices"

#include <sapi.h>
#include <sphelper.h>
#include <atlbase.h>

// Thin wrapper around a SAPI ISpVoice.
//
// The constructor creates the voice and tries to select a voice token from
// the OneCore voice category; the remaining methods forward to the voice.
// pVoice and count carry default initializers so they hold a defined value
// even when construction fails part-way.
class SpeechSynthesis
{
public:
    SpeechSynthesis();
    ~SpeechSynthesis();

    // Speaks pwcs. By default the call is asynchronous, purges anything
    // queued earlier and treats the text as plain (non-XML) text.
    HRESULT Speak(LPCWSTR pwcs, DWORD dwFlags = SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, ULONG *pulStreamNumber = NULL);
    HRESULT Resume();
    HRESULT Pause();
    // Implemented as Speak(NULL) with the default flags.
    HRESULT Stop();

    // Returns the wrapped voice pointer as stored (may be NULL).
    ISpVoice* getVoice();

private:
    CComPtr<ISpObjectToken> cpVoiceToken;   // selected voice, if any
    CComPtr<IEnumSpObjectTokens> cpEnum;    // enumerator over the voice category
    ISpVoice* pVoice = nullptr;             // released in the destructor
    ULONG count = 0;                        // number of enumerated voice tokens
};

#endif

speechsynthesis.cpp:

#include "speechsynthesis.h"



// Creates the SAPI voice and configures it: alert priority, phoneme alert
// boundary, default audio output, and the second voice of the OneCore
// category when that category has one.
//
// On failure of voice creation or token enumeration a message box is shown;
// pVoice is then left NULL or only partially configured.
SpeechSynthesis::SpeechSynthesis()
    : pVoice(NULL)
    , count(0)
{
    HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, reinterpret_cast<void **>(&pVoice));
    if (SUCCEEDED(hr))
        hr = SpEnumTokens(SPCAT_VOICES_ONECORE, NULL, NULL, &cpEnum);
    if (SUCCEEDED(hr))
        hr = cpEnum->GetCount(&count);
    if (SUCCEEDED(hr))
    {
        // The token at index 1 is the preferred voice. Only ask for it when
        // it exists; otherwise the token stays NULL and SetVoice() below is
        // called with NULL.
        const ULONG kPreferredVoiceIndex = 1;
        if (count > kPreferredVoiceIndex
            && FAILED(cpEnum->Item(kPreferredVoiceIndex, &cpVoiceToken)))
        {
            cpVoiceToken.Release();
        }

        pVoice->SetPriority(SPVPRIORITY::SPVPRI_ALERT);
        pVoice->SetAlertBoundary(SPEI_PHONEME);
        pVoice->SetOutput(NULL, TRUE);
        pVoice->SetVoice(cpVoiceToken);
    }
    if (FAILED(hr))
    {
        // Explicit ANSI entry point: the literals are narrow strings, which
        // do not compile against MessageBox when UNICODE is defined.
        MessageBoxA(NULL, "A fatal error has occured", "Error Message", MB_ABORTRETRYIGNORE);
    }
}


// Releases the voice. The pointer is NULL when creation failed in the
// constructor, so it must be checked before use.
SpeechSynthesis::~SpeechSynthesis()
{
    if (pVoice != NULL)
    {
        pVoice->Release();
        pVoice = NULL;
    }
}

// Forwards to ISpVoice::Speak with the given text, flags and optional
// stream-number output. Returns E_FAIL when no voice is available (voice
// creation failed in the constructor) instead of dereferencing NULL.
HRESULT SpeechSynthesis::Speak(LPCWSTR pwcs, DWORD dwFlags, ULONG *pulStreamNumber)
{
    if (pVoice == NULL)
        return E_FAIL;
    return pVoice->Speak(pwcs, dwFlags, pulStreamNumber);
}

// Forwards to ISpVoice::Resume. Returns E_FAIL when no voice is available
// instead of dereferencing NULL.
HRESULT SpeechSynthesis::Resume()
{
    if (pVoice == NULL)
        return E_FAIL;
    return pVoice->Resume();
}

// Forwards to ISpVoice::Pause. Returns E_FAIL when no voice is available
// instead of dereferencing NULL.
HRESULT SpeechSynthesis::Pause()
{
    if (pVoice == NULL)
        return E_FAIL;
    return pVoice->Pause();
}

// Stops speech by submitting a NULL text with the purge-before-speak flag,
// which discards whatever is currently queued on the voice.
HRESULT SpeechSynthesis::Stop()
{
    const DWORD purgeFlags = SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML;
    return Speak(NULL, purgeFlags, NULL);
}

// Returns the wrapped voice pointer exactly as stored. No AddRef is
// performed here, so the caller does not receive its own reference.
ISpVoice * SpeechSynthesis::getVoice()
{
    return pVoice;
}

uiautomator.h

#ifndef _UIAUTOMATOR_H_
#define _UIAUTOMATOR_H_

#include <windows.h>
#include <oleacc.h>
#include <uiautomation.h>

#pragma comment(lib, "oleacc.lib")

// Looks up the name of the UI item under the mouse cursor, using UI
// Automation with an MSAA (IAccessible) path for pane elements.
//
// m_automation carries a default initializer so the pointer has a defined
// value before InitUIAutomation() runs.
class UIAutomator
{
public:
    UIAutomator();
    ~UIAutomator();

    // MSAA lookup: accessible name of the object under the cursor.
    // The caller owns the returned BSTR.
    HRESULT GetItemNameAtMousePoint(BSTR *pStr);
    // UI Automation lookup; uses the MSAA lookup for pane elements and the
    // value property for edit controls. The caller owns the returned BSTR.
    HRESULT GetUIAutomationItemNameAtMousePoint(BSTR *pStr);
private:
    // Creates the CUIAutomation object into m_automation.
    HRESULT InitUIAutomation();

private:
    IUIAutomation *m_automation = nullptr;
};

#endif

uiautomator.cpp

#include "uiautomator.h"



// Marks the process DPI-aware and creates the UI Automation object.
// A creation failure leaves m_automation NULL and is reported to the
// debugger output rather than being discarded.
UIAutomator::UIAutomator()
    : m_automation(NULL)
{
    SetProcessDPIAware();
    const HRESULT hr = InitUIAutomation();
    if (FAILED(hr))
    {
        m_automation = NULL;
        OutputDebugStringW(L"UIAutomator: failed to create CUIAutomation!\n");
    }
}


// Releases the UI Automation object obtained in InitUIAutomation(); without
// this the COM reference is leaked.
UIAutomator::~UIAutomator()
{
    if (m_automation != NULL)
    {
        m_automation->Release();
        m_automation = NULL;
    }
}

// Retrieves the MSAA accessible name of the object under the (physical)
// cursor position.
//
// pStr receives a BSTR owned by the caller. It is set to NULL up front so it
// holds a defined value on every failure path.
// Returns E_POINTER for a NULL pStr, E_FAIL when the cursor position cannot
// be read, otherwise the result of the MSAA calls.
HRESULT UIAutomator::GetItemNameAtMousePoint(BSTR * pStr)
{
    if (pStr == NULL)
        return E_POINTER;
    *pStr = NULL;

    POINT pt;
    if (!GetPhysicalCursorPos(&pt))
        return E_FAIL;

    VARIANT varItem;
    VariantInit(&varItem);
    IAccessible *pAcc = NULL;
    HRESULT hr = AccessibleObjectFromPoint(pt, &pAcc, &varItem);
    if (SUCCEEDED(hr) && pAcc != NULL)
    {
        hr = pAcc->get_accName(varItem, pStr);
        pAcc->Release();
    }
    // Safe on every path because the VARIANT was initialized above.
    VariantClear(&varItem);
    return hr;
}

// Retrieves a speakable name for the UI Automation element under the cursor.
//
//   - pane elements:  the MSAA accessible name (GetItemNameAtMousePoint)
//   - edit controls:  the element's value property
//   - everything else: the element's current name
//
// pStr receives a BSTR owned by the caller. It is set to NULL up front so the
// caller never sees an indeterminate pointer when a step fails.
// Returns E_POINTER for a NULL pStr, E_FAIL when UI Automation is unavailable
// or no element/cursor position can be obtained, otherwise the result of the
// last lookup performed.
HRESULT UIAutomator::GetUIAutomationItemNameAtMousePoint(BSTR * pStr)
{
    if (pStr == NULL)
        return E_POINTER;
    *pStr = NULL;

    if (m_automation == NULL)
        return E_FAIL;

    POINT pt;
    if (!GetCursorPos(&pt))
        return E_FAIL;

    IUIAutomationElement *elem = NULL;
    HRESULT hr = m_automation->ElementFromPoint(pt, &elem);
    if (FAILED(hr))
        return hr;
    if (elem == NULL)
        return E_FAIL;

    CONTROLTYPEID id = 0;
    hr = elem->get_CurrentControlType(&id);
    if (SUCCEEDED(hr))
    {
        if (id == UIA_PaneControlTypeId)
        {
            // Propagate the MSAA result instead of dropping it.
            hr = GetItemNameAtMousePoint(pStr);
        }
        else if (id == UIA_EditControlTypeId)
        {
            VARIANT val;
            VariantInit(&val);
            hr = elem->GetCurrentPropertyValue(UIA_ValueValuePropertyId, &val);
            if (SUCCEEDED(hr))
            {
                // Only read bstrVal when the variant really holds a string.
                if (val.vt == VT_BSTR && val.bstrVal != NULL)
                    *pStr = SysAllocString(val.bstrVal);
                VariantClear(&val);
            }
        }
        else
        {
            hr = elem->get_CurrentName(pStr);
        }
    }
    elem->Release();
    return hr;
}

// Creates the in-process CUIAutomation object and stores its IUIAutomation
// interface in m_automation. Returns the CoCreateInstance result.
HRESULT UIAutomator::InitUIAutomation()
{
    void **const target = reinterpret_cast<void**>(&m_automation);
    return CoCreateInstance(__uuidof(CUIAutomation), NULL, CLSCTX_INPROC_SERVER,
        __uuidof(IUIAutomation), target);
}

main.cpp:

#include "vld.h"
#include "screenreader.h"
#include <memory>

// Program entry point: builds the ScreenReader on the heap, runs its message
// loop and returns the loop's exit code. The object is destroyed when the
// smart pointer goes out of scope.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR pScmdline, int iCmdShow)
{
    const std::unique_ptr<ScreenReader> reader(
        new ScreenReader(hInstance, hPrevInstance, pScmdline, iCmdShow));

    const int exitCode = reader->Exec();
    return exitCode;
}

If you don't have time to compile here's the program.

如果您启动它并将鼠标悬停在程序窗口上,则突出显示最小化和关闭按钮时会出现延迟。有时,当您将鼠标悬停在另一个物体上时,tts不会立即停止。

将此与NVDA屏幕阅读器进行比较。你会发现很大的不同。

c++ winapi text-to-speech sapi
2个回答
0
投票

无论有没有设置SetAlertBoundary(SPEI_PHONEME),它都适用于我。

以下是我的测试代码,你可以尝试一下。

HRESULT hr = ::CoInitialize(nullptr);
if (FAILED(hr))
{
    return EXIT_FAILURE;
}
std::wstring text;

text = L"This is a sentence.";

CComPtr<ISpVoice> cpVoice;
// Create a SAPI voice
hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
//cpVoice->SetAlertBoundary(SPEI_PHONEME);
// set the output to the default audio device
if (SUCCEEDED(hr))
{
    hr = cpVoice->SetOutput(NULL, TRUE);
}
// Speak the text
if (SUCCEEDED(hr))
{
    hr = cpVoice->Speak(text.c_str(), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
}

text = L"The third type, a logarithm of the unsigned fold change, is undoubtedly the most tractable.";

Sleep(600);

hr = cpVoice->Pause();      
hr = cpVoice->Resume();
hr = cpVoice->Speak(text.c_str(), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);

Sleep(10000);

::CoUninitialize();
if (SUCCEEDED(hr))
{
    return EXIT_SUCCESS;
}
return EXIT_FAILURE;

0
投票

我终于明白了!

// Audio object that will later be handed to the voice as its output.
CComPtr<ISpAudio> audio;
// Wave format for that audio object; going by the enumerator name this is
// 11 kHz, 8-bit, mono.
CSpStreamFormat format;
format.AssignFormat(SPSF_11kHz8BitMono);

初始化音频

// Create the default object of the audio-output category into `audio`.
// NOTE(review): the returned HRESULT is ignored here.
SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &audio);

然后,设置其格式并将其设置为输出到pVoice。

// Apply the chosen format to the audio object, then make it the voice's
// output so the program keeps direct access to the audio stream.
// NOTE(review): FALSE is presumably the "allow format changes" argument of
// SetOutput — confirm against the SAPI documentation.
audio->SetFormat(format.FormatId(), format.WaveFormatExPtr());
pVoice->SetOutput(audio, FALSE);

现在我可以访问音频流了!

现在,要立即停止音频,请调用:

// Put the audio object into the stopped state; per the answer this is what
// silences the output immediately.
audio->SetState(SPAS_STOP, 0);

然后,要再次朗读时:

// Put the audio object back into the running state, then queue the new
// sentence asynchronously, purging anything still pending on the voice.
audio->SetState(SPAS_RUN, 0);
pVoice->Speak(L"This is a sentence", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
© www.soinside.com 2019 - 2024. All rights reserved.