From b1288d9d3e78402d5577f7d4e64be5792697c3b4 Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 00:43:35 -0700 Subject: [PATCH 1/3] feat(stt): add NVIDIA Canary STT engine support Add support for NVIDIA's Canary speech-to-text models via NeMo toolkit: - Canary 1B v2: 4.89% WER, 630x RTF (5x faster than Whisper) - Canary Qwen 2.5B: Higher accuracy variant for demanding use cases Both models use NeMo's EncDecMultiTaskModel architecture with automatic model download via HuggingFace. Supports GPU acceleration (CUDA/ROCm), translation (s2t_translation), and punctuation restoration. New files: - src/canary_engine.hpp: Engine class definition - src/canary_engine.cpp: NeMo Python integration via py_executor Modified: - models_manager.h/cpp: Add stt_canary engine type and feature flags - speech_service.cpp: Engine instantiation and type checking - CMakeLists.txt: Add canary_engine source files - config/models.json: Add both Canary model entries Requires: pip install nemo_toolkit[asr] --- CMakeLists.txt | 2 + config/models.json | 39 ++++- src/canary_engine.cpp | 389 +++++++++++++++++++++++++++++++++++++++++ src/canary_engine.hpp | 56 ++++++ src/models_manager.cpp | 35 +++- src/models_manager.h | 3 + src/speech_service.cpp | 13 +- 7 files changed, 524 insertions(+), 13 deletions(-) create mode 100644 src/canary_engine.cpp create mode 100644 src/canary_engine.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dce3bdd2..bdbc4e73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -374,6 +374,8 @@ set(dsnote_lib_sources ${sources_dir}/app_server.cpp ${sources_dir}/fasterwhisper_engine.hpp ${sources_dir}/fasterwhisper_engine.cpp + ${sources_dir}/canary_engine.hpp + ${sources_dir}/canary_engine.cpp ${sources_dir}/mimic3_engine.hpp ${sources_dir}/mimic3_engine.cpp ${sources_dir}/april_engine.hpp diff --git a/config/models.json b/config/models.json index 54a56e48..1fb6d896 100644 --- a/config/models.json +++ b/config/models.json @@ -35916,14 +35916,37 @@ "engine": "stt_whisper", "lang_id": "yo" }, - { - "name": "中文 (FasterWhisper)", - "id": "zh_fasterwhisper", - "engine": "stt_fasterwhisper", - "lang_id": "zh" - }, - { - "name": "中文 (WhisperCpp)", + { + "name": "中文 (FasterWhisper)", + "id": "zh_fasterwhisper", + "engine": "stt_fasterwhisper", + "lang_id": "zh" + }, + { + "name": "Multilingual (Canary 1B v2)", + "model_id": "multilang_canary_1b_v2", + "engine": "stt_canary", + "lang_id": "multilang", + "info": "NVIDIA Canary 1B v2 - 4.89% WER, 5x faster than Whisper (RTFx 630), best accuracy-per-watt", + "options": "ti", + "score": 5, + "features": ["high_quality", "medium_processing", "stt_punctuation"], + "default_for_lang": true, + "hidden": false + }, + { + "name": "Multilingual (Canary Qwen 2.5B)", + "model_id": "multilang_canary_qwen", + "engine": "stt_canary", + "lang_id": "multilang", + "info": "NVIDIA Canary Qwen 2.5B - Larger model for maximum accuracy", + "options": "ti", + "score": 4, + "features": ["high_quality", "slow_processing", "stt_punctuation"], + "hidden": false + }, + { + "name": "中文 (WhisperCpp)", "id": "zh_whisper", "engine": "stt_whisper", "lang_id": "zh" diff --git a/src/canary_engine.cpp b/src/canary_engine.cpp new file mode 100644 index 00000000..8530ddee --- /dev/null +++ b/src/canary_engine.cpp @@ -0,0 +1,389 @@ +/* Copyright (C) 2024-2025 Cole Leavitt + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include "canary_engine.hpp" + +#include +#include + +#include +#include +#include +#include + +#include "cpu_tools.hpp" +#include "gpu_tools.hpp" +#include "logger.hpp" +#include "py_executor.hpp" +#include "text_tools.hpp" + +using namespace pybind11::literals; + +canary_engine::canary_engine(config_t config, callbacks_t call_backs) + : stt_engine{std::move(config), std::move(call_backs)} { + m_speech_buf.reserve(m_speech_max_size); + m_auto_lang = m_config.lang == "auto"; +} + +canary_engine::~canary_engine() { + LOGD("canary dtor"); + stop(); +} + +void canary_engine::stop() { + stt_engine::stop(); + + auto task = py_executor::instance()->execute([&]() { + try { + m_model.reset(); + py::module_::import("gc").attr("collect")(); + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + } + return std::any{}; + }); + + if (task) task->get(); + + LOGD("canary stopped"); +} + +void canary_engine::push_buf_to_audio_buf( + const std::vector& buf, + audio_buf_t& audio_buf) { + std::transform(buf.cbegin(), buf.cend(), std::back_inserter(audio_buf), + [](auto sample) { + return static_cast(sample) / + 32768.0F; + }); +} + +void canary_engine::push_buf_to_audio_buf(in_buf_t::buf_t::value_type* data, + in_buf_t::buf_t::size_type size, + audio_buf_t& audio_buf) { + audio_buf.reserve(audio_buf.size() + size); + for (size_t i = 0; i < size; ++i) { + audio_buf.push_back(static_cast(data[i]) / + 32768.0F); + } +} + +void canary_engine::reset_impl() { m_speech_buf.clear(); } + +void canary_engine::stop_processing_impl() { LOGD("canary cancel"); } + +void canary_engine::start_processing_impl() { create_model(); } + +void canary_engine::create_model() { + if (m_model) return; + + LOGD("creating canary model"); + + auto task = py_executor::instance()->execute([&]() { + auto n_threads = static_cast( + std::min(m_config.cpu_threads, + std::max(1U, std::thread::hardware_concurrency()))); + auto use_cuda = + m_config.use_gpu && ((m_config.gpu_device.api == gpu_api_t::cuda && + gpu_tools::has_cudnn()) || + (m_config.gpu_device.api == gpu_api_t::rocm && + gpu_tools::has_hip())); + + LOGD("cpu info: arch=" << cpu_tools::arch() + << ", cores=" << std::thread::hardware_concurrency()); + LOGD("using threads: " << n_threads << "/" + << std::thread::hardware_concurrency()); + LOGD("using device: " << (use_cuda ? "cuda" : "cpu") << " " + << m_config.gpu_device.id); + + try { + auto torch = py::module_::import("torch"); + auto nemo_asr = py::module_::import("nemo.collections.asr"); + auto os_path = py::module_::import("os.path"); + + std::string device = use_cuda ? 
"cuda:" + std::to_string(m_config.gpu_device.id) : "cpu"; + + std::string model_path = m_config.model_files.model_file; + bool is_local_path = !model_path.empty() && + os_path.attr("exists")(model_path).cast(); + bool is_hf_model = !model_path.empty() && + model_path.find('/') != std::string::npos && + !is_local_path; + + std::string pretrained_name = "nvidia/canary-1b-v2"; + if (model_path.find("qwen") != std::string::npos || + model_path.find("2.5b") != std::string::npos || + model_path.find("2_5b") != std::string::npos) { + pretrained_name = "nvidia/canary-qwen-2.5b"; + } + + LOGD("canary model_path: " << model_path); + LOGD("canary is_local: " << is_local_path << ", is_hf: " << is_hf_model); + LOGD("canary pretrained_name: " << pretrained_name); + + py::object model; + if (is_local_path) { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("restore_from")(model_path); + } else if (is_hf_model) { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("from_pretrained")(model_path); + } else { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("from_pretrained")(pretrained_name); + } + + model.attr("to")(device); + model.attr("eval")(); + + if (use_cuda) { + torch.attr("cuda").attr("empty_cache")(); + } + + m_model.emplace(std::move(model)); + return true; + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + m_model.reset(); + return false; + } + }); + + if (!task || !std::any_cast(task->get())) { + LOGE("failed to create canary model"); + throw std::runtime_error{"failed to create canary model"}; + } + + LOGD("canary model created"); +} + +stt_engine::samples_process_result_t canary_engine::process_buff() { + if (!lock_buff_for_processing()) + return samples_process_result_t::wait_for_samples; + + auto eof = m_in_buf.eof; + auto sof = m_in_buf.sof; + + LOGD("process samples buf: mode=" + << m_config.speech_mode << ", in-buf size=" << m_in_buf.size + << ", speech-buf size=" << m_speech_buf.size() << ", sof=" << sof + << ", eof=" << eof); + + if (sof) { + m_speech_buf.clear(); + m_start_time.reset(); + m_vad.reset(); + reset_segment_counters(); + } + + m_denoiser.process(m_in_buf.buf.data(), m_in_buf.size); + + const auto& vad_buf = + m_vad.remove_silence(m_in_buf.buf.data(), m_in_buf.size); + + bool vad_status = !vad_buf.empty(); + + if (vad_status) { + LOGD("vad: speech detected"); + + if (m_config.speech_mode != speech_mode_t::manual && + m_config.speech_mode != speech_mode_t::single_sentence) + set_speech_detection_status( + speech_detection_status_t::speech_detected); + + if (m_config.text_format == text_format_t::raw) + push_buf_to_audio_buf(vad_buf, m_speech_buf); + else + push_buf_to_audio_buf(m_in_buf.buf.data(), m_in_buf.size, + m_speech_buf); + + restart_sentence_timer(); + } else { + LOGD("vad: no speech"); + + if (m_config.speech_mode == speech_mode_t::single_sentence && + m_speech_buf.empty() && sentence_timer_timed_out()) { + LOGD("sentence timeout"); + m_call_backs.sentence_timeout(); + } + + if (m_config.speech_mode == speech_mode_t::automatic) + set_speech_detection_status(speech_detection_status_t::no_speech); + + if (m_speech_buf.empty()) + m_segment_time_discarded_before += + (1000 * m_in_buf.size) / m_sample_rate; + else + m_segment_time_discarded_after += + (1000 * m_in_buf.size) / m_sample_rate; + } + + m_in_buf.clear(); + + auto decode_samples = [&] { + if (m_speech_buf.size() > m_speech_max_size) { + LOGD("speech buf reached max size"); + return true; + } + + if 
(m_speech_buf.empty()) return false; + + if ((m_config.speech_mode == speech_mode_t::manual || + m_speech_detection_status == + speech_detection_status_t::speech_detected) && + vad_status && !eof) + return false; + + if ((m_config.speech_mode == speech_mode_t::manual || + m_config.speech_mode == speech_mode_t::single_sentence) && + m_speech_detection_status == speech_detection_status_t::no_speech && + !eof) + return false; + + return true; + }(); + + if (!decode_samples) { + if (eof || (m_config.speech_mode == speech_mode_t::manual && + m_speech_detection_status == + speech_detection_status_t::no_speech)) { + flush(eof ? flush_t::eof : flush_t::regular); + free_buf(); + return samples_process_result_t::no_samples_needed; + } + + free_buf(); + return samples_process_result_t::wait_for_samples; + } + + if (m_thread_exit_requested) { + free_buf(); + return samples_process_result_t::no_samples_needed; + } + + set_state(state_t::decoding); + + if (!vad_status) { + set_speech_detection_status(speech_detection_status_t::no_speech); + } + + LOGD("speech frame: samples=" << m_speech_buf.size()); + + m_segment_time_offset += m_segment_time_discarded_before; + m_segment_time_discarded_before = 0; + + decode_speech(m_speech_buf); + + m_segment_time_offset += (m_segment_time_discarded_after + + (1000 * m_speech_buf.size() / m_sample_rate)); + m_segment_time_discarded_after = 0; + + set_state(state_t::idle); + + if (m_config.speech_mode == speech_mode_t::single_sentence && + (!m_intermediate_text || m_intermediate_text->empty())) { + LOGD("no speech decoded, forcing sentence timeout"); + m_call_backs.sentence_timeout(); + } + + m_speech_buf.clear(); + + flush(eof || m_config.speech_mode == speech_mode_t::single_sentence + ? flush_t::eof + : flush_t::regular); + + free_buf(); + + return samples_process_result_t::wait_for_samples; +} + +void canary_engine::decode_speech(const audio_buf_t& buf) { + LOGD("speech decoding started"); + + create_model(); + + auto decoding_start = std::chrono::steady_clock::now(); + + auto task = py_executor::instance()->execute([&]() { + try { + py::array_t array(buf.size()); + auto r = array.mutable_unchecked<1>(); + for (py::ssize_t i = 0; i < r.shape(0); ++i) r(i) = buf[i]; + + auto torch = py::module_::import("torch"); + auto sf = py::module_::import("soundfile"); + auto tempfile = py::module_::import("tempfile"); + auto os = py::module_::import("os"); + + auto tmp_dir = tempfile.attr("gettempdir")(); + auto tmp_path = py::str(tmp_dir) + py::str("/canary_temp.wav"); + + sf.attr("write")(tmp_path, array, m_sample_rate); + + std::string source_lang = m_auto_lang ? "en" : m_config.lang; + std::string target_lang = m_config.translate ? "en" : source_lang; + std::string task_type = m_config.translate ? "s2t_translation" : "asr"; + + py::list paths; + paths.append(tmp_path); + + auto result = m_model->attr("transcribe")( + paths, + "batch_size"_a = 1, + "source_lang"_a = source_lang, + "target_lang"_a = target_lang, + "task"_a = task_type, + "pnc"_a = m_config.has_option('i') + ); + + os.attr("unlink")(tmp_path); + + std::string text; + if (py::isinstance(result) && py::len(result) > 0) { + text = result[py::int_(0)].cast(); + } + + rtrim(text); + ltrim(text); + + std::string auto_lang = m_auto_lang ? 
"en" : m_config.lang; + + return std::pair(std::move(text), + std::move(auto_lang)); + } catch (const std::exception& err) { + LOGE("canary py error: " << err.what()); + return std::pair({}, {}); + } + }); + + if (!task) return; + + auto [text, auto_lang] = + std::any_cast>(task->get()); + + if (m_thread_exit_requested) return; + + auto stats = report_stats( + buf.size(), m_sample_rate, + static_cast(std::max( + 0L, static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - decoding_start) + .count())))); + + auto result = merge_texts(m_intermediate_text.value_or(std::string{}), + std::move(text)); + + if (m_config.insert_stats) result.append(" " + stats); + +#ifdef DEBUG + LOGD("speech decoded: text=" << result); +#endif + + if (!m_intermediate_text || m_intermediate_text != result) + set_intermediate_text(result, auto_lang); +} diff --git a/src/canary_engine.hpp b/src/canary_engine.hpp new file mode 100644 index 00000000..d4ec807d --- /dev/null +++ b/src/canary_engine.hpp @@ -0,0 +1,56 @@ +/* Copyright (C) 2024-2025 Cole Leavitt + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef CANARY_ENGINE_H +#define CANARY_ENGINE_H + +#undef slots +#include +#include +#define slots Q_SLOTS + +#include +#include +#include +#include + +#include "stt_engine.hpp" + +namespace py = pybind11; + +class canary_engine : public stt_engine { + public: + canary_engine(config_t config, callbacks_t call_backs); + ~canary_engine() override; + + private: + using audio_buf_t = std::vector; + + inline static const size_t m_speech_max_size = m_sample_rate * 30; + inline static const int m_threads = 8; + + std::optional m_model; + audio_buf_t m_speech_buf; + bool m_auto_lang = false; + + void create_model(); + samples_process_result_t process_buff() override; + void decode_speech(const audio_buf_t& buf); + static void push_buf_to_audio_buf( + const std::vector& buf, + audio_buf_t& audio_buf); + static void push_buf_to_audio_buf(in_buf_t::buf_t::value_type* data, + in_buf_t::buf_t::size_type size, + audio_buf_t& audio_buf); + + void reset_impl() override; + void stop_processing_impl() override; + void start_processing_impl() override; + void stop(); +}; + +#endif diff --git a/src/models_manager.cpp b/src/models_manager.cpp index edb20718..35f8311c 100644 --- a/src/models_manager.cpp +++ b/src/models_manager.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2021-2025 Michal Kosciesza +/* Copyright (C) 2021-2025 Michal Kosciesza * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. 
If a copy of the MPL was not distributed with this @@ -136,6 +136,7 @@ QDebug operator<<(QDebug d, models_manager::feature_flags flags) { if (flags & models_manager::engine_stt_fasterwhisper) d << "engine-stt-fasterwhisper, "; if (flags & models_manager::engine_stt_april) d << "engine-stt-april, "; + if (flags & models_manager::engine_stt_canary) d << "engine-stt-canary, "; if (flags & models_manager::engine_tts_espeak) d << "engine-tts-espeak, "; if (flags & models_manager::engine_tts_piper) d << "engine-tts-piper, "; if (flags & models_manager::engine_tts_rhvoice) d << "engine-tts-rhvoice, "; @@ -173,6 +174,9 @@ QDebug operator<<(QDebug d, models_manager::model_engine_t engine) { case models_manager::model_engine_t::stt_april: d << "stt-april"; break; + case models_manager::model_engine_t::stt_canary: + d << "stt-canary"; + break; case models_manager::model_engine_t::ttt_hftc: d << "ttt-hftc"; break; @@ -262,6 +266,7 @@ QDebug operator<<(QDebug d, if (models_availability.tts_kokoro_ja) d << "tts_kokoro_ja,"; if (models_availability.tts_kokoro_zh) d << "tts_kokoro_zh,"; if (models_availability.stt_fasterwhisper) d << "stt_fasterwhisper,"; + if (models_availability.stt_canary) d << "stt_canary,"; if (models_availability.stt_ds) d << "stt_ds,"; if (models_availability.stt_vosk) d << "stt_vosk,"; if (models_availability.stt_whispercpp) d << "stt_whispercpp,"; @@ -1581,6 +1586,7 @@ bool models_manager::is_modelless_engine(model_engine_t engine) { case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: case model_engine_t::stt_april: + case model_engine_t::stt_canary: case model_engine_t::ttt_hftc: case model_engine_t::ttt_tashkeel: case model_engine_t::ttt_unikud: @@ -1606,6 +1612,7 @@ bool models_manager::is_ignore_on_sfos(model_engine_t engine, case model_engine_t::ttt_tashkeel: case model_engine_t::ttt_unikud: case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: case model_engine_t::tts_mimic3: case model_engine_t::tts_whisperspeech: case model_engine_t::tts_parler: @@ -1704,6 +1711,7 @@ models_manager::model_role_t models_manager::role_of_engine( case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: case model_engine_t::stt_april: + case model_engine_t::stt_canary: return model_role_t::stt; case model_engine_t::ttt_hftc: case model_engine_t::ttt_tashkeel: @@ -1736,6 +1744,7 @@ models_manager::model_engine_t models_manager::engine_from_name( if (name == QStringLiteral("stt_fasterwhisper")) return model_engine_t::stt_fasterwhisper; if (name == QStringLiteral("stt_april")) return model_engine_t::stt_april; + if (name == QStringLiteral("stt_canary")) return model_engine_t::stt_canary; if (name == QStringLiteral("ttt_hftc")) return model_engine_t::ttt_hftc; if (name == QStringLiteral("ttt_tashkeel")) return model_engine_t::ttt_tashkeel; @@ -1884,6 +1893,7 @@ models_manager::feature_flags models_manager::add_new_feature( case feature_flags::engine_stt_whisper: case feature_flags::engine_stt_fasterwhisper: case feature_flags::engine_stt_april: + case feature_flags::engine_stt_canary: case feature_flags::engine_tts_espeak: case feature_flags::engine_tts_piper: case feature_flags::engine_tts_rhvoice: @@ -1901,6 +1911,7 @@ models_manager::feature_flags models_manager::add_new_feature( existing_features & feature_flags::engine_stt_whisper || existing_features & feature_flags::engine_stt_fasterwhisper || existing_features & feature_flags::engine_stt_april || + existing_features & feature_flags::engine_stt_canary || existing_features & 
feature_flags::engine_tts_espeak || existing_features & feature_flags::engine_tts_piper || existing_features & feature_flags::engine_tts_rhvoice || @@ -1965,11 +1976,14 @@ models_manager::feature_flags models_manager::add_implicit_feature_flags( break; case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: existing_features = add_new_feature(existing_features, engine == model_engine_t::stt_whisper ? feature_flags::engine_stt_whisper - : feature_flags::engine_stt_fasterwhisper); + : engine == model_engine_t::stt_canary + ? feature_flags::engine_stt_canary + : feature_flags::engine_stt_fasterwhisper); if (model_id.contains("tiny")) { existing_features = add_new_feature(existing_features, @@ -2268,7 +2282,8 @@ auto models_manager::extract_models( return model_alias_of; } if (engine != model_engine_t::stt_whisper && - engine != model_engine_t::stt_fasterwhisper) { + engine != model_engine_t::stt_fasterwhisper && + engine != model_engine_t::stt_canary) { return {}; } auto l = model_id.split('_'); @@ -2397,6 +2412,11 @@ auto models_manager::extract_models( qDebug() << "ignoring fasterwhisper model:" << model_id; continue; } + if (!models_availability->stt_canary && + engine == model_engine_t::stt_canary) { + qDebug() << "ignoring canary model:" << model_id; + continue; + } if (!models_availability->stt_ds && engine == model_engine_t::stt_ds) { qDebug() << "ignoring ds model:" << model_id; @@ -2552,7 +2572,8 @@ auto models_manager::extract_models( // add split by words option for all sam tts models model.options.push_back('w'); } else if ((model.engine == model_engine_t::stt_whisper || - model.engine == model_engine_t::stt_fasterwhisper) && + model.engine == model_engine_t::stt_fasterwhisper || + model.engine == model_engine_t::stt_canary) && !model.disabled && !model.hidden && model.options.contains('t') && model.lang_id == "en") { // remove translate to english option for all english models @@ -2780,6 +2801,7 @@ QString models_manager::file_name_from_id(const QString& id, case model_engine_t::ttt_tashkeel: return id + ".ort"; case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: case model_engine_t::stt_vosk: case model_engine_t::ttt_hftc: case model_engine_t::ttt_unikud: @@ -3022,6 +3044,11 @@ void models_manager::update_models_using_availability_internal() { pair.second.disabled = true; return; } + if (!m_models_availability->stt_canary && + pair.second.engine == model_engine_t::stt_canary) { + pair.second.disabled = true; + return; + } if (!m_models_availability->stt_ds && pair.second.engine == model_engine_t::stt_ds) { pair.second.disabled = true; diff --git a/src/models_manager.h b/src/models_manager.h index 328cd11d..a27073f3 100644 --- a/src/models_manager.h +++ b/src/models_manager.h @@ -49,6 +49,7 @@ class models_manager : public QObject, public singleton { stt_whisper, stt_fasterwhisper, stt_april, + stt_canary, ttt_hftc, ttt_tashkeel, ttt_unikud, @@ -81,6 +82,7 @@ class models_manager : public QObject, public singleton { engine_stt_whisper = 1U << 8U, engine_stt_fasterwhisper = 1U << 9U, engine_stt_april = 1U << 10U, + engine_stt_canary = 1U << 21U, engine_tts_espeak = 1U << 11U, engine_tts_piper = 1U << 12U, engine_tts_rhvoice = 1U << 13U, @@ -190,6 +192,7 @@ class models_manager : public QObject, public singleton { bool tts_kokoro_ja = false; bool tts_kokoro_zh = false; bool stt_fasterwhisper = false; + bool stt_canary = false; bool stt_ds = false; bool stt_vosk = false; bool stt_whispercpp = false; diff 
--git a/src/speech_service.cpp b/src/speech_service.cpp index 572aecc8..56f68df2 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2021-2025 Michal Kosciesza +/* Copyright (C) 2021-2025 Michal Kosciesza * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this @@ -26,6 +26,7 @@ #include "espeak_engine.hpp" #include "f5_engine.hpp" #include "fasterwhisper_engine.hpp" +#include "canary_engine.hpp" #include "file_source.h" #include "gpu_tools.hpp" #include "kokoro_engine.hpp" @@ -1347,6 +1348,8 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, } } else if (model_config->stt->engine == models_manager::model_engine_t::stt_fasterwhisper) { ENGINE_OPTS(fasterwhisper) + } else if (model_config->stt->engine == models_manager::model_engine_t::stt_canary) { + ENGINE_OPTS(canary) } #undef ENGINE_OPTS // clang-format on @@ -1371,6 +1374,10 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, models_manager::model_engine_t::stt_fasterwhisper && type != typeid(fasterwhisper_engine)) return true; + if (model_config->stt->engine == + models_manager::model_engine_t::stt_canary && + type != typeid(canary_engine)) + return true; if (model_config->stt->engine == models_manager::model_engine_t::stt_april && type != typeid(april_engine)) @@ -1463,6 +1470,10 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, m_stt_engine = std::make_unique( std::move(config), std::move(call_backs)); break; + case models_manager::model_engine_t::stt_canary: + m_stt_engine = std::make_unique( + std::move(config), std::move(call_backs)); + break; case models_manager::model_engine_t::stt_april: m_stt_engine = std::make_unique( std::move(config), std::move(call_backs)); From 0cfa962eb9788b78e99fafb0a890a45f2d11b88a Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 00:46:11 -0700 Subject: [PATCH 2/3] feat(stt): add NeMo availability detection for Canary engine Check for nemo.collections.asr module availability at startup. This enables dsnote to automatically detect if NeMo is installed and show/hide Canary models accordingly in the UI. 
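The startup probe is the same import a user can run by hand to confirm the
dependency is present (a minimal check; "python3" here stands for whatever
Python environment dsnote actually uses, e.g. its bundled venv):

    python3 -c "import nemo.collections.asr; print('NeMo ASR available')" \
        || echo "NeMo ASR missing: pip install nemo_toolkit[asr]"
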
- py_tools.hpp: Add nemo_asr to libs_availability_t - py_tools.cpp: Add nemo.collections.asr import check - speech_service.cpp: Map nemo_asr availability to stt_canary --- src/py_tools.cpp | 9 +++++++++ src/py_tools.hpp | 1 + src/speech_service.cpp | 1 + 3 files changed, 11 insertions(+) diff --git a/src/py_tools.cpp b/src/py_tools.cpp index a41190b6..9335f6b1 100644 --- a/src/py_tools.cpp +++ b/src/py_tools.cpp @@ -36,6 +36,7 @@ std::ostream& operator<<(std::ostream& os, os << "py-version=" << availability.py_version << ", coqui-tts=" << availability.coqui_tts << ", faster-whisper=" << availability.faster_whisper + << ", nemo-asr=" << availability.nemo_asr << ", ctranslate2-cuda=" << availability.ctranslate2_cuda << ", mimic3-tts=" << availability.mimic3_tts << ", whisperspeech-tts=" << availability.whisperspeech_tts @@ -234,6 +235,14 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type, LOGD("faster-whisper check py error: " << err.what()); } + try { + LOGD("checking: nemo-asr"); + py::module_::import("nemo.collections.asr"); + availability.nemo_asr = true; + } catch (const std::exception& err) { + LOGD("nemo-asr check py error: " << err.what()); + } + try { LOGD("checking: transformers"); py::module_::import("transformers"); diff --git a/src/py_tools.hpp b/src/py_tools.hpp index 7ac76b51..55e4add5 100644 --- a/src/py_tools.hpp +++ b/src/py_tools.hpp @@ -26,6 +26,7 @@ struct libs_availability_t { bool torch_cuda = false; bool torch_hip = false; bool faster_whisper = false; + bool nemo_asr = false; bool ctranslate2_cuda = false; bool mimic3_tts = false; bool whisperspeech_tts = false; diff --git a/src/speech_service.cpp b/src/speech_service.cpp index 56f68df2..a376f207 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -3310,6 +3310,7 @@ QVariantMap speech_service::features_availability() { /*tts_kokoro_ja=*/py_availability->kokoro_ja, /*tts_kokoro_zh=*/py_availability->kokoro_zh, /*stt_fasterwhisper=*/py_availability->faster_whisper, + /*stt_canary=*/py_availability->nemo_asr, /*stt_ds=*/stt_ds, /*stt_vosk=*/stt_vosk, /*stt_whispercpp=*/stt_whispercpp, From cd44a8458324f4dffbfcba9cc2c01883e20cbffb Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 01:34:54 -0700 Subject: [PATCH 3/3] feat: migrate from Qt5 to Qt6 - Update CMakeLists.txt to use Qt6 instead of Qt5 - Update cmake/*.cmake files for Qt6 compatibility - Replace deprecated Qt5 APIs with Qt6 equivalents: - QRegExp -> QRegularExpression - QX11Info -> QNativeInterface::QX11Application - QMediaPlayer::State -> QMediaPlayer::PlaybackState - QMediaPlayer::stateChanged -> playbackStateChanged - setMedia(QMediaContent) -> setSource(QUrl) - QAudioInput (recording) -> QAudioSource - QAudioDeviceInfo -> QAudioDevice + QMediaDevices - QAudioFormat::setSampleSize/setCodec -> setSampleFormat - QNetworkRequest::FollowRedirectsAttribute -> RedirectPolicyAttribute - Remove Qt::AA_EnableHighDpiScaling (default in Qt6) - Remove QTextCodec usage - Remove QQuickStyle::availableStyles() (not in Qt6) - Fix GCC 15 type strictness (std::clamp/max int vs qsizetype) - Update qhotkey external project to build with Qt6 --- CMakeLists.txt | 16 ++++---- cmake/dbus_api.cmake | 6 +-- cmake/openblas_pkgconfig.cmake | 2 +- cmake/qhotkey.cmake | 6 +-- cmake/translations.cmake | 4 +- src/app_server.cpp | 10 ++--- src/dbus_application_adaptor.h | 2 +- src/dbus_speech_adaptor.h | 2 +- src/dsnote_app.cpp | 70 +++++++++++++++++----------------- src/fake_keyboard.cpp | 27 +++++++------ src/fake_keyboard.hpp | 4 +- 
src/main.cpp | 3 +- src/mic_source.cpp | 70 ++++++++++++++-------------------- src/mic_source.h | 4 +- src/models_manager.cpp | 9 +++-- src/module_tools.cpp | 4 +- src/py_tools.cpp | 2 +- src/recorder.cpp | 39 +++++++++---------- src/recorder.hpp | 4 +- src/settings.cpp | 44 ++++++++++++--------- src/settings.h | 4 ++ src/speech_service.cpp | 27 +++++++------ src/speech_service.h | 2 +- src/wl_clipboard.cpp | 2 +- 24 files changed, 184 insertions(+), 179 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bdbc4e73..ec2b64d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -535,17 +535,17 @@ pkg_search_module(pulse REQUIRED libpulse) list(APPEND deps_libs ${pulse_LIBRARIES}) list(APPEND includes ${pulse_INCLUDE_DIRS}) -find_package(Qt5 COMPONENTS Core Network Multimedia Qml Xml Sql Gui Quick DBus LinguistTools REQUIRED) -list(APPEND deps_libs Qt5::Core Qt5::Network Qt5::Multimedia Qt5::Gui Qt5::Quick Qt5::DBus Qt5::Xml) +find_package(Qt6 COMPONENTS Core Network Multimedia Qml Xml Sql Gui Quick DBus LinguistTools REQUIRED) +list(APPEND deps_libs Qt6::Core Qt6::Network Qt6::Multimedia Qt6::Gui Qt6::Quick Qt6::DBus Qt6::Xml) if(WITH_DESKTOP) - find_package(Qt5 COMPONENTS QuickControls2 Widgets REQUIRED) - list(APPEND deps_libs Qt5::QuickControls2 Qt5::Widgets) + find_package(Qt6 COMPONENTS QuickControls2 Widgets REQUIRED) + list(APPEND deps_libs Qt6::QuickControls2 Qt6::Widgets) pkg_search_module(wayland REQUIRED wayland-client) list(APPEND deps_libs ${wayland_LIBRARIES}) list(APPEND includes ${wayland_INCLUDE_DIRS}) - include_directories(${Qt5Gui_PRIVATE_INCLUDE_DIRS}) + include_directories(${Qt6Gui_PRIVATE_INCLUDE_DIRS}) if(BUILD_XKBCOMMON) include(${cmake_path}/xkbcommon.cmake) @@ -567,8 +567,10 @@ if(WITH_DESKTOP) find_package(X11 REQUIRED) list(APPEND deps_libs X11) - find_package(Qt5 COMPONENTS X11Extras REQUIRED) - list(APPEND deps_libs Qt5::X11Extras) + # Qt6: X11Extras removed, functionality now in QtGui via QNativeInterface + pkg_search_module(xcb REQUIRED xcb) + list(APPEND deps_libs ${xcb_LIBRARIES}) + list(APPEND includes ${xcb_INCLUDE_DIRS}) if(BUILD_QHOTKEY) include(${cmake_path}/qhotkey.cmake) diff --git a/cmake/dbus_api.cmake b/cmake/dbus_api.cmake index 93bb5298..74143997 100644 --- a/cmake/dbus_api.cmake +++ b/cmake/dbus_api.cmake @@ -2,12 +2,12 @@ set(dbus_dsnote_interface_file "${PROJECT_BINARY_DIR}/${info_dbus_app_interface} configure_file(${dbus_dir}/dsnote.xml.in ${dbus_dsnote_interface_file}) -find_package(Qt5 COMPONENTS DBus REQUIRED) +find_package(Qt6 COMPONENTS DBus REQUIRED) unset(qdbusxml2cpp_bin CACHE) -find_program(qdbusxml2cpp_bin qdbusxml2cpp) +find_program(qdbusxml2cpp_bin qdbusxml2cpp HINTS ${Qt6_DIR}/../../../libexec ${Qt6_DIR}/../../../bin) if(${qdbusxml2cpp_bin} MATCHES "-NOTFOUND$") - find_program(qdbusxml2cpp_bin qdbusxml2cpp-qt5) + find_program(qdbusxml2cpp_bin qdbusxml2cpp-qt6) if(${qdbusxml2cpp_bin} MATCHES "-NOTFOUND$") message(FATAL_ERROR "qdbusxml2cpp not found but it is required") endif() diff --git a/cmake/openblas_pkgconfig.cmake b/cmake/openblas_pkgconfig.cmake index 4b6f33a6..b2ba9576 100644 --- a/cmake/openblas_pkgconfig.cmake +++ b/cmake/openblas_pkgconfig.cmake @@ -1,6 +1,6 @@ pkg_search_module(openblas openblas) -if(NOT DEFINED ${openblas_FOUND}) +if(NOT openblas_FOUND) # check without pkg-config set(BLA_STATIC OFF) set(BLA_VENDOR "OpenBLAS") diff --git a/cmake/qhotkey.cmake b/cmake/qhotkey.cmake index c454cb96..79c9f258 100644 --- a/cmake/qhotkey.cmake +++ b/cmake/qhotkey.cmake @@ -15,10 +15,10 @@ 
ExternalProject_Add(qhotkey -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POLICY_VERSION_MINIMUM=3.5 + -DQT_DEFAULT_MAJOR_VERSION=6 BUILD_ALWAYS False ) -find_package(Qt5 COMPONENTS X11Extras REQUIRED) - -list(APPEND deps_libs Qt5::X11Extras "${external_lib_dir}/libqhotkey.a") +# Qt6: X11Extras removed, functionality now in QtGui via QNativeInterface +list(APPEND deps_libs "${external_lib_dir}/libqhotkey.a") list(APPEND deps qhotkey) diff --git a/cmake/translations.cmake b/cmake/translations.cmake index c2642019..5fd1d531 100644 --- a/cmake/translations.cmake +++ b/cmake/translations.cmake @@ -3,7 +3,7 @@ set(enabled_translations ar ca_ES cs de en es fr fr_CA it nl no pl ru sv sl tr_T # finished translations set(enabled_translations ar ca_ES de en es fr fr_CA it nl no pl ru sv sl tr_TR uk zh_CN zh_TW) -find_package(Qt5 COMPONENTS Core LinguistTools) +find_package(Qt6 COMPONENTS Core LinguistTools) set(ts_files "") foreach(lang ${enabled_translations}) @@ -24,6 +24,6 @@ function(ADD_TRANSLATIONS_RESOURCE res_file) set(${res_file} ${_res_file} PARENT_SCOPE) endfunction() -qt5_create_translation(qm_files ${CMAKE_SOURCE_DIR}/src ${desktop_dir}/qml ${sfos_dir}/qml ${ts_files}) +qt_add_translation(qm_files ${ts_files}) add_translations_resource(translations_res ${qm_files}) diff --git a/src/app_server.cpp b/src/app_server.cpp index 4309240a..ce1657fe 100644 --- a/src/app_server.cpp +++ b/src/app_server.cpp @@ -134,7 +134,7 @@ int app_server::request_another_instance(const cmd::options &options) { auto model = qdbus_cast( m.template value()); return std::max(model.contains("id") - ? model.value("id").toString().size() + ? static_cast(model.value("id").toString().size()) : size, size); }); @@ -204,10 +204,10 @@ int app_server::request_another_instance(const cmd::options &options) { g_max_size = std::max(g_max_size, std::max(modelStt.contains("id") - ? modelStt.value("id").toString().size() + ? static_cast(modelStt.value("id").toString().size()) : 1, modelTts.contains("id") - ? modelTts.value("id").toString().size() + ? static_cast(modelTts.value("id").toString().size()) : 1)); print_active_model("STT", g_max_size, modelStt); print_active_model("TTS", g_max_size, modelTts); @@ -215,14 +215,14 @@ int app_server::request_another_instance(const cmd::options &options) { auto modelStt = iface.activeSttModel(); g_max_size = std::max(g_max_size, modelStt.contains("id") - ? modelStt.value("id").toString().size() + ? static_cast(modelStt.value("id").toString().size()) : 1); print_active_model("STT", g_max_size, modelStt); } else if (options.active_model_to_print_role & cmd::role_tts) { auto modelTts = iface.activeTtsModel(); g_max_size = std::max(g_max_size, modelTts.contains("id") - ? modelTts.value("id").toString().size() + ? 
static_cast(modelTts.value("id").toString().size()) : 1); print_active_model("TTS", g_max_size, modelTts); } diff --git a/src/dbus_application_adaptor.h b/src/dbus_application_adaptor.h index a5816fe4..af185d32 100644 --- a/src/dbus_application_adaptor.h +++ b/src/dbus_application_adaptor.h @@ -19,7 +19,7 @@ class QByteArray; template class QList; template class QMap; class QString; -class QStringList; +// class QStringList; // Removed: typedef in Qt6 class QVariant; QT_END_NAMESPACE diff --git a/src/dbus_speech_adaptor.h b/src/dbus_speech_adaptor.h index 564641c7..056b146e 100644 --- a/src/dbus_speech_adaptor.h +++ b/src/dbus_speech_adaptor.h @@ -19,7 +19,7 @@ class QByteArray; template class QList; template class QMap; class QString; -class QStringList; +// class QStringList; // Removed: typedef in Qt6 class QVariant; QT_END_NAMESPACE diff --git a/src/dsnote_app.cpp b/src/dsnote_app.cpp index 0490590f..ee91ed81 100644 --- a/src/dsnote_app.cpp +++ b/src/dsnote_app.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -477,13 +477,11 @@ dsnote_app::dsnote_app(QObject *parent) } void dsnote_app::create_player() { - m_player = std::make_unique(QObject::parent(), - QMediaPlayer::LowLatency); - m_player->setNotifyInterval(100); + m_player = std::make_unique(QObject::parent()); connect( - m_player.get(), &QMediaPlayer::stateChanged, this, - [this](QMediaPlayer::State state) { + m_player.get(), &QMediaPlayer::playbackStateChanged, this, + [this](QMediaPlayer::PlaybackState state) { qDebug() << "player state changed:" << state; emit player_playing_changed(); }, @@ -738,11 +736,12 @@ settings::trans_rule_flags_t dsnote_app::apply_trans_rule( ? rule.flags : trans_rule_flags_t::TransRuleNone; break; - case trans_rule_type_t::TransRuleTypeMatchRe: - rule_matches = text.contains( - QRegExp{rule.pattern, - case_sens ? Qt::CaseSensitive : Qt::CaseInsensitive}); + case trans_rule_type_t::TransRuleTypeMatchRe: { + QRegularExpression::PatternOptions opts = QRegularExpression::NoPatternOption; + if (!case_sens) opts |= QRegularExpression::CaseInsensitiveOption; + rule_matches = text.contains(QRegularExpression{rule.pattern, opts}); break; + } case trans_rule_type_t::TransRuleTypeReplaceSimple: { rule_matches = text.contains(rule.pattern, case_sens ? Qt::CaseSensitive @@ -757,29 +756,30 @@ settings::trans_rule_flags_t dsnote_app::apply_trans_rule( auto replace = rule.replace; replace.replace("\\n", "\n"); - QRegExp rx{rule.pattern, - case_sens ? 
Qt::CaseSensitive : Qt::CaseInsensitive}; + QRegularExpression::PatternOptions opts = QRegularExpression::NoPatternOption; + if (!case_sens) opts |= QRegularExpression::CaseInsensitiveOption; + QRegularExpression rx{rule.pattern, opts}; rule_matches = text.contains(rx); if (rule_matches) { if (!rule.replace.contains("\\U") && !replace.contains("\\u")) { text.replace(rx, replace); } else { - int pos = 0; + qsizetype pos = 0; + QRegularExpressionMatch match; while (pos < text.size() && - (pos = rx.indexIn(text, pos)) != -1 && - rx.matchedLength() > 0) { + (match = rx.match(text, pos)).hasMatch()) { QString after = replace; - for (int i = 1; i < rx.captureCount() + 1; ++i) { + for (int i = 1; i <= match.lastCapturedIndex(); ++i) { after.replace(QStringLiteral("\\U\\%1").arg(i), - rx.cap(i).toUpper()); + match.captured(i).toUpper()); after.replace(QStringLiteral("\\u\\%1").arg(i), - rx.cap(i).toLower()); + match.captured(i).toLower()); after.replace(QStringLiteral("\\%1").arg(i), - rx.cap(i)); + match.captured(i)); } - text.replace(pos, rx.matchedLength(), after); - pos += after.size(); + text.replace(match.capturedStart(), match.capturedLength(), after); + pos = match.capturedStart() + after.size(); } } } @@ -868,7 +868,7 @@ QVariantList dsnote_app::test_trans_rule(unsigned int flags, } bool dsnote_app::trans_rule_re_pattern_valid(const QString &pattern) { - return QRegExp{pattern}.isValid(); + return QRegularExpression{pattern}.isValid(); } void dsnote_app::update_trans_rule(int index, unsigned int flags, @@ -1927,7 +1927,7 @@ void dsnote_app::update_available_tts_ref_voices() { QVariantMap new_available_tts_ref_voices_map{}; const auto ref_voices_dir = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(s_ref_voices_dir_name); auto scan_ref_voices = [&] { @@ -2873,7 +2873,7 @@ void dsnote_app::play_speech() { } void dsnote_app::play_speech_selected(int start, int end) { - auto size = note().size(); + auto size = static_cast(note().size()); if (size == 0) return; @@ -2893,7 +2893,7 @@ void dsnote_app::play_speech_selected(int start, int end) { void dsnote_app::play_speech_translator_selected(int start, int end, bool transtalated) { - auto size = transtalated ? m_translated_text.size() : note().size(); + auto size = static_cast(transtalated ? m_translated_text.size() : note().size()); if (size == 0) return; @@ -3027,7 +3027,7 @@ void dsnote_app::handle_translate_delayed() { } void dsnote_app::translate_selected(int start, int end) { - auto size = note().size(); + auto size = static_cast(note().size()); if (size == 0) return; @@ -5254,7 +5254,7 @@ QString dsnote_app::import_ref_voice_file_path() { void dsnote_app::player_stop_voice_ref() { if (!m_player) return; - m_player->setMedia({}); + m_player->setSource(QUrl{}); m_player_current_voice_ref_idx = -1; emit player_current_voice_ref_idx_changed(); @@ -5329,7 +5329,7 @@ void dsnote_app::player_import_rec() { void dsnote_app::player_set_path(const QString &wav_file_path) { if (!m_player) create_player(); - m_player->setMedia( + m_player->setSource( QUrl{QStringLiteral("gst-pipeline: filesrc location=%1 ! wavparse ! " "audioconvert ! 
alsasink") .arg(wav_file_path)}); @@ -5351,10 +5351,12 @@ QString dsnote_app::tts_ref_voice_unique_name(QString name, if (!add_number && !names.contains(name)) return name; int i = 1; - QRegExp rx{"\\d+$"}; - if (auto idx = rx.indexIn(name); idx >= 0) { + QRegularExpression rx{QStringLiteral("\\d+$")}; + QRegularExpressionMatch match = rx.match(name); + if (match.hasMatch()) { + auto idx = match.capturedStart(); bool ok = false; - auto ii = name.midRef(idx).toInt(&ok); + auto ii = QStringView{name}.mid(idx).toInt(&ok); if (ok && ii < 99999) { i = ii; name = name.mid(0, idx) + "%1"; @@ -5381,7 +5383,7 @@ void dsnote_app::player_export_ref_voice(long long start, long long stop, const QString &name, const QString &text) { QDir ref_voices_dir{ - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(s_ref_voices_dir_name)}; QString out_file_path; @@ -5428,7 +5430,7 @@ void dsnote_app::player_reset() { if (!m_player) return; QFile{import_ref_voice_file_path()}.remove(); - m_player->setMedia({}); + m_player->setSource(QUrl{}); } bool dsnote_app::player_ready() const { @@ -5487,7 +5489,7 @@ void dsnote_app::recorder_stop() { void dsnote_app::recorder_reset() { m_recorder.reset(); } bool dsnote_app::player_playing() const { - return m_player && m_player->state() == QMediaPlayer::State::PlayingState; + return m_player && m_player->playbackState() == QMediaPlayer::PlaybackState::PlayingState; } void dsnote_app::player_set_position(long long position) { diff --git a/src/fake_keyboard.cpp b/src/fake_keyboard.cpp index 0facd62f..7985ab75 100644 --- a/src/fake_keyboard.cpp +++ b/src/fake_keyboard.cpp @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -567,7 +566,10 @@ void fake_keyboard::init_ydo() { if (!m_xkb_ctx) throw std::runtime_error{"no xkb context"}; #ifdef USE_X11_FEATURES - auto *xcb_conn = QX11Info::connection(); + xcb_connection_t *xcb_conn = nullptr; + if (auto *x11App = qGuiApp->nativeInterface()) { + xcb_conn = x11App->connection(); + } if (xcb_conn) { auto device_id = xkb_x11_get_core_keyboard_device_id(xcb_conn); if (device_id == -1) throw std::runtime_error{"no xkb keyboard"}; @@ -756,10 +758,13 @@ void fake_keyboard::send_text_xdo(const QString &text) { void fake_keyboard::init_legacy() { LOGD("using legacy fake-keyboard"); - m_x11_display = QX11Info::display(); + auto *x11App = qGuiApp->nativeInterface(); + if (!x11App) throw std::runtime_error{"no x11 application"}; + + m_x11_display = x11App->display(); if (!m_x11_display) throw std::runtime_error{"no x11 display"}; - m_xcb_conn = QX11Info::connection(); + m_xcb_conn = x11App->connection(); if (!m_xcb_conn) throw std::runtime_error{"no xcb connection"}; auto device_id = xkb_x11_get_core_keyboard_device_id(m_xcb_conn); @@ -818,11 +823,12 @@ void fake_keyboard::init_legacy() { void fake_keyboard::init_xdo() { LOGD("using xdo fake-keyboard"); - if (!QX11Info::display()) { + auto *x11App = qGuiApp->nativeInterface(); + if (!x11App || !x11App->display()) { LOGF("no x11 display"); } - m_xdo = xdo_new_with_opened_display(QX11Info::display(), nullptr, 0); + m_xdo = xdo_new_with_opened_display(x11App->display(), nullptr, 0); if (!m_xdo) { LOGF("can't create xdo"); } @@ -947,14 +953,13 @@ void fake_keyboard::connect_wayland() { std::lock_guard lock{m_wl_mtx}; - auto *native = QGuiApplication::platformNativeInterface(); - if (!native) { - LOGW("can't get native interface"); + auto *waylandApp = 
qGuiApp->nativeInterface(); + if (!waylandApp) { + LOGW("can't get wayland native interface"); return; } - m_wl_display = static_cast( - native->nativeResourceForIntegration("display")); + m_wl_display = waylandApp->display(); if (!m_wl_display) { LOGW("can't get wl display interface"); return; diff --git a/src/fake_keyboard.hpp b/src/fake_keyboard.hpp index 968e1598..ddaf5085 100644 --- a/src/fake_keyboard.hpp +++ b/src/fake_keyboard.hpp @@ -21,8 +21,8 @@ #include #ifdef USE_X11_FEATURES -#include - +struct _XDisplay; +typedef struct _XDisplay Display; struct xcb_connection_t; struct xkb_context; struct xkb_keymap; diff --git a/src/main.cpp b/src/main.cpp index 3586c2ea..3f52edfa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,7 +16,7 @@ #include #include #include -#include + #include #include #include @@ -520,7 +520,6 @@ int main(int argc, char* argv[]) { #ifdef USE_SFOS const auto& app = *SailfishApp::application(argc, argv); #else - QCoreApplication::setAttribute(Qt::AA_EnableHighDpiScaling); QApplication app(argc, argv); QGuiApplication::setWindowIcon(QIcon{QStringLiteral(":/app_icon.svg")}); #endif diff --git a/src/mic_source.cpp b/src/mic_source.cpp index 8394e1cf..85fd3f40 100644 --- a/src/mic_source.cpp +++ b/src/mic_source.cpp @@ -8,6 +8,8 @@ #include "mic_source.h" #include +#include +#include #include mic_source::mic_source(const QString& preferred_audio_input, QObject* parent) @@ -33,38 +35,37 @@ void mic_source::stop() { } void mic_source::slowdown() { - // do notning } void mic_source::speedup() { - // do notning } static QAudioFormat audio_format() { QAudioFormat format; format.setSampleRate(16000); format.setChannelCount(1); - format.setSampleSize(16); - format.setCodec(QStringLiteral("audio/pcm")); - format.setByteOrder(QAudioFormat::LittleEndian); - format.setSampleType(QAudioFormat::SignedInt); + format.setSampleFormat(QAudioFormat::Int16); return format; } static bool has_audio_input(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); return std::find_if(ad_list.cbegin(), ad_list.cend(), [&name](const auto& ad) { - return ad.deviceName() == name; + return ad.description() == name; }) != ad_list.cend(); } -static QAudioDeviceInfo audio_input_info(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); - return *std::find_if( +static QAudioDevice audio_input_info(const QString& name) { + auto ad_list = QMediaDevices::audioInputs(); + auto it = std::find_if( ad_list.cbegin(), ad_list.cend(), - [&name](const auto& ad) { return ad.deviceName() == name; }); + [&name](const auto& ad) { return ad.description() == name; }); + if (it != ad_list.cend()) { + return *it; + } + return QMediaDevices::defaultAudioInput(); } QStringList mic_source::audio_inputs() { @@ -72,12 +73,12 @@ QStringList mic_source::audio_inputs() { auto format = audio_format(); - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); qDebug() << "supported audio input devices:"; for (const auto& ad : ad_list) { if (ad.isFormatSupported(format)) { - qDebug() << ad.deviceName(); - list.push_back(ad.deviceName()); + qDebug() << ad.description(); + list.push_back(ad.description()); } } @@ -92,34 +93,34 @@ void mic_source::init_audio(const QString& preferred_audio_input) { auto input_name{preferred_audio_input}; if (preferred_audio_input.isEmpty() || !has_audio_input(preferred_audio_input)) { - auto info = 
QAudioDeviceInfo::defaultInputDevice(); + auto info = QMediaDevices::defaultAudioInput(); if (info.isNull()) { qWarning() << "no audio input"; throw std::runtime_error("no audio input"); } - input_name = info.deviceName(); + input_name = info.description(); } auto input_info = audio_input_info(input_name); if (!input_info.isFormatSupported(format)) { qWarning() << "format not supported for audio input:" - << input_info.deviceName(); + << input_info.description(); throw std::runtime_error("audio format is not supported"); } - qDebug() << "using audio input:" << input_info.deviceName() + qDebug() << "using audio input:" << input_info.description() << "(preferred was " << preferred_audio_input << ")"; - m_audio_input = std::make_unique(input_info, format); + m_audio_input = std::make_unique(input_info, format); - connect(m_audio_input.get(), &QAudioInput::stateChanged, this, + connect(m_audio_input.get(), &QAudioSource::stateChanged, this, &mic_source::handle_state_changed); } void mic_source::start() { m_audio_device = m_audio_input->start(); - m_timer.setInterval(200); // 200 ms + m_timer.setInterval(200); connect(&m_timer, &QTimer::timeout, this, &mic_source::handle_read_timeout); m_timer.start(); } @@ -145,11 +146,6 @@ void mic_source::handle_read_timeout() { if (m_stopped && m_audio_input->state() != QAudio::State::SuspendedState) stop(); - /*bool bytes_available = !m_eof || m_audio_input->bytesReady() > 0; - qDebug() << "mic read timeout: b_avai=" << bytes_available - << "eof=" << m_eof << "ended=" << m_ended << "sof=" << m_sof - << "b_ready=" << m_audio_input->bytesReady();*/ - if (m_ended) { emit ended(); m_timer.stop(); @@ -159,24 +155,12 @@ void mic_source::handle_read_timeout() { emit audio_available(); } -void mic_source::clear() { - qDebug() << "mic clear"; - - char buff[std::numeric_limits::max()]; - while (m_audio_device->read(buff, std::numeric_limits::max())) - continue; -} - -audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { +mic_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { audio_data data; data.data = buf; data.sof = m_sof; - bool bytes_available = !m_eof || m_audio_input->bytesReady() > 0; - - /*qDebug() << "read_audio: b_avai=" << bytes_available << "eof=" << m_eof - << "ended=" << m_ended << "sof=" << m_sof - << "b_ready=" << m_audio_input->bytesReady();*/ + bool bytes_available = !m_eof || m_audio_input->bytesAvailable() > 0; if (!bytes_available) { data.eof = m_eof; @@ -184,7 +168,7 @@ audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { return data; } - data.size = m_audio_device->read(buf, max_size); + data.size = static_cast(m_audio_device->read(buf, static_cast(max_size))); data.eof = m_eof && !bytes_available; m_sof = false; @@ -193,3 +177,5 @@ audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { return data; } + +void mic_source::clear() { m_audio_device->readAll(); } diff --git a/src/mic_source.h b/src/mic_source.h index a4fec016..a5b7a141 100644 --- a/src/mic_source.h +++ b/src/mic_source.h @@ -8,7 +8,7 @@ #ifndef MIC_SOURCE_H #define MIC_SOURCE_H -#include +#include #include #include #include @@ -33,7 +33,7 @@ class mic_source : public audio_source { static QStringList audio_inputs(); private: - std::unique_ptr m_audio_input; + std::unique_ptr m_audio_input; QTimer m_timer; QIODevice* m_audio_device = nullptr; bool m_eof = false; diff --git a/src/models_manager.cpp b/src/models_manager.cpp index 35f8311c..2517baeb 100644 --- a/src/models_manager.cpp +++ 
b/src/models_manager.cpp @@ -814,7 +814,8 @@ void models_manager::download(const QString& id, download_type type, int part, : model.size; QNetworkRequest request{url}; - request.setAttribute(QNetworkRequest::FollowRedirectsAttribute, true); + request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, + QNetworkRequest::NoLessSafeRedirectPolicy); if (type == download_type::all || type == download_type::model_sup) { path = model_path(model.file_name); @@ -1354,7 +1355,7 @@ void models_manager::init_config() { } QString data_dir{ - QStandardPaths::writableLocation(QStandardPaths::DataLocation)}; + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)}; QDir dir{data_dir}; if (!dir.exists()) if (!dir.mkpath(data_dir)) qWarning() << "failed to create data dir"; @@ -2724,7 +2725,7 @@ void models_manager::reset_models() { qDebug() << "removing models file"; auto models_file_path = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(models_file); QFile{models_file_path}.remove(); @@ -2734,7 +2735,7 @@ void models_manager::parse_models_file( bool reset, langs_t* langs, packs_t* packs, models_t* models, std::optional models_availability) { const auto models_file_path = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(models_file); if (!QFile::exists(models_file_path)) init_config(); diff --git a/src/module_tools.cpp b/src/module_tools.cpp index 1aad41df..b1060aac 100644 --- a/src/module_tools.cpp +++ b/src/module_tools.cpp @@ -44,7 +44,7 @@ static QString runtime_prefix() { namespace module_tools { QString unpacked_dir(const QString& name) { return QStringLiteral("%1/%2").arg( - QStandardPaths::writableLocation(QStandardPaths::DataLocation), name); + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation), name); } bool init_module(const QString& name) { @@ -158,7 +158,7 @@ bool unpack_module(const QString& name) { } auto unpack_dir = - QStandardPaths::writableLocation(QStandardPaths::DataLocation); + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation); auto unpack_file = QStringLiteral("%1/%2.tar").arg(unpack_dir, name); QDir{QStringLiteral("%1/%2").arg(unpack_dir, name)}.removeRecursively(); diff --git a/src/py_tools.cpp b/src/py_tools.cpp index 9335f6b1..53133096 100644 --- a/src/py_tools.cpp +++ b/src/py_tools.cpp @@ -380,7 +380,7 @@ bool init_module() { if (!module_tools::init_module(QStringLiteral("python"))) return false; auto py_path = - QStandardPaths::writableLocation(QStandardPaths::DataLocation) + "/" + + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + "/" + python_site_path; qDebug() << "setting env PYTHONPATH=" << py_path; diff --git a/src/recorder.cpp b/src/recorder.cpp index b0b61785..a8119172 100644 --- a/src/recorder.cpp +++ b/src/recorder.cpp @@ -8,6 +8,8 @@ #include "recorder.hpp" #include +#include +#include #include #include #include @@ -17,18 +19,22 @@ #include "settings.h" static bool has_audio_input(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); return std::find_if(ad_list.cbegin(), ad_list.cend(), [&name](const auto& ad) { - return ad.deviceName() == name; + return ad.description() == name; }) != ad_list.cend(); } -static QAudioDeviceInfo audio_input_info(const QString& name) { - auto ad_list = 
QAudioDeviceInfo::availableDevices(QAudio::AudioInput); - return *std::find_if( +static QAudioDevice audio_input_info(const QString& name) { + auto ad_list = QMediaDevices::audioInputs(); + auto it = std::find_if( ad_list.cbegin(), ad_list.cend(), - [&name](const auto& ad) { return ad.deviceName() == name; }); + [&name](const auto& ad) { return ad.description() == name; }); + if (it != ad_list.cend()) { + return *it; + } + return QMediaDevices::defaultAudioInput(); } recorder::recorder(QString wav_file_path, QObject* parent) @@ -72,35 +78,31 @@ void recorder::init() { auto input_name = settings::instance()->audio_input_device(); if (input_name.isEmpty() || !has_audio_input(input_name)) { - auto info = QAudioDeviceInfo::defaultInputDevice(); + auto info = QMediaDevices::defaultAudioInput(); if (info.isNull()) { qWarning() << "no audio input"; throw std::runtime_error("no audio input"); } - input_name = info.deviceName(); + input_name = info.description(); } auto input_info = audio_input_info(input_name); if (!input_info.isFormatSupported(format)) { qWarning() << "format not supported for audio input:" - << input_info.deviceName(); + << input_info.description(); throw std::runtime_error("audio format is not supported"); } - qDebug() << "using audio input:" << input_info.deviceName(); - m_audio_input = std::make_unique(input_info, format); + qDebug() << "using audio input:" << input_info.description(); + m_audio_input = std::make_unique(input_info, format); - connect(m_audio_input.get(), &QAudioInput::stateChanged, this, + connect(m_audio_input.get(), &QAudioSource::stateChanged, this, [this](QAudio::State new_state) { qDebug() << "recorder state:" << new_state; emit recording_changed(); }); - connect(m_audio_input.get(), &QAudioInput::notify, this, [this]() { - m_duration = m_audio_input->elapsedUSecs() / 1000000; - emit duration_changed(); - }); } } @@ -108,10 +110,7 @@ QAudioFormat recorder::make_audio_format() { QAudioFormat format; format.setSampleRate(m_sample_rate); format.setChannelCount(m_num_channels); - format.setSampleSize(16); - format.setCodec(QStringLiteral("audio/pcm")); - format.setByteOrder(QAudioFormat::LittleEndian); - format.setSampleType(QAudioFormat::SignedInt); + format.setSampleFormat(QAudioFormat::Int16); return format; } diff --git a/src/recorder.hpp b/src/recorder.hpp index 0456c785..450140a6 100644 --- a/src/recorder.hpp +++ b/src/recorder.hpp @@ -9,7 +9,7 @@ #define RECORDER_H #include -#include +#include #include #include #include @@ -61,7 +61,7 @@ class recorder final : public QObject { uint32_t data_size = 0; }; - std::unique_ptr m_audio_input; + std::unique_ptr m_audio_input; QString m_input_file_path; QString m_wav_file_path; QFile m_audio_device; diff --git a/src/settings.cpp b/src/settings.cpp index 6846e510..9f5b289f 100644 --- a/src/settings.cpp +++ b/src/settings.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,6 +33,19 @@ #include "module_tools.hpp" #include "qtlogger.hpp" +#ifdef USE_DESKTOP +static QStringList qt6_available_styles() { + return QStringList{ + QStringLiteral("Basic"), + QStringLiteral("Fusion"), + QStringLiteral("Material"), + QStringLiteral("Universal"), + QStringLiteral("org.kde.desktop"), + QStringLiteral("org.kde.breeze") + }; +} +#endif + QDebug operator<<(QDebug d, settings::mode_t mode) { switch (mode) { case settings::mode_t::Stt: @@ -540,10 +553,12 @@ static QString file_save_filename(const QDir& dir, QString filename, return filename + '.' 
+ ext; } - QRegExp rx{"\\d+$"}; - if (auto idx = rx.indexIn(filename); idx >= 0) { + QRegularExpression rx{QStringLiteral("\\d+$")}; + QRegularExpressionMatch match = rx.match(filename); + if (match.hasMatch()) { + auto idx = match.capturedStart(); bool ok = false; - auto ii = filename.midRef(idx).toInt(&ok); + auto ii = QStringView{filename}.mid(idx).toInt(&ok); if (ok && ii < max_i) { i = ii; filename = filename.mid(0, idx) + "%1." + ext; @@ -1009,7 +1024,7 @@ int settings::qt_style_idx() const { #ifdef USE_DESKTOP auto name = qt_style_name(); - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); if (name.isEmpty()) return styles.size(); @@ -1020,7 +1035,7 @@ int settings::qt_style_idx() const { void settings::set_qt_style_idx([[maybe_unused]] int value) { #ifdef USE_DESKTOP - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); if (value < 0 || value >= styles.size()) { set_qt_style_name({}); @@ -1036,7 +1051,7 @@ QString settings::qt_style_name() const { auto name = value(QStringLiteral("qt_style_name"), default_qt_style).toString(); - if (!QQuickStyle::availableStyles().contains(name)) return {}; + if (!qt6_available_styles().contains(name)) return {}; return name; #else @@ -1046,7 +1061,7 @@ QString settings::qt_style_name() const { void settings::set_qt_style_name([[maybe_unused]] QString name) { #ifdef USE_DESKTOP - if (!QQuickStyle::availableStyles().contains(name)) name.clear(); + if (!qt6_available_styles().contains(name)) name.clear(); if (qt_style_name() != name) { setValue(QStringLiteral("qt_style_name"), name); @@ -1323,7 +1338,7 @@ QString settings::audio_format_str() const { QStringList settings::qt_styles() const { #ifdef USE_DESKTOP - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); styles.append(tr("Don't force any style")); return styles; #else @@ -1364,17 +1379,9 @@ void settings::update_qt_style(QQmlApplicationEngine* engine) { engine->addImportPath(QStringLiteral("%1/qml").arg(prefix)); } - if (auto prefix = module_tools::path_to_dir_for_path( - QStringLiteral("lib"), QStringLiteral("qml/QtQuick/Controls.2")); - !prefix.isEmpty()) { - QQuickStyle::addStylePath( - QStringLiteral("%1/qml/QtQuick/Controls.2").arg(prefix)); - } - - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); LOGD("available styles: " << styles); - LOGD("style paths: " << QQuickStyle::stylePathList()); LOGD("import paths: " << engine->importPathList()); LOGD("library paths: " << QCoreApplication::libraryPaths()); @@ -1750,6 +1757,7 @@ X(whispercpp) } X(whispercpp) X(fasterwhisper) +X(canary) #undef X #define X(name, enabled) \ diff --git a/src/settings.h b/src/settings.h index 2c91ae7d..3d25ba1e 100644 --- a/src/settings.h +++ b/src/settings.h @@ -65,6 +65,7 @@ #define GPU_ENGINE_TABLE \ X(whispercpp, false) \ X(fasterwhisper, false) \ + X(canary, true) \ X(coqui, true) \ X(whisperspeech, true) \ X(parler, true) \ @@ -397,6 +398,7 @@ class settings : public QSettings, public singleton { set_##name##_profile NOTIFY name##_changed) X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ Q_PROPERTY(bool name##_use_gpu READ name##_use_gpu WRITE \ @@ -996,6 +998,7 @@ class settings : public QSettings, public singleton { Q_INVOKABLE void reset_##name##_options(); X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ bool name##_use_gpu() const; \ @@ -1101,6 +1104,7 @@ class settings : public QSettings, public singleton { #define 
X(name) void name##_changed(); X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ void name##_gpu_device_changed(); \ diff --git a/src/speech_service.cpp b/src/speech_service.cpp index a376f207..f0d1cefd 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -263,7 +263,7 @@ speech_service::speech_service(QObject *parent) connect( this, &speech_service::requet_update_task_state, this, [this] { update_task_state(); }, Qt::QueuedConnection); - connect(&m_player, &QMediaPlayer::stateChanged, this, + connect(&m_player, &QMediaPlayer::playbackStateChanged, this, &speech_service::handle_player_state_changed, Qt::QueuedConnection); connect( settings::instance(), &settings::default_stt_model_changed, this, @@ -2464,7 +2464,7 @@ void speech_service::play_beep(beep_role_t beep_role) { auto get_beep_file = [](const QString &name) { // get from user data dir auto file_user = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(name); qDebug() << "file user:" << file_user; if (QFileInfo::exists(file_user)) { @@ -2499,15 +2499,15 @@ void speech_service::play_beep(beep_role_t beep_role) { return; } - m_beep_player.setMedia(QMediaContent{QUrl::fromLocalFile(beep_file)}); + m_beep_player.setSource(QUrl::fromLocalFile(beep_file)); m_beep_player.play(); } void speech_service::handle_tts_queue() { if (m_tts_queue.empty()) return; - if (m_player.state() == QMediaPlayer::State::PlayingState || - m_player.state() == QMediaPlayer::State::PausedState) + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState || + m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState) return; if (m_current_task && m_current_task->paused) return; @@ -2525,8 +2525,7 @@ void speech_service::handle_tts_queue() { result.remove_audio_file = true; } - m_player.setMedia( - QMediaContent{QUrl::fromLocalFile(result.audio_file_path)}); + m_player.setSource(QUrl::fromLocalFile(result.audio_file_path)); m_player.play(); @@ -2635,12 +2634,12 @@ void speech_service::handle_ttt_text_repaired(const QString &text, } void speech_service::handle_player_state_changed( - QMediaPlayer::State new_state) { + QMediaPlayer::PlaybackState new_state) { qDebug() << "player new state:" << new_state; update_task_state(); - if (new_state == QMediaPlayer::State::StoppedState && m_current_task && + if (new_state == QMediaPlayer::PlaybackState::StoppedState && m_current_task && m_current_task->engine == engine_t::tts && !m_current_task->paused && !m_tts_queue.empty()) { const auto &result = m_tts_queue.front(); @@ -3883,7 +3882,7 @@ int speech_service::tts_pause_speech(int task) { m_current_task->paused = true; - if (m_player.state() == QMediaPlayer::PlayingState) m_player.pause(); + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState) m_player.pause(); update_task_state(); @@ -3916,7 +3915,7 @@ int speech_service::tts_resume_speech(int task) { m_current_task->paused = false; - if (m_player.state() == QMediaPlayer::PausedState) m_player.play(); + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState) m_player.play(); handle_tts_queue(); @@ -4126,7 +4125,7 @@ void speech_service::update_task_state() { // 6 = Canceling auto new_task_state = [&] { - if (m_player.state() == QMediaPlayer::State::PlayingState && + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState && m_state == state_t::playing_speech) { return 4; } else if (m_stt_engine && 
m_stt_engine->started()) {
@@ -4140,8 +4139,8 @@ void speech_service::update_task_state() {
                 case stt_engine::speech_detection_status_t::no_speech:
                     break;
             }
-        } else if (m_player.state() == QMediaPlayer::State::PausedState ||
-                   (m_player.state() == QMediaPlayer::State::StoppedState &&
+        } else if (m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState ||
+                   (m_player.playbackState() == QMediaPlayer::PlaybackState::StoppedState &&
                     m_state == state_t::playing_speech && m_current_task &&
                     m_current_task->paused)) {
             return 5;
diff --git a/src/speech_service.h b/src/speech_service.h
index ad8de372..65aabee0 100644
--- a/src/speech_service.h
+++ b/src/speech_service.h
@@ -446,7 +446,7 @@ class speech_service : public QObject, public singleton {
                                    double progress, bool last);
     void handle_tts_speech_encoded(tts_partial_result_t result);
     void handle_speech_to_file(const tts_partial_result_t &result);
-    void handle_player_state_changed(QMediaPlayer::State new_state);
+    void handle_player_state_changed(QMediaPlayer::PlaybackState new_state);
     void handle_audio_available();
     void handle_stt_engine_state_changed(
         stt_engine::speech_detection_status_t status, int task_id);
diff --git a/src/wl_clipboard.cpp b/src/wl_clipboard.cpp
index 4f043e68..991137bd 100644
--- a/src/wl_clipboard.cpp
+++ b/src/wl_clipboard.cpp
@@ -2,7 +2,7 @@
 #include
 #include
-#include
+
 #include
 #include "logger.hpp"