From b1288d9d3e78402d5577f7d4e64be5792697c3b4 Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 00:43:35 -0700 Subject: [PATCH 1/3] feat(stt): add NVIDIA Canary STT engine support Add support for NVIDIA's Canary speech-to-text models via NeMo toolkit: - Canary 1B v2: 4.89% WER, 630x RTF (5x faster than Whisper) - Canary Qwen 2.5B: Higher accuracy variant for demanding use cases Both models use NeMo's EncDecMultiTaskModel architecture with automatic model download via HuggingFace. Supports GPU acceleration (CUDA/ROCm), translation (s2t_translation), and punctuation restoration. New files: - src/canary_engine.hpp: Engine class definition - src/canary_engine.cpp: NeMo Python integration via py_executor Modified: - models_manager.h/cpp: Add stt_canary engine type and feature flags - speech_service.cpp: Engine instantiation and type checking - CMakeLists.txt: Add canary_engine source files - config/models.json: Add both Canary model entries Requires: pip install nemo_toolkit[asr] --- CMakeLists.txt | 2 + config/models.json | 39 ++++- src/canary_engine.cpp | 389 +++++++++++++++++++++++++++++++++++++++++ src/canary_engine.hpp | 56 ++++++ src/models_manager.cpp | 35 +++- src/models_manager.h | 3 + src/speech_service.cpp | 13 +- 7 files changed, 524 insertions(+), 13 deletions(-) create mode 100644 src/canary_engine.cpp create mode 100644 src/canary_engine.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dce3bdd2..bdbc4e73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -374,6 +374,8 @@ set(dsnote_lib_sources ${sources_dir}/app_server.cpp ${sources_dir}/fasterwhisper_engine.hpp ${sources_dir}/fasterwhisper_engine.cpp + ${sources_dir}/canary_engine.hpp + ${sources_dir}/canary_engine.cpp ${sources_dir}/mimic3_engine.hpp ${sources_dir}/mimic3_engine.cpp ${sources_dir}/april_engine.hpp diff --git a/config/models.json b/config/models.json index 54a56e48..1fb6d896 100644 --- a/config/models.json +++ b/config/models.json @@ -35916,14 +35916,37 @@ "engine": "stt_whisper", "lang_id": "yo" }, - { - "name": "中文 (FasterWhisper)", - "id": "zh_fasterwhisper", - "engine": "stt_fasterwhisper", - "lang_id": "zh" - }, - { - "name": "中文 (WhisperCpp)", + { + "name": "中文 (FasterWhisper)", + "id": "zh_fasterwhisper", + "engine": "stt_fasterwhisper", + "lang_id": "zh" + }, + { + "name": "Multilingual (Canary 1B v2)", + "model_id": "multilang_canary_1b_v2", + "engine": "stt_canary", + "lang_id": "multilang", + "info": "NVIDIA Canary 1B v2 - 4.89% WER, 5x faster than Whisper (RTFx 630), best accuracy-per-watt", + "options": "ti", + "score": 5, + "features": ["high_quality", "medium_processing", "stt_punctuation"], + "default_for_lang": true, + "hidden": false + }, + { + "name": "Multilingual (Canary Qwen 2.5B)", + "model_id": "multilang_canary_qwen", + "engine": "stt_canary", + "lang_id": "multilang", + "info": "NVIDIA Canary Qwen 2.5B - Larger model for maximum accuracy", + "options": "ti", + "score": 4, + "features": ["high_quality", "slow_processing", "stt_punctuation"], + "hidden": false + }, + { + "name": "中文 (WhisperCpp)", "id": "zh_whisper", "engine": "stt_whisper", "lang_id": "zh" diff --git a/src/canary_engine.cpp b/src/canary_engine.cpp new file mode 100644 index 00000000..8530ddee --- /dev/null +++ b/src/canary_engine.cpp @@ -0,0 +1,389 @@ +/* Copyright (C) 2024-2025 Cole Leavitt + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include "canary_engine.hpp" + +#include +#include + +#include +#include +#include +#include + +#include "cpu_tools.hpp" +#include "gpu_tools.hpp" +#include "logger.hpp" +#include "py_executor.hpp" +#include "text_tools.hpp" + +using namespace pybind11::literals; + +canary_engine::canary_engine(config_t config, callbacks_t call_backs) + : stt_engine{std::move(config), std::move(call_backs)} { + m_speech_buf.reserve(m_speech_max_size); + m_auto_lang = m_config.lang == "auto"; +} + +canary_engine::~canary_engine() { + LOGD("canary dtor"); + stop(); +} + +void canary_engine::stop() { + stt_engine::stop(); + + auto task = py_executor::instance()->execute([&]() { + try { + m_model.reset(); + py::module_::import("gc").attr("collect")(); + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + } + return std::any{}; + }); + + if (task) task->get(); + + LOGD("canary stopped"); +} + +void canary_engine::push_buf_to_audio_buf( + const std::vector& buf, + audio_buf_t& audio_buf) { + std::transform(buf.cbegin(), buf.cend(), std::back_inserter(audio_buf), + [](auto sample) { + return static_cast(sample) / + 32768.0F; + }); +} + +void canary_engine::push_buf_to_audio_buf(in_buf_t::buf_t::value_type* data, + in_buf_t::buf_t::size_type size, + audio_buf_t& audio_buf) { + audio_buf.reserve(audio_buf.size() + size); + for (size_t i = 0; i < size; ++i) { + audio_buf.push_back(static_cast(data[i]) / + 32768.0F); + } +} + +void canary_engine::reset_impl() { m_speech_buf.clear(); } + +void canary_engine::stop_processing_impl() { LOGD("canary cancel"); } + +void canary_engine::start_processing_impl() { create_model(); } + +void canary_engine::create_model() { + if (m_model) return; + + LOGD("creating canary model"); + + auto task = py_executor::instance()->execute([&]() { + auto n_threads = static_cast( + std::min(m_config.cpu_threads, + std::max(1U, std::thread::hardware_concurrency()))); + auto use_cuda = + m_config.use_gpu && ((m_config.gpu_device.api == gpu_api_t::cuda && + gpu_tools::has_cudnn()) || + (m_config.gpu_device.api == gpu_api_t::rocm && + gpu_tools::has_hip())); + + LOGD("cpu info: arch=" << cpu_tools::arch() + << ", cores=" << std::thread::hardware_concurrency()); + LOGD("using threads: " << n_threads << "/" + << std::thread::hardware_concurrency()); + LOGD("using device: " << (use_cuda ? "cuda" : "cpu") << " " + << m_config.gpu_device.id); + + try { + auto torch = py::module_::import("torch"); + auto nemo_asr = py::module_::import("nemo.collections.asr"); + auto os_path = py::module_::import("os.path"); + + std::string device = use_cuda ? 
"cuda:" + std::to_string(m_config.gpu_device.id) : "cpu"; + + std::string model_path = m_config.model_files.model_file; + bool is_local_path = !model_path.empty() && + os_path.attr("exists")(model_path).cast(); + bool is_hf_model = !model_path.empty() && + model_path.find('/') != std::string::npos && + !is_local_path; + + std::string pretrained_name = "nvidia/canary-1b-v2"; + if (model_path.find("qwen") != std::string::npos || + model_path.find("2.5b") != std::string::npos || + model_path.find("2_5b") != std::string::npos) { + pretrained_name = "nvidia/canary-qwen-2.5b"; + } + + LOGD("canary model_path: " << model_path); + LOGD("canary is_local: " << is_local_path << ", is_hf: " << is_hf_model); + LOGD("canary pretrained_name: " << pretrained_name); + + py::object model; + if (is_local_path) { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("restore_from")(model_path); + } else if (is_hf_model) { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("from_pretrained")(model_path); + } else { + model = nemo_asr.attr("models").attr("EncDecMultiTaskModel") + .attr("from_pretrained")(pretrained_name); + } + + model.attr("to")(device); + model.attr("eval")(); + + if (use_cuda) { + torch.attr("cuda").attr("empty_cache")(); + } + + m_model.emplace(std::move(model)); + return true; + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + m_model.reset(); + return false; + } + }); + + if (!task || !std::any_cast(task->get())) { + LOGE("failed to create canary model"); + throw std::runtime_error{"failed to create canary model"}; + } + + LOGD("canary model created"); +} + +stt_engine::samples_process_result_t canary_engine::process_buff() { + if (!lock_buff_for_processing()) + return samples_process_result_t::wait_for_samples; + + auto eof = m_in_buf.eof; + auto sof = m_in_buf.sof; + + LOGD("process samples buf: mode=" + << m_config.speech_mode << ", in-buf size=" << m_in_buf.size + << ", speech-buf size=" << m_speech_buf.size() << ", sof=" << sof + << ", eof=" << eof); + + if (sof) { + m_speech_buf.clear(); + m_start_time.reset(); + m_vad.reset(); + reset_segment_counters(); + } + + m_denoiser.process(m_in_buf.buf.data(), m_in_buf.size); + + const auto& vad_buf = + m_vad.remove_silence(m_in_buf.buf.data(), m_in_buf.size); + + bool vad_status = !vad_buf.empty(); + + if (vad_status) { + LOGD("vad: speech detected"); + + if (m_config.speech_mode != speech_mode_t::manual && + m_config.speech_mode != speech_mode_t::single_sentence) + set_speech_detection_status( + speech_detection_status_t::speech_detected); + + if (m_config.text_format == text_format_t::raw) + push_buf_to_audio_buf(vad_buf, m_speech_buf); + else + push_buf_to_audio_buf(m_in_buf.buf.data(), m_in_buf.size, + m_speech_buf); + + restart_sentence_timer(); + } else { + LOGD("vad: no speech"); + + if (m_config.speech_mode == speech_mode_t::single_sentence && + m_speech_buf.empty() && sentence_timer_timed_out()) { + LOGD("sentence timeout"); + m_call_backs.sentence_timeout(); + } + + if (m_config.speech_mode == speech_mode_t::automatic) + set_speech_detection_status(speech_detection_status_t::no_speech); + + if (m_speech_buf.empty()) + m_segment_time_discarded_before += + (1000 * m_in_buf.size) / m_sample_rate; + else + m_segment_time_discarded_after += + (1000 * m_in_buf.size) / m_sample_rate; + } + + m_in_buf.clear(); + + auto decode_samples = [&] { + if (m_speech_buf.size() > m_speech_max_size) { + LOGD("speech buf reached max size"); + return true; + } + + if 
(m_speech_buf.empty()) return false; + + if ((m_config.speech_mode == speech_mode_t::manual || + m_speech_detection_status == + speech_detection_status_t::speech_detected) && + vad_status && !eof) + return false; + + if ((m_config.speech_mode == speech_mode_t::manual || + m_config.speech_mode == speech_mode_t::single_sentence) && + m_speech_detection_status == speech_detection_status_t::no_speech && + !eof) + return false; + + return true; + }(); + + if (!decode_samples) { + if (eof || (m_config.speech_mode == speech_mode_t::manual && + m_speech_detection_status == + speech_detection_status_t::no_speech)) { + flush(eof ? flush_t::eof : flush_t::regular); + free_buf(); + return samples_process_result_t::no_samples_needed; + } + + free_buf(); + return samples_process_result_t::wait_for_samples; + } + + if (m_thread_exit_requested) { + free_buf(); + return samples_process_result_t::no_samples_needed; + } + + set_state(state_t::decoding); + + if (!vad_status) { + set_speech_detection_status(speech_detection_status_t::no_speech); + } + + LOGD("speech frame: samples=" << m_speech_buf.size()); + + m_segment_time_offset += m_segment_time_discarded_before; + m_segment_time_discarded_before = 0; + + decode_speech(m_speech_buf); + + m_segment_time_offset += (m_segment_time_discarded_after + + (1000 * m_speech_buf.size() / m_sample_rate)); + m_segment_time_discarded_after = 0; + + set_state(state_t::idle); + + if (m_config.speech_mode == speech_mode_t::single_sentence && + (!m_intermediate_text || m_intermediate_text->empty())) { + LOGD("no speech decoded, forcing sentence timeout"); + m_call_backs.sentence_timeout(); + } + + m_speech_buf.clear(); + + flush(eof || m_config.speech_mode == speech_mode_t::single_sentence + ? flush_t::eof + : flush_t::regular); + + free_buf(); + + return samples_process_result_t::wait_for_samples; +} + +void canary_engine::decode_speech(const audio_buf_t& buf) { + LOGD("speech decoding started"); + + create_model(); + + auto decoding_start = std::chrono::steady_clock::now(); + + auto task = py_executor::instance()->execute([&]() { + try { + py::array_t array(buf.size()); + auto r = array.mutable_unchecked<1>(); + for (py::ssize_t i = 0; i < r.shape(0); ++i) r(i) = buf[i]; + + auto torch = py::module_::import("torch"); + auto sf = py::module_::import("soundfile"); + auto tempfile = py::module_::import("tempfile"); + auto os = py::module_::import("os"); + + auto tmp_dir = tempfile.attr("gettempdir")(); + auto tmp_path = py::str(tmp_dir) + py::str("/canary_temp.wav"); + + sf.attr("write")(tmp_path, array, m_sample_rate); + + std::string source_lang = m_auto_lang ? "en" : m_config.lang; + std::string target_lang = m_config.translate ? "en" : source_lang; + std::string task_type = m_config.translate ? "s2t_translation" : "asr"; + + py::list paths; + paths.append(tmp_path); + + auto result = m_model->attr("transcribe")( + paths, + "batch_size"_a = 1, + "source_lang"_a = source_lang, + "target_lang"_a = target_lang, + "task"_a = task_type, + "pnc"_a = m_config.has_option('i') + ); + + os.attr("unlink")(tmp_path); + + std::string text; + if (py::isinstance(result) && py::len(result) > 0) { + text = result[py::int_(0)].cast(); + } + + rtrim(text); + ltrim(text); + + std::string auto_lang = m_auto_lang ? 
"en" : m_config.lang; + + return std::pair(std::move(text), + std::move(auto_lang)); + } catch (const std::exception& err) { + LOGE("canary py error: " << err.what()); + return std::pair({}, {}); + } + }); + + if (!task) return; + + auto [text, auto_lang] = + std::any_cast>(task->get()); + + if (m_thread_exit_requested) return; + + auto stats = report_stats( + buf.size(), m_sample_rate, + static_cast(std::max( + 0L, static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now() - decoding_start) + .count())))); + + auto result = merge_texts(m_intermediate_text.value_or(std::string{}), + std::move(text)); + + if (m_config.insert_stats) result.append(" " + stats); + +#ifdef DEBUG + LOGD("speech decoded: text=" << result); +#endif + + if (!m_intermediate_text || m_intermediate_text != result) + set_intermediate_text(result, auto_lang); +} diff --git a/src/canary_engine.hpp b/src/canary_engine.hpp new file mode 100644 index 00000000..d4ec807d --- /dev/null +++ b/src/canary_engine.hpp @@ -0,0 +1,56 @@ +/* Copyright (C) 2024-2025 Cole Leavitt + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef CANARY_ENGINE_H +#define CANARY_ENGINE_H + +#undef slots +#include +#include +#define slots Q_SLOTS + +#include +#include +#include +#include + +#include "stt_engine.hpp" + +namespace py = pybind11; + +class canary_engine : public stt_engine { + public: + canary_engine(config_t config, callbacks_t call_backs); + ~canary_engine() override; + + private: + using audio_buf_t = std::vector; + + inline static const size_t m_speech_max_size = m_sample_rate * 30; + inline static const int m_threads = 8; + + std::optional m_model; + audio_buf_t m_speech_buf; + bool m_auto_lang = false; + + void create_model(); + samples_process_result_t process_buff() override; + void decode_speech(const audio_buf_t& buf); + static void push_buf_to_audio_buf( + const std::vector& buf, + audio_buf_t& audio_buf); + static void push_buf_to_audio_buf(in_buf_t::buf_t::value_type* data, + in_buf_t::buf_t::size_type size, + audio_buf_t& audio_buf); + + void reset_impl() override; + void stop_processing_impl() override; + void start_processing_impl() override; + void stop(); +}; + +#endif diff --git a/src/models_manager.cpp b/src/models_manager.cpp index edb20718..35f8311c 100644 --- a/src/models_manager.cpp +++ b/src/models_manager.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2021-2025 Michal Kosciesza +/* Copyright (C) 2021-2025 Michal Kosciesza * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. 
If a copy of the MPL was not distributed with this @@ -136,6 +136,7 @@ QDebug operator<<(QDebug d, models_manager::feature_flags flags) { if (flags & models_manager::engine_stt_fasterwhisper) d << "engine-stt-fasterwhisper, "; if (flags & models_manager::engine_stt_april) d << "engine-stt-april, "; + if (flags & models_manager::engine_stt_canary) d << "engine-stt-canary, "; if (flags & models_manager::engine_tts_espeak) d << "engine-tts-espeak, "; if (flags & models_manager::engine_tts_piper) d << "engine-tts-piper, "; if (flags & models_manager::engine_tts_rhvoice) d << "engine-tts-rhvoice, "; @@ -173,6 +174,9 @@ QDebug operator<<(QDebug d, models_manager::model_engine_t engine) { case models_manager::model_engine_t::stt_april: d << "stt-april"; break; + case models_manager::model_engine_t::stt_canary: + d << "stt-canary"; + break; case models_manager::model_engine_t::ttt_hftc: d << "ttt-hftc"; break; @@ -262,6 +266,7 @@ QDebug operator<<(QDebug d, if (models_availability.tts_kokoro_ja) d << "tts_kokoro_ja,"; if (models_availability.tts_kokoro_zh) d << "tts_kokoro_zh,"; if (models_availability.stt_fasterwhisper) d << "stt_fasterwhisper,"; + if (models_availability.stt_canary) d << "stt_canary,"; if (models_availability.stt_ds) d << "stt_ds,"; if (models_availability.stt_vosk) d << "stt_vosk,"; if (models_availability.stt_whispercpp) d << "stt_whispercpp,"; @@ -1581,6 +1586,7 @@ bool models_manager::is_modelless_engine(model_engine_t engine) { case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: case model_engine_t::stt_april: + case model_engine_t::stt_canary: case model_engine_t::ttt_hftc: case model_engine_t::ttt_tashkeel: case model_engine_t::ttt_unikud: @@ -1606,6 +1612,7 @@ bool models_manager::is_ignore_on_sfos(model_engine_t engine, case model_engine_t::ttt_tashkeel: case model_engine_t::ttt_unikud: case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: case model_engine_t::tts_mimic3: case model_engine_t::tts_whisperspeech: case model_engine_t::tts_parler: @@ -1704,6 +1711,7 @@ models_manager::model_role_t models_manager::role_of_engine( case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: case model_engine_t::stt_april: + case model_engine_t::stt_canary: return model_role_t::stt; case model_engine_t::ttt_hftc: case model_engine_t::ttt_tashkeel: @@ -1736,6 +1744,7 @@ models_manager::model_engine_t models_manager::engine_from_name( if (name == QStringLiteral("stt_fasterwhisper")) return model_engine_t::stt_fasterwhisper; if (name == QStringLiteral("stt_april")) return model_engine_t::stt_april; + if (name == QStringLiteral("stt_canary")) return model_engine_t::stt_canary; if (name == QStringLiteral("ttt_hftc")) return model_engine_t::ttt_hftc; if (name == QStringLiteral("ttt_tashkeel")) return model_engine_t::ttt_tashkeel; @@ -1884,6 +1893,7 @@ models_manager::feature_flags models_manager::add_new_feature( case feature_flags::engine_stt_whisper: case feature_flags::engine_stt_fasterwhisper: case feature_flags::engine_stt_april: + case feature_flags::engine_stt_canary: case feature_flags::engine_tts_espeak: case feature_flags::engine_tts_piper: case feature_flags::engine_tts_rhvoice: @@ -1901,6 +1911,7 @@ models_manager::feature_flags models_manager::add_new_feature( existing_features & feature_flags::engine_stt_whisper || existing_features & feature_flags::engine_stt_fasterwhisper || existing_features & feature_flags::engine_stt_april || + existing_features & feature_flags::engine_stt_canary || existing_features & 
feature_flags::engine_tts_espeak || existing_features & feature_flags::engine_tts_piper || existing_features & feature_flags::engine_tts_rhvoice || @@ -1965,11 +1976,14 @@ models_manager::feature_flags models_manager::add_implicit_feature_flags( break; case model_engine_t::stt_whisper: case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: existing_features = add_new_feature(existing_features, engine == model_engine_t::stt_whisper ? feature_flags::engine_stt_whisper - : feature_flags::engine_stt_fasterwhisper); + : engine == model_engine_t::stt_canary + ? feature_flags::engine_stt_canary + : feature_flags::engine_stt_fasterwhisper); if (model_id.contains("tiny")) { existing_features = add_new_feature(existing_features, @@ -2268,7 +2282,8 @@ auto models_manager::extract_models( return model_alias_of; } if (engine != model_engine_t::stt_whisper && - engine != model_engine_t::stt_fasterwhisper) { + engine != model_engine_t::stt_fasterwhisper && + engine != model_engine_t::stt_canary) { return {}; } auto l = model_id.split('_'); @@ -2397,6 +2412,11 @@ auto models_manager::extract_models( qDebug() << "ignoring fasterwhisper model:" << model_id; continue; } + if (!models_availability->stt_canary && + engine == model_engine_t::stt_canary) { + qDebug() << "ignoring canary model:" << model_id; + continue; + } if (!models_availability->stt_ds && engine == model_engine_t::stt_ds) { qDebug() << "ignoring ds model:" << model_id; @@ -2552,7 +2572,8 @@ auto models_manager::extract_models( // add split by words option for all sam tts models model.options.push_back('w'); } else if ((model.engine == model_engine_t::stt_whisper || - model.engine == model_engine_t::stt_fasterwhisper) && + model.engine == model_engine_t::stt_fasterwhisper || + model.engine == model_engine_t::stt_canary) && !model.disabled && !model.hidden && model.options.contains('t') && model.lang_id == "en") { // remove translate to english option for all english models @@ -2780,6 +2801,7 @@ QString models_manager::file_name_from_id(const QString& id, case model_engine_t::ttt_tashkeel: return id + ".ort"; case model_engine_t::stt_fasterwhisper: + case model_engine_t::stt_canary: case model_engine_t::stt_vosk: case model_engine_t::ttt_hftc: case model_engine_t::ttt_unikud: @@ -3022,6 +3044,11 @@ void models_manager::update_models_using_availability_internal() { pair.second.disabled = true; return; } + if (!m_models_availability->stt_canary && + pair.second.engine == model_engine_t::stt_canary) { + pair.second.disabled = true; + return; + } if (!m_models_availability->stt_ds && pair.second.engine == model_engine_t::stt_ds) { pair.second.disabled = true; diff --git a/src/models_manager.h b/src/models_manager.h index 328cd11d..a27073f3 100644 --- a/src/models_manager.h +++ b/src/models_manager.h @@ -49,6 +49,7 @@ class models_manager : public QObject, public singleton { stt_whisper, stt_fasterwhisper, stt_april, + stt_canary, ttt_hftc, ttt_tashkeel, ttt_unikud, @@ -81,6 +82,7 @@ class models_manager : public QObject, public singleton { engine_stt_whisper = 1U << 8U, engine_stt_fasterwhisper = 1U << 9U, engine_stt_april = 1U << 10U, + engine_stt_canary = 1U << 21U, engine_tts_espeak = 1U << 11U, engine_tts_piper = 1U << 12U, engine_tts_rhvoice = 1U << 13U, @@ -190,6 +192,7 @@ class models_manager : public QObject, public singleton { bool tts_kokoro_ja = false; bool tts_kokoro_zh = false; bool stt_fasterwhisper = false; + bool stt_canary = false; bool stt_ds = false; bool stt_vosk = false; bool stt_whispercpp = false; diff 
--git a/src/speech_service.cpp b/src/speech_service.cpp index 572aecc8..56f68df2 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2021-2025 Michal Kosciesza +/* Copyright (C) 2021-2025 Michal Kosciesza * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this @@ -26,6 +26,7 @@ #include "espeak_engine.hpp" #include "f5_engine.hpp" #include "fasterwhisper_engine.hpp" +#include "canary_engine.hpp" #include "file_source.h" #include "gpu_tools.hpp" #include "kokoro_engine.hpp" @@ -1347,6 +1348,8 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, } } else if (model_config->stt->engine == models_manager::model_engine_t::stt_fasterwhisper) { ENGINE_OPTS(fasterwhisper) + } else if (model_config->stt->engine == models_manager::model_engine_t::stt_canary) { + ENGINE_OPTS(canary) } #undef ENGINE_OPTS // clang-format on @@ -1371,6 +1374,10 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, models_manager::model_engine_t::stt_fasterwhisper && type != typeid(fasterwhisper_engine)) return true; + if (model_config->stt->engine == + models_manager::model_engine_t::stt_canary && + type != typeid(canary_engine)) + return true; if (model_config->stt->engine == models_manager::model_engine_t::stt_april && type != typeid(april_engine)) @@ -1463,6 +1470,10 @@ QString speech_service::restart_stt_engine(speech_mode_t speech_mode, m_stt_engine = std::make_unique( std::move(config), std::move(call_backs)); break; + case models_manager::model_engine_t::stt_canary: + m_stt_engine = std::make_unique( + std::move(config), std::move(call_backs)); + break; case models_manager::model_engine_t::stt_april: m_stt_engine = std::make_unique( std::move(config), std::move(call_backs)); From 0cfa962eb9788b78e99fafb0a890a45f2d11b88a Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 00:46:11 -0700 Subject: [PATCH 2/3] feat(stt): add NeMo availability detection for Canary engine Check for nemo.collections.asr module availability at startup. This enables dsnote to automatically detect if NeMo is installed and show/hide Canary models accordingly in the UI. 
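The startup probe is the same import a user can run by hand to confirm the
dependency is present (a minimal check; "python3" here stands for whatever
Python environment dsnote actually uses, e.g. its bundled venv):

    python3 -c "import nemo.collections.asr; print('NeMo ASR available')" \
        || echo "NeMo ASR missing: pip install nemo_toolkit[asr]"
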
- py_tools.hpp: Add nemo_asr to libs_availability_t - py_tools.cpp: Add nemo.collections.asr import check - speech_service.cpp: Map nemo_asr availability to stt_canary --- src/py_tools.cpp | 9 +++++++++ src/py_tools.hpp | 1 + src/speech_service.cpp | 1 + 3 files changed, 11 insertions(+) diff --git a/src/py_tools.cpp b/src/py_tools.cpp index a41190b6..9335f6b1 100644 --- a/src/py_tools.cpp +++ b/src/py_tools.cpp @@ -36,6 +36,7 @@ std::ostream& operator<<(std::ostream& os, os << "py-version=" << availability.py_version << ", coqui-tts=" << availability.coqui_tts << ", faster-whisper=" << availability.faster_whisper + << ", nemo-asr=" << availability.nemo_asr << ", ctranslate2-cuda=" << availability.ctranslate2_cuda << ", mimic3-tts=" << availability.mimic3_tts << ", whisperspeech-tts=" << availability.whisperspeech_tts @@ -234,6 +235,14 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type, LOGD("faster-whisper check py error: " << err.what()); } + try { + LOGD("checking: nemo-asr"); + py::module_::import("nemo.collections.asr"); + availability.nemo_asr = true; + } catch (const std::exception& err) { + LOGD("nemo-asr check py error: " << err.what()); + } + try { LOGD("checking: transformers"); py::module_::import("transformers"); diff --git a/src/py_tools.hpp b/src/py_tools.hpp index 7ac76b51..55e4add5 100644 --- a/src/py_tools.hpp +++ b/src/py_tools.hpp @@ -26,6 +26,7 @@ struct libs_availability_t { bool torch_cuda = false; bool torch_hip = false; bool faster_whisper = false; + bool nemo_asr = false; bool ctranslate2_cuda = false; bool mimic3_tts = false; bool whisperspeech_tts = false; diff --git a/src/speech_service.cpp b/src/speech_service.cpp index 56f68df2..a376f207 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -3310,6 +3310,7 @@ QVariantMap speech_service::features_availability() { /*tts_kokoro_ja=*/py_availability->kokoro_ja, /*tts_kokoro_zh=*/py_availability->kokoro_zh, /*stt_fasterwhisper=*/py_availability->faster_whisper, + /*stt_canary=*/py_availability->nemo_asr, /*stt_ds=*/stt_ds, /*stt_vosk=*/stt_vosk, /*stt_whispercpp=*/stt_whispercpp, From cd44a8458324f4dffbfcba9cc2c01883e20cbffb Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 7 Jan 2026 01:34:54 -0700 Subject: [PATCH 3/3] feat: migrate from Qt5 to Qt6 - Update CMakeLists.txt to use Qt6 instead of Qt5 - Update cmake/*.cmake files for Qt6 compatibility - Replace deprecated Qt5 APIs with Qt6 equivalents: - QRegExp -> QRegularExpression - QX11Info -> QNativeInterface::QX11Application - QMediaPlayer::State -> QMediaPlayer::PlaybackState - QMediaPlayer::stateChanged -> playbackStateChanged - setMedia(QMediaContent) -> setSource(QUrl) - QAudioInput (recording) -> QAudioSource - QAudioDeviceInfo -> QAudioDevice + QMediaDevices - QAudioFormat::setSampleSize/setCodec -> setSampleFormat - QNetworkRequest::FollowRedirectsAttribute -> RedirectPolicyAttribute - Remove Qt::AA_EnableHighDpiScaling (default in Qt6) - Remove QTextCodec usage - Remove QQuickStyle::availableStyles() (not in Qt6) - Fix GCC 15 type strictness (std::clamp/max int vs qsizetype) - Update qhotkey external project to build with Qt6 --- CMakeLists.txt | 16 ++++---- cmake/dbus_api.cmake | 6 +-- cmake/openblas_pkgconfig.cmake | 2 +- cmake/qhotkey.cmake | 6 +-- cmake/translations.cmake | 4 +- src/app_server.cpp | 10 ++--- src/dbus_application_adaptor.h | 2 +- src/dbus_speech_adaptor.h | 2 +- src/dsnote_app.cpp | 70 +++++++++++++++++----------------- src/fake_keyboard.cpp | 27 +++++++------ src/fake_keyboard.hpp | 4 +- 
src/main.cpp | 3 +- src/mic_source.cpp | 70 ++++++++++++++-------------------- src/mic_source.h | 4 +- src/models_manager.cpp | 9 +++-- src/module_tools.cpp | 4 +- src/py_tools.cpp | 2 +- src/recorder.cpp | 39 +++++++++---------- src/recorder.hpp | 4 +- src/settings.cpp | 44 ++++++++++++--------- src/settings.h | 4 ++ src/speech_service.cpp | 27 +++++++------ src/speech_service.h | 2 +- src/wl_clipboard.cpp | 2 +- 24 files changed, 184 insertions(+), 179 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bdbc4e73..ec2b64d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -535,17 +535,17 @@ pkg_search_module(pulse REQUIRED libpulse) list(APPEND deps_libs ${pulse_LIBRARIES}) list(APPEND includes ${pulse_INCLUDE_DIRS}) -find_package(Qt5 COMPONENTS Core Network Multimedia Qml Xml Sql Gui Quick DBus LinguistTools REQUIRED) -list(APPEND deps_libs Qt5::Core Qt5::Network Qt5::Multimedia Qt5::Gui Qt5::Quick Qt5::DBus Qt5::Xml) +find_package(Qt6 COMPONENTS Core Network Multimedia Qml Xml Sql Gui Quick DBus LinguistTools REQUIRED) +list(APPEND deps_libs Qt6::Core Qt6::Network Qt6::Multimedia Qt6::Gui Qt6::Quick Qt6::DBus Qt6::Xml) if(WITH_DESKTOP) - find_package(Qt5 COMPONENTS QuickControls2 Widgets REQUIRED) - list(APPEND deps_libs Qt5::QuickControls2 Qt5::Widgets) + find_package(Qt6 COMPONENTS QuickControls2 Widgets REQUIRED) + list(APPEND deps_libs Qt6::QuickControls2 Qt6::Widgets) pkg_search_module(wayland REQUIRED wayland-client) list(APPEND deps_libs ${wayland_LIBRARIES}) list(APPEND includes ${wayland_INCLUDE_DIRS}) - include_directories(${Qt5Gui_PRIVATE_INCLUDE_DIRS}) + include_directories(${Qt6Gui_PRIVATE_INCLUDE_DIRS}) if(BUILD_XKBCOMMON) include(${cmake_path}/xkbcommon.cmake) @@ -567,8 +567,10 @@ if(WITH_DESKTOP) find_package(X11 REQUIRED) list(APPEND deps_libs X11) - find_package(Qt5 COMPONENTS X11Extras REQUIRED) - list(APPEND deps_libs Qt5::X11Extras) + # Qt6: X11Extras removed, functionality now in QtGui via QNativeInterface + pkg_search_module(xcb REQUIRED xcb) + list(APPEND deps_libs ${xcb_LIBRARIES}) + list(APPEND includes ${xcb_INCLUDE_DIRS}) if(BUILD_QHOTKEY) include(${cmake_path}/qhotkey.cmake) diff --git a/cmake/dbus_api.cmake b/cmake/dbus_api.cmake index 93bb5298..74143997 100644 --- a/cmake/dbus_api.cmake +++ b/cmake/dbus_api.cmake @@ -2,12 +2,12 @@ set(dbus_dsnote_interface_file "${PROJECT_BINARY_DIR}/${info_dbus_app_interface} configure_file(${dbus_dir}/dsnote.xml.in ${dbus_dsnote_interface_file}) -find_package(Qt5 COMPONENTS DBus REQUIRED) +find_package(Qt6 COMPONENTS DBus REQUIRED) unset(qdbusxml2cpp_bin CACHE) -find_program(qdbusxml2cpp_bin qdbusxml2cpp) +find_program(qdbusxml2cpp_bin qdbusxml2cpp HINTS ${Qt6_DIR}/../../../libexec ${Qt6_DIR}/../../../bin) if(${qdbusxml2cpp_bin} MATCHES "-NOTFOUND$") - find_program(qdbusxml2cpp_bin qdbusxml2cpp-qt5) + find_program(qdbusxml2cpp_bin qdbusxml2cpp-qt6) if(${qdbusxml2cpp_bin} MATCHES "-NOTFOUND$") message(FATAL_ERROR "qdbusxml2cpp not found but it is required") endif() diff --git a/cmake/openblas_pkgconfig.cmake b/cmake/openblas_pkgconfig.cmake index 4b6f33a6..b2ba9576 100644 --- a/cmake/openblas_pkgconfig.cmake +++ b/cmake/openblas_pkgconfig.cmake @@ -1,6 +1,6 @@ pkg_search_module(openblas openblas) -if(NOT DEFINED ${openblas_FOUND}) +if(NOT openblas_FOUND) # check without pkg-config set(BLA_STATIC OFF) set(BLA_VENDOR "OpenBLAS") diff --git a/cmake/qhotkey.cmake b/cmake/qhotkey.cmake index c454cb96..79c9f258 100644 --- a/cmake/qhotkey.cmake +++ b/cmake/qhotkey.cmake @@ -15,10 +15,10 @@ 
ExternalProject_Add(qhotkey -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POLICY_VERSION_MINIMUM=3.5 + -DQT_DEFAULT_MAJOR_VERSION=6 BUILD_ALWAYS False ) -find_package(Qt5 COMPONENTS X11Extras REQUIRED) - -list(APPEND deps_libs Qt5::X11Extras "${external_lib_dir}/libqhotkey.a") +# Qt6: X11Extras removed, functionality now in QtGui via QNativeInterface +list(APPEND deps_libs "${external_lib_dir}/libqhotkey.a") list(APPEND deps qhotkey) diff --git a/cmake/translations.cmake b/cmake/translations.cmake index c2642019..5fd1d531 100644 --- a/cmake/translations.cmake +++ b/cmake/translations.cmake @@ -3,7 +3,7 @@ set(enabled_translations ar ca_ES cs de en es fr fr_CA it nl no pl ru sv sl tr_T # finished translations set(enabled_translations ar ca_ES de en es fr fr_CA it nl no pl ru sv sl tr_TR uk zh_CN zh_TW) -find_package(Qt5 COMPONENTS Core LinguistTools) +find_package(Qt6 COMPONENTS Core LinguistTools) set(ts_files "") foreach(lang ${enabled_translations}) @@ -24,6 +24,6 @@ function(ADD_TRANSLATIONS_RESOURCE res_file) set(${res_file} ${_res_file} PARENT_SCOPE) endfunction() -qt5_create_translation(qm_files ${CMAKE_SOURCE_DIR}/src ${desktop_dir}/qml ${sfos_dir}/qml ${ts_files}) +qt_add_translation(qm_files ${ts_files}) add_translations_resource(translations_res ${qm_files}) diff --git a/src/app_server.cpp b/src/app_server.cpp index 4309240a..ce1657fe 100644 --- a/src/app_server.cpp +++ b/src/app_server.cpp @@ -134,7 +134,7 @@ int app_server::request_another_instance(const cmd::options &options) { auto model = qdbus_cast( m.template value()); return std::max(model.contains("id") - ? model.value("id").toString().size() + ? static_cast(model.value("id").toString().size()) : size, size); }); @@ -204,10 +204,10 @@ int app_server::request_another_instance(const cmd::options &options) { g_max_size = std::max(g_max_size, std::max(modelStt.contains("id") - ? modelStt.value("id").toString().size() + ? static_cast(modelStt.value("id").toString().size()) : 1, modelTts.contains("id") - ? modelTts.value("id").toString().size() + ? static_cast(modelTts.value("id").toString().size()) : 1)); print_active_model("STT", g_max_size, modelStt); print_active_model("TTS", g_max_size, modelTts); @@ -215,14 +215,14 @@ int app_server::request_another_instance(const cmd::options &options) { auto modelStt = iface.activeSttModel(); g_max_size = std::max(g_max_size, modelStt.contains("id") - ? modelStt.value("id").toString().size() + ? static_cast(modelStt.value("id").toString().size()) : 1); print_active_model("STT", g_max_size, modelStt); } else if (options.active_model_to_print_role & cmd::role_tts) { auto modelTts = iface.activeTtsModel(); g_max_size = std::max(g_max_size, modelTts.contains("id") - ? modelTts.value("id").toString().size() + ? 
static_cast(modelTts.value("id").toString().size()) : 1); print_active_model("TTS", g_max_size, modelTts); } diff --git a/src/dbus_application_adaptor.h b/src/dbus_application_adaptor.h index a5816fe4..af185d32 100644 --- a/src/dbus_application_adaptor.h +++ b/src/dbus_application_adaptor.h @@ -19,7 +19,7 @@ class QByteArray; template class QList; template class QMap; class QString; -class QStringList; +// class QStringList; // Removed: typedef in Qt6 class QVariant; QT_END_NAMESPACE diff --git a/src/dbus_speech_adaptor.h b/src/dbus_speech_adaptor.h index 564641c7..056b146e 100644 --- a/src/dbus_speech_adaptor.h +++ b/src/dbus_speech_adaptor.h @@ -19,7 +19,7 @@ class QByteArray; template class QList; template class QMap; class QString; -class QStringList; +// class QStringList; // Removed: typedef in Qt6 class QVariant; QT_END_NAMESPACE diff --git a/src/dsnote_app.cpp b/src/dsnote_app.cpp index 0490590f..ee91ed81 100644 --- a/src/dsnote_app.cpp +++ b/src/dsnote_app.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -477,13 +477,11 @@ dsnote_app::dsnote_app(QObject *parent) } void dsnote_app::create_player() { - m_player = std::make_unique(QObject::parent(), - QMediaPlayer::LowLatency); - m_player->setNotifyInterval(100); + m_player = std::make_unique(QObject::parent()); connect( - m_player.get(), &QMediaPlayer::stateChanged, this, - [this](QMediaPlayer::State state) { + m_player.get(), &QMediaPlayer::playbackStateChanged, this, + [this](QMediaPlayer::PlaybackState state) { qDebug() << "player state changed:" << state; emit player_playing_changed(); }, @@ -738,11 +736,12 @@ settings::trans_rule_flags_t dsnote_app::apply_trans_rule( ? rule.flags : trans_rule_flags_t::TransRuleNone; break; - case trans_rule_type_t::TransRuleTypeMatchRe: - rule_matches = text.contains( - QRegExp{rule.pattern, - case_sens ? Qt::CaseSensitive : Qt::CaseInsensitive}); + case trans_rule_type_t::TransRuleTypeMatchRe: { + QRegularExpression::PatternOptions opts = QRegularExpression::NoPatternOption; + if (!case_sens) opts |= QRegularExpression::CaseInsensitiveOption; + rule_matches = text.contains(QRegularExpression{rule.pattern, opts}); break; + } case trans_rule_type_t::TransRuleTypeReplaceSimple: { rule_matches = text.contains(rule.pattern, case_sens ? Qt::CaseSensitive @@ -757,29 +756,30 @@ settings::trans_rule_flags_t dsnote_app::apply_trans_rule( auto replace = rule.replace; replace.replace("\\n", "\n"); - QRegExp rx{rule.pattern, - case_sens ? 
Qt::CaseSensitive : Qt::CaseInsensitive}; + QRegularExpression::PatternOptions opts = QRegularExpression::NoPatternOption; + if (!case_sens) opts |= QRegularExpression::CaseInsensitiveOption; + QRegularExpression rx{rule.pattern, opts}; rule_matches = text.contains(rx); if (rule_matches) { if (!rule.replace.contains("\\U") && !replace.contains("\\u")) { text.replace(rx, replace); } else { - int pos = 0; + qsizetype pos = 0; + QRegularExpressionMatch match; while (pos < text.size() && - (pos = rx.indexIn(text, pos)) != -1 && - rx.matchedLength() > 0) { + (match = rx.match(text, pos)).hasMatch()) { QString after = replace; - for (int i = 1; i < rx.captureCount() + 1; ++i) { + for (int i = 1; i <= match.lastCapturedIndex(); ++i) { after.replace(QStringLiteral("\\U\\%1").arg(i), - rx.cap(i).toUpper()); + match.captured(i).toUpper()); after.replace(QStringLiteral("\\u\\%1").arg(i), - rx.cap(i).toLower()); + match.captured(i).toLower()); after.replace(QStringLiteral("\\%1").arg(i), - rx.cap(i)); + match.captured(i)); } - text.replace(pos, rx.matchedLength(), after); - pos += after.size(); + text.replace(match.capturedStart(), match.capturedLength(), after); + pos = match.capturedStart() + after.size(); } } } @@ -868,7 +868,7 @@ QVariantList dsnote_app::test_trans_rule(unsigned int flags, } bool dsnote_app::trans_rule_re_pattern_valid(const QString &pattern) { - return QRegExp{pattern}.isValid(); + return QRegularExpression{pattern}.isValid(); } void dsnote_app::update_trans_rule(int index, unsigned int flags, @@ -1927,7 +1927,7 @@ void dsnote_app::update_available_tts_ref_voices() { QVariantMap new_available_tts_ref_voices_map{}; const auto ref_voices_dir = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(s_ref_voices_dir_name); auto scan_ref_voices = [&] { @@ -2873,7 +2873,7 @@ void dsnote_app::play_speech() { } void dsnote_app::play_speech_selected(int start, int end) { - auto size = note().size(); + auto size = static_cast(note().size()); if (size == 0) return; @@ -2893,7 +2893,7 @@ void dsnote_app::play_speech_selected(int start, int end) { void dsnote_app::play_speech_translator_selected(int start, int end, bool transtalated) { - auto size = transtalated ? m_translated_text.size() : note().size(); + auto size = static_cast(transtalated ? m_translated_text.size() : note().size()); if (size == 0) return; @@ -3027,7 +3027,7 @@ void dsnote_app::handle_translate_delayed() { } void dsnote_app::translate_selected(int start, int end) { - auto size = note().size(); + auto size = static_cast(note().size()); if (size == 0) return; @@ -5254,7 +5254,7 @@ QString dsnote_app::import_ref_voice_file_path() { void dsnote_app::player_stop_voice_ref() { if (!m_player) return; - m_player->setMedia({}); + m_player->setSource(QUrl{}); m_player_current_voice_ref_idx = -1; emit player_current_voice_ref_idx_changed(); @@ -5329,7 +5329,7 @@ void dsnote_app::player_import_rec() { void dsnote_app::player_set_path(const QString &wav_file_path) { if (!m_player) create_player(); - m_player->setMedia( + m_player->setSource( QUrl{QStringLiteral("gst-pipeline: filesrc location=%1 ! wavparse ! " "audioconvert ! 
alsasink") .arg(wav_file_path)}); @@ -5351,10 +5351,12 @@ QString dsnote_app::tts_ref_voice_unique_name(QString name, if (!add_number && !names.contains(name)) return name; int i = 1; - QRegExp rx{"\\d+$"}; - if (auto idx = rx.indexIn(name); idx >= 0) { + QRegularExpression rx{QStringLiteral("\\d+$")}; + QRegularExpressionMatch match = rx.match(name); + if (match.hasMatch()) { + auto idx = match.capturedStart(); bool ok = false; - auto ii = name.midRef(idx).toInt(&ok); + auto ii = QStringView{name}.mid(idx).toInt(&ok); if (ok && ii < 99999) { i = ii; name = name.mid(0, idx) + "%1"; @@ -5381,7 +5383,7 @@ void dsnote_app::player_export_ref_voice(long long start, long long stop, const QString &name, const QString &text) { QDir ref_voices_dir{ - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(s_ref_voices_dir_name)}; QString out_file_path; @@ -5428,7 +5430,7 @@ void dsnote_app::player_reset() { if (!m_player) return; QFile{import_ref_voice_file_path()}.remove(); - m_player->setMedia({}); + m_player->setSource(QUrl{}); } bool dsnote_app::player_ready() const { @@ -5487,7 +5489,7 @@ void dsnote_app::recorder_stop() { void dsnote_app::recorder_reset() { m_recorder.reset(); } bool dsnote_app::player_playing() const { - return m_player && m_player->state() == QMediaPlayer::State::PlayingState; + return m_player && m_player->playbackState() == QMediaPlayer::PlaybackState::PlayingState; } void dsnote_app::player_set_position(long long position) { diff --git a/src/fake_keyboard.cpp b/src/fake_keyboard.cpp index 0facd62f..7985ab75 100644 --- a/src/fake_keyboard.cpp +++ b/src/fake_keyboard.cpp @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -567,7 +566,10 @@ void fake_keyboard::init_ydo() { if (!m_xkb_ctx) throw std::runtime_error{"no xkb context"}; #ifdef USE_X11_FEATURES - auto *xcb_conn = QX11Info::connection(); + xcb_connection_t *xcb_conn = nullptr; + if (auto *x11App = qGuiApp->nativeInterface()) { + xcb_conn = x11App->connection(); + } if (xcb_conn) { auto device_id = xkb_x11_get_core_keyboard_device_id(xcb_conn); if (device_id == -1) throw std::runtime_error{"no xkb keyboard"}; @@ -756,10 +758,13 @@ void fake_keyboard::send_text_xdo(const QString &text) { void fake_keyboard::init_legacy() { LOGD("using legacy fake-keyboard"); - m_x11_display = QX11Info::display(); + auto *x11App = qGuiApp->nativeInterface(); + if (!x11App) throw std::runtime_error{"no x11 application"}; + + m_x11_display = x11App->display(); if (!m_x11_display) throw std::runtime_error{"no x11 display"}; - m_xcb_conn = QX11Info::connection(); + m_xcb_conn = x11App->connection(); if (!m_xcb_conn) throw std::runtime_error{"no xcb connection"}; auto device_id = xkb_x11_get_core_keyboard_device_id(m_xcb_conn); @@ -818,11 +823,12 @@ void fake_keyboard::init_legacy() { void fake_keyboard::init_xdo() { LOGD("using xdo fake-keyboard"); - if (!QX11Info::display()) { + auto *x11App = qGuiApp->nativeInterface(); + if (!x11App || !x11App->display()) { LOGF("no x11 display"); } - m_xdo = xdo_new_with_opened_display(QX11Info::display(), nullptr, 0); + m_xdo = xdo_new_with_opened_display(x11App->display(), nullptr, 0); if (!m_xdo) { LOGF("can't create xdo"); } @@ -947,14 +953,13 @@ void fake_keyboard::connect_wayland() { std::lock_guard lock{m_wl_mtx}; - auto *native = QGuiApplication::platformNativeInterface(); - if (!native) { - LOGW("can't get native interface"); + auto *waylandApp = 
qGuiApp->nativeInterface(); + if (!waylandApp) { + LOGW("can't get wayland native interface"); return; } - m_wl_display = static_cast( - native->nativeResourceForIntegration("display")); + m_wl_display = waylandApp->display(); if (!m_wl_display) { LOGW("can't get wl display interface"); return; diff --git a/src/fake_keyboard.hpp b/src/fake_keyboard.hpp index 968e1598..ddaf5085 100644 --- a/src/fake_keyboard.hpp +++ b/src/fake_keyboard.hpp @@ -21,8 +21,8 @@ #include #ifdef USE_X11_FEATURES -#include - +struct _XDisplay; +typedef struct _XDisplay Display; struct xcb_connection_t; struct xkb_context; struct xkb_keymap; diff --git a/src/main.cpp b/src/main.cpp index 3586c2ea..3f52edfa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,7 +16,7 @@ #include #include #include -#include + #include #include #include @@ -520,7 +520,6 @@ int main(int argc, char* argv[]) { #ifdef USE_SFOS const auto& app = *SailfishApp::application(argc, argv); #else - QCoreApplication::setAttribute(Qt::AA_EnableHighDpiScaling); QApplication app(argc, argv); QGuiApplication::setWindowIcon(QIcon{QStringLiteral(":/app_icon.svg")}); #endif diff --git a/src/mic_source.cpp b/src/mic_source.cpp index 8394e1cf..85fd3f40 100644 --- a/src/mic_source.cpp +++ b/src/mic_source.cpp @@ -8,6 +8,8 @@ #include "mic_source.h" #include +#include +#include #include mic_source::mic_source(const QString& preferred_audio_input, QObject* parent) @@ -33,38 +35,37 @@ void mic_source::stop() { } void mic_source::slowdown() { - // do notning } void mic_source::speedup() { - // do notning } static QAudioFormat audio_format() { QAudioFormat format; format.setSampleRate(16000); format.setChannelCount(1); - format.setSampleSize(16); - format.setCodec(QStringLiteral("audio/pcm")); - format.setByteOrder(QAudioFormat::LittleEndian); - format.setSampleType(QAudioFormat::SignedInt); + format.setSampleFormat(QAudioFormat::Int16); return format; } static bool has_audio_input(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); return std::find_if(ad_list.cbegin(), ad_list.cend(), [&name](const auto& ad) { - return ad.deviceName() == name; + return ad.description() == name; }) != ad_list.cend(); } -static QAudioDeviceInfo audio_input_info(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); - return *std::find_if( +static QAudioDevice audio_input_info(const QString& name) { + auto ad_list = QMediaDevices::audioInputs(); + auto it = std::find_if( ad_list.cbegin(), ad_list.cend(), - [&name](const auto& ad) { return ad.deviceName() == name; }); + [&name](const auto& ad) { return ad.description() == name; }); + if (it != ad_list.cend()) { + return *it; + } + return QMediaDevices::defaultAudioInput(); } QStringList mic_source::audio_inputs() { @@ -72,12 +73,12 @@ QStringList mic_source::audio_inputs() { auto format = audio_format(); - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); qDebug() << "supported audio input devices:"; for (const auto& ad : ad_list) { if (ad.isFormatSupported(format)) { - qDebug() << ad.deviceName(); - list.push_back(ad.deviceName()); + qDebug() << ad.description(); + list.push_back(ad.description()); } } @@ -92,34 +93,34 @@ void mic_source::init_audio(const QString& preferred_audio_input) { auto input_name{preferred_audio_input}; if (preferred_audio_input.isEmpty() || !has_audio_input(preferred_audio_input)) { - auto info = 
QAudioDeviceInfo::defaultInputDevice(); + auto info = QMediaDevices::defaultAudioInput(); if (info.isNull()) { qWarning() << "no audio input"; throw std::runtime_error("no audio input"); } - input_name = info.deviceName(); + input_name = info.description(); } auto input_info = audio_input_info(input_name); if (!input_info.isFormatSupported(format)) { qWarning() << "format not supported for audio input:" - << input_info.deviceName(); + << input_info.description(); throw std::runtime_error("audio format is not supported"); } - qDebug() << "using audio input:" << input_info.deviceName() + qDebug() << "using audio input:" << input_info.description() << "(preferred was " << preferred_audio_input << ")"; - m_audio_input = std::make_unique(input_info, format); + m_audio_input = std::make_unique(input_info, format); - connect(m_audio_input.get(), &QAudioInput::stateChanged, this, + connect(m_audio_input.get(), &QAudioSource::stateChanged, this, &mic_source::handle_state_changed); } void mic_source::start() { m_audio_device = m_audio_input->start(); - m_timer.setInterval(200); // 200 ms + m_timer.setInterval(200); connect(&m_timer, &QTimer::timeout, this, &mic_source::handle_read_timeout); m_timer.start(); } @@ -145,11 +146,6 @@ void mic_source::handle_read_timeout() { if (m_stopped && m_audio_input->state() != QAudio::State::SuspendedState) stop(); - /*bool bytes_available = !m_eof || m_audio_input->bytesReady() > 0; - qDebug() << "mic read timeout: b_avai=" << bytes_available - << "eof=" << m_eof << "ended=" << m_ended << "sof=" << m_sof - << "b_ready=" << m_audio_input->bytesReady();*/ - if (m_ended) { emit ended(); m_timer.stop(); @@ -159,24 +155,12 @@ void mic_source::handle_read_timeout() { emit audio_available(); } -void mic_source::clear() { - qDebug() << "mic clear"; - - char buff[std::numeric_limits::max()]; - while (m_audio_device->read(buff, std::numeric_limits::max())) - continue; -} - -audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { +mic_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { audio_data data; data.data = buf; data.sof = m_sof; - bool bytes_available = !m_eof || m_audio_input->bytesReady() > 0; - - /*qDebug() << "read_audio: b_avai=" << bytes_available << "eof=" << m_eof - << "ended=" << m_ended << "sof=" << m_sof - << "b_ready=" << m_audio_input->bytesReady();*/ + bool bytes_available = !m_eof || m_audio_input->bytesAvailable() > 0; if (!bytes_available) { data.eof = m_eof; @@ -184,7 +168,7 @@ audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { return data; } - data.size = m_audio_device->read(buf, max_size); + data.size = static_cast(m_audio_device->read(buf, static_cast(max_size))); data.eof = m_eof && !bytes_available; m_sof = false; @@ -193,3 +177,5 @@ audio_source::audio_data mic_source::read_audio(char* buf, size_t max_size) { return data; } + +void mic_source::clear() { m_audio_device->readAll(); } diff --git a/src/mic_source.h b/src/mic_source.h index a4fec016..a5b7a141 100644 --- a/src/mic_source.h +++ b/src/mic_source.h @@ -8,7 +8,7 @@ #ifndef MIC_SOURCE_H #define MIC_SOURCE_H -#include +#include #include #include #include @@ -33,7 +33,7 @@ class mic_source : public audio_source { static QStringList audio_inputs(); private: - std::unique_ptr m_audio_input; + std::unique_ptr m_audio_input; QTimer m_timer; QIODevice* m_audio_device = nullptr; bool m_eof = false; diff --git a/src/models_manager.cpp b/src/models_manager.cpp index 35f8311c..2517baeb 100644 --- a/src/models_manager.cpp +++ 
b/src/models_manager.cpp @@ -814,7 +814,8 @@ void models_manager::download(const QString& id, download_type type, int part, : model.size; QNetworkRequest request{url}; - request.setAttribute(QNetworkRequest::FollowRedirectsAttribute, true); + request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, + QNetworkRequest::NoLessSafeRedirectPolicy); if (type == download_type::all || type == download_type::model_sup) { path = model_path(model.file_name); @@ -1354,7 +1355,7 @@ void models_manager::init_config() { } QString data_dir{ - QStandardPaths::writableLocation(QStandardPaths::DataLocation)}; + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)}; QDir dir{data_dir}; if (!dir.exists()) if (!dir.mkpath(data_dir)) qWarning() << "failed to create data dir"; @@ -2724,7 +2725,7 @@ void models_manager::reset_models() { qDebug() << "removing models file"; auto models_file_path = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(models_file); QFile{models_file_path}.remove(); @@ -2734,7 +2735,7 @@ void models_manager::parse_models_file( bool reset, langs_t* langs, packs_t* packs, models_t* models, std::optional models_availability) { const auto models_file_path = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(models_file); if (!QFile::exists(models_file_path)) init_config(); diff --git a/src/module_tools.cpp b/src/module_tools.cpp index 1aad41df..b1060aac 100644 --- a/src/module_tools.cpp +++ b/src/module_tools.cpp @@ -44,7 +44,7 @@ static QString runtime_prefix() { namespace module_tools { QString unpacked_dir(const QString& name) { return QStringLiteral("%1/%2").arg( - QStandardPaths::writableLocation(QStandardPaths::DataLocation), name); + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation), name); } bool init_module(const QString& name) { @@ -158,7 +158,7 @@ bool unpack_module(const QString& name) { } auto unpack_dir = - QStandardPaths::writableLocation(QStandardPaths::DataLocation); + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation); auto unpack_file = QStringLiteral("%1/%2.tar").arg(unpack_dir, name); QDir{QStringLiteral("%1/%2").arg(unpack_dir, name)}.removeRecursively(); diff --git a/src/py_tools.cpp b/src/py_tools.cpp index 9335f6b1..53133096 100644 --- a/src/py_tools.cpp +++ b/src/py_tools.cpp @@ -380,7 +380,7 @@ bool init_module() { if (!module_tools::init_module(QStringLiteral("python"))) return false; auto py_path = - QStandardPaths::writableLocation(QStandardPaths::DataLocation) + "/" + + QStandardPaths::writableLocation(QStandardPaths::AppDataLocation) + "/" + python_site_path; qDebug() << "setting env PYTHONPATH=" << py_path; diff --git a/src/recorder.cpp b/src/recorder.cpp index b0b61785..a8119172 100644 --- a/src/recorder.cpp +++ b/src/recorder.cpp @@ -8,6 +8,8 @@ #include "recorder.hpp" #include +#include +#include #include #include #include @@ -17,18 +19,22 @@ #include "settings.h" static bool has_audio_input(const QString& name) { - auto ad_list = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); + auto ad_list = QMediaDevices::audioInputs(); return std::find_if(ad_list.cbegin(), ad_list.cend(), [&name](const auto& ad) { - return ad.deviceName() == name; + return ad.description() == name; }) != ad_list.cend(); } -static QAudioDeviceInfo audio_input_info(const QString& name) { - auto ad_list = 
QAudioDeviceInfo::availableDevices(QAudio::AudioInput); - return *std::find_if( +static QAudioDevice audio_input_info(const QString& name) { + auto ad_list = QMediaDevices::audioInputs(); + auto it = std::find_if( ad_list.cbegin(), ad_list.cend(), - [&name](const auto& ad) { return ad.deviceName() == name; }); + [&name](const auto& ad) { return ad.description() == name; }); + if (it != ad_list.cend()) { + return *it; + } + return QMediaDevices::defaultAudioInput(); } recorder::recorder(QString wav_file_path, QObject* parent) @@ -72,35 +78,31 @@ void recorder::init() { auto input_name = settings::instance()->audio_input_device(); if (input_name.isEmpty() || !has_audio_input(input_name)) { - auto info = QAudioDeviceInfo::defaultInputDevice(); + auto info = QMediaDevices::defaultAudioInput(); if (info.isNull()) { qWarning() << "no audio input"; throw std::runtime_error("no audio input"); } - input_name = info.deviceName(); + input_name = info.description(); } auto input_info = audio_input_info(input_name); if (!input_info.isFormatSupported(format)) { qWarning() << "format not supported for audio input:" - << input_info.deviceName(); + << input_info.description(); throw std::runtime_error("audio format is not supported"); } - qDebug() << "using audio input:" << input_info.deviceName(); - m_audio_input = std::make_unique(input_info, format); + qDebug() << "using audio input:" << input_info.description(); + m_audio_input = std::make_unique(input_info, format); - connect(m_audio_input.get(), &QAudioInput::stateChanged, this, + connect(m_audio_input.get(), &QAudioSource::stateChanged, this, [this](QAudio::State new_state) { qDebug() << "recorder state:" << new_state; emit recording_changed(); }); - connect(m_audio_input.get(), &QAudioInput::notify, this, [this]() { - m_duration = m_audio_input->elapsedUSecs() / 1000000; - emit duration_changed(); - }); } } @@ -108,10 +110,7 @@ QAudioFormat recorder::make_audio_format() { QAudioFormat format; format.setSampleRate(m_sample_rate); format.setChannelCount(m_num_channels); - format.setSampleSize(16); - format.setCodec(QStringLiteral("audio/pcm")); - format.setByteOrder(QAudioFormat::LittleEndian); - format.setSampleType(QAudioFormat::SignedInt); + format.setSampleFormat(QAudioFormat::Int16); return format; } diff --git a/src/recorder.hpp b/src/recorder.hpp index 0456c785..450140a6 100644 --- a/src/recorder.hpp +++ b/src/recorder.hpp @@ -9,7 +9,7 @@ #define RECORDER_H #include -#include +#include #include #include #include @@ -61,7 +61,7 @@ class recorder final : public QObject { uint32_t data_size = 0; }; - std::unique_ptr m_audio_input; + std::unique_ptr m_audio_input; QString m_input_file_path; QString m_wav_file_path; QFile m_audio_device; diff --git a/src/settings.cpp b/src/settings.cpp index 6846e510..9f5b289f 100644 --- a/src/settings.cpp +++ b/src/settings.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,6 +33,19 @@ #include "module_tools.hpp" #include "qtlogger.hpp" +#ifdef USE_DESKTOP +static QStringList qt6_available_styles() { + return QStringList{ + QStringLiteral("Basic"), + QStringLiteral("Fusion"), + QStringLiteral("Material"), + QStringLiteral("Universal"), + QStringLiteral("org.kde.desktop"), + QStringLiteral("org.kde.breeze") + }; +} +#endif + QDebug operator<<(QDebug d, settings::mode_t mode) { switch (mode) { case settings::mode_t::Stt: @@ -540,10 +553,12 @@ static QString file_save_filename(const QDir& dir, QString filename, return filename + '.' 
+ ext; } - QRegExp rx{"\\d+$"}; - if (auto idx = rx.indexIn(filename); idx >= 0) { + QRegularExpression rx{QStringLiteral("\\d+$")}; + QRegularExpressionMatch match = rx.match(filename); + if (match.hasMatch()) { + auto idx = match.capturedStart(); bool ok = false; - auto ii = filename.midRef(idx).toInt(&ok); + auto ii = QStringView{filename}.mid(idx).toInt(&ok); if (ok && ii < max_i) { i = ii; filename = filename.mid(0, idx) + "%1." + ext; @@ -1009,7 +1024,7 @@ int settings::qt_style_idx() const { #ifdef USE_DESKTOP auto name = qt_style_name(); - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); if (name.isEmpty()) return styles.size(); @@ -1020,7 +1035,7 @@ int settings::qt_style_idx() const { void settings::set_qt_style_idx([[maybe_unused]] int value) { #ifdef USE_DESKTOP - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); if (value < 0 || value >= styles.size()) { set_qt_style_name({}); @@ -1036,7 +1051,7 @@ QString settings::qt_style_name() const { auto name = value(QStringLiteral("qt_style_name"), default_qt_style).toString(); - if (!QQuickStyle::availableStyles().contains(name)) return {}; + if (!qt6_available_styles().contains(name)) return {}; return name; #else @@ -1046,7 +1061,7 @@ QString settings::qt_style_name() const { void settings::set_qt_style_name([[maybe_unused]] QString name) { #ifdef USE_DESKTOP - if (!QQuickStyle::availableStyles().contains(name)) name.clear(); + if (!qt6_available_styles().contains(name)) name.clear(); if (qt_style_name() != name) { setValue(QStringLiteral("qt_style_name"), name); @@ -1323,7 +1338,7 @@ QString settings::audio_format_str() const { QStringList settings::qt_styles() const { #ifdef USE_DESKTOP - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); styles.append(tr("Don't force any style")); return styles; #else @@ -1364,17 +1379,9 @@ void settings::update_qt_style(QQmlApplicationEngine* engine) { engine->addImportPath(QStringLiteral("%1/qml").arg(prefix)); } - if (auto prefix = module_tools::path_to_dir_for_path( - QStringLiteral("lib"), QStringLiteral("qml/QtQuick/Controls.2")); - !prefix.isEmpty()) { - QQuickStyle::addStylePath( - QStringLiteral("%1/qml/QtQuick/Controls.2").arg(prefix)); - } - - auto styles = QQuickStyle::availableStyles(); + auto styles = qt6_available_styles(); LOGD("available styles: " << styles); - LOGD("style paths: " << QQuickStyle::stylePathList()); LOGD("import paths: " << engine->importPathList()); LOGD("library paths: " << QCoreApplication::libraryPaths()); @@ -1750,6 +1757,7 @@ X(whispercpp) } X(whispercpp) X(fasterwhisper) +X(canary) #undef X #define X(name, enabled) \ diff --git a/src/settings.h b/src/settings.h index 2c91ae7d..3d25ba1e 100644 --- a/src/settings.h +++ b/src/settings.h @@ -65,6 +65,7 @@ #define GPU_ENGINE_TABLE \ X(whispercpp, false) \ X(fasterwhisper, false) \ + X(canary, true) \ X(coqui, true) \ X(whisperspeech, true) \ X(parler, true) \ @@ -397,6 +398,7 @@ class settings : public QSettings, public singleton { set_##name##_profile NOTIFY name##_changed) X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ Q_PROPERTY(bool name##_use_gpu READ name##_use_gpu WRITE \ @@ -996,6 +998,7 @@ class settings : public QSettings, public singleton { Q_INVOKABLE void reset_##name##_options(); X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ bool name##_use_gpu() const; \ @@ -1101,6 +1104,7 @@ class settings : public QSettings, public singleton { #define 
X(name) void name##_changed(); X(whispercpp) X(fasterwhisper) + X(canary) #undef X #define X(name, _) \ void name##_gpu_device_changed(); \ diff --git a/src/speech_service.cpp b/src/speech_service.cpp index a376f207..f0d1cefd 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -263,7 +263,7 @@ speech_service::speech_service(QObject *parent) connect( this, &speech_service::requet_update_task_state, this, [this] { update_task_state(); }, Qt::QueuedConnection); - connect(&m_player, &QMediaPlayer::stateChanged, this, + connect(&m_player, &QMediaPlayer::playbackStateChanged, this, &speech_service::handle_player_state_changed, Qt::QueuedConnection); connect( settings::instance(), &settings::default_stt_model_changed, this, @@ -2464,7 +2464,7 @@ void speech_service::play_beep(beep_role_t beep_role) { auto get_beep_file = [](const QString &name) { // get from user data dir auto file_user = - QDir{QStandardPaths::writableLocation(QStandardPaths::DataLocation)} + QDir{QStandardPaths::writableLocation(QStandardPaths::AppDataLocation)} .filePath(name); qDebug() << "file user:" << file_user; if (QFileInfo::exists(file_user)) { @@ -2499,15 +2499,15 @@ void speech_service::play_beep(beep_role_t beep_role) { return; } - m_beep_player.setMedia(QMediaContent{QUrl::fromLocalFile(beep_file)}); + m_beep_player.setSource(QUrl::fromLocalFile(beep_file)); m_beep_player.play(); } void speech_service::handle_tts_queue() { if (m_tts_queue.empty()) return; - if (m_player.state() == QMediaPlayer::State::PlayingState || - m_player.state() == QMediaPlayer::State::PausedState) + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState || + m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState) return; if (m_current_task && m_current_task->paused) return; @@ -2525,8 +2525,7 @@ void speech_service::handle_tts_queue() { result.remove_audio_file = true; } - m_player.setMedia( - QMediaContent{QUrl::fromLocalFile(result.audio_file_path)}); + m_player.setSource(QUrl::fromLocalFile(result.audio_file_path)); m_player.play(); @@ -2635,12 +2634,12 @@ void speech_service::handle_ttt_text_repaired(const QString &text, } void speech_service::handle_player_state_changed( - QMediaPlayer::State new_state) { + QMediaPlayer::PlaybackState new_state) { qDebug() << "player new state:" << new_state; update_task_state(); - if (new_state == QMediaPlayer::State::StoppedState && m_current_task && + if (new_state == QMediaPlayer::PlaybackState::StoppedState && m_current_task && m_current_task->engine == engine_t::tts && !m_current_task->paused && !m_tts_queue.empty()) { const auto &result = m_tts_queue.front(); @@ -3883,7 +3882,7 @@ int speech_service::tts_pause_speech(int task) { m_current_task->paused = true; - if (m_player.state() == QMediaPlayer::PlayingState) m_player.pause(); + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState) m_player.pause(); update_task_state(); @@ -3916,7 +3915,7 @@ int speech_service::tts_resume_speech(int task) { m_current_task->paused = false; - if (m_player.state() == QMediaPlayer::PausedState) m_player.play(); + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState) m_player.play(); handle_tts_queue(); @@ -4126,7 +4125,7 @@ void speech_service::update_task_state() { // 6 = Canceling auto new_task_state = [&] { - if (m_player.state() == QMediaPlayer::State::PlayingState && + if (m_player.playbackState() == QMediaPlayer::PlaybackState::PlayingState && m_state == state_t::playing_speech) { return 4; } else if (m_stt_engine && 
m_stt_engine->started()) {
@@ -4140,8 +4139,8 @@ void speech_service::update_task_state() {
                 case stt_engine::speech_detection_status_t::no_speech:
                     break;
             }
-        } else if (m_player.state() == QMediaPlayer::State::PausedState ||
-                   (m_player.state() == QMediaPlayer::State::StoppedState &&
+        } else if (m_player.playbackState() == QMediaPlayer::PlaybackState::PausedState ||
+                   (m_player.playbackState() == QMediaPlayer::PlaybackState::StoppedState &&
                     m_state == state_t::playing_speech && m_current_task &&
                     m_current_task->paused)) {
             return 5;
diff --git a/src/speech_service.h b/src/speech_service.h
index ad8de372..65aabee0 100644
--- a/src/speech_service.h
+++ b/src/speech_service.h
@@ -446,7 +446,7 @@ class speech_service : public QObject, public singleton {
                                    double progress, bool last);
     void handle_tts_speech_encoded(tts_partial_result_t result);
     void handle_speech_to_file(const tts_partial_result_t &result);
-    void handle_player_state_changed(QMediaPlayer::State new_state);
+    void handle_player_state_changed(QMediaPlayer::PlaybackState new_state);
     void handle_audio_available();
     void handle_stt_engine_state_changed(
         stt_engine::speech_detection_status_t status, int task_id);
diff --git a/src/wl_clipboard.cpp b/src/wl_clipboard.cpp
index 4f043e68..991137bd 100644
--- a/src/wl_clipboard.cpp
+++ b/src/wl_clipboard.cpp
@@ -2,7 +2,7 @@
 #include
 #include
-#include
+
 #include
 #include "logger.hpp"