feat: TTS with audio ducking
Some checks failed
TrafficCue CI / check (push) Has been cancelled
TrafficCue CI / build (push) Has been cancelled

This commit is contained in:
2025-06-25 13:21:31 +02:00
parent a2a8255ebf
commit dc4679f072
12 changed files with 374 additions and 23 deletions

View File

@@ -27,6 +27,7 @@
import * as Popover from "../ui/popover";
import { routing } from "$lib/services/navigation/routing.svelte";
import InRouteSidebar from "./sidebar/InRouteSidebar.svelte";
import say from "$lib/services/navigation/TTS";
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const views: Record<string, Component<any>> = {
@@ -197,6 +198,15 @@
>
Join Remote Location
</Button>
<Button
variant="outline"
onclick={async () => {
// await say("This is a test of the text to speech system.");
await say("Dies ist ein Test des Text-zu-Sprache-Systems.");
}}
>
Test TTS
</Button>
</div>
</Popover.Content>
</Popover.Root>

View File

@@ -0,0 +1,10 @@
import { registerPlugin } from "@capacitor/core";
/**
 * Thin wrapper around the native "Duck" Capacitor plugin, which lowers
 * ("ducks") other audio sources while TTS is speaking and restores them
 * afterwards.
 */
export interface DuckPlugin {
  // NOTE(review): Capacitor plugin methods usually return Promise<void>;
  // these are typed fire-and-forget — confirm against the native implementation.
  duck: () => void;
  unduck: () => void;
}

// Resolves to the platform implementation registered under the name "Duck".
const Duck = registerPlugin<DuckPlugin>("Duck");

export default Duck;

View File

@@ -0,0 +1,135 @@
import * as tts from '@diffusionstudio/vits-web';
import TTSWorker from './TTSWorker.ts?worker';
// const VOICE = "en_US-hfc_female-medium";
const VOICE = "de_DE-thorsten-medium";
/**
 * Downloads the configured voice model so it is available for local
 * synthesis, logging progress as a percentage along the way.
 */
export async function downloadVoice(): Promise<void> {
  await tts.download(VOICE, (p) => {
    const percent = Math.round((p.loaded * 100) / p.total);
    console.log(`Downloading ${p.url} - ${percent}%`);
  });
}
// One queued utterance. `audio` is filled in asynchronously by the worker;
// `shouldPlay` marks the item as requested for playback.
interface TTSItem {
  text: string;
  audio?: Blob; // Optional audio blob if already generated
  shouldPlay?: boolean; // Flag to indicate if the audio should be played immediately
}

// FIFO of pending utterances, shared by the two polling intervals below.
const queue: TTSItem[] = [];
// True while an <audio> element created by the playback interval is running.
let playing = false;
// Count of in-flight synthesis requests; the generator interval only
// dispatches new work while this is 0.
let generating = 0;

// Dedicated worker running vits-web inference off the main thread.
const worker = new TTSWorker();
// Receives synthesized audio from the worker and attaches it to the
// matching queue entry.
worker.addEventListener(
  'message',
  (event: MessageEvent<{ type: 'result'; audio: Blob; text: string }>) => {
    if (event.data.type != 'result') return;
    // A 'result' means the in-flight request finished. Decrement
    // unconditionally: if the queue entry disappeared before the result
    // arrived, the old code never decremented and the generator interval
    // (which waits for generating === 0) stalled forever.
    generating--;
    const item = queue.find((entry) => entry.text === event.data.text);
    if (item) {
      item.audio = event.data.audio;
    }
  },
);
// Pre-generate audio for queued items, strictly one request at a time:
// skip the whole tick while a synthesis request is still in flight.
setInterval(() => {
  if (generating != 0) return;
  // Dispatch exactly one request per tick. (The previous loop posted a
  // message for *every* audio-less item in a single tick — the
  // `generating` guard above is only checked once — defeating the
  // intended one-at-a-time throttle.)
  const pending = queue.find((item) => !item.audio);
  if (!pending) return;
  generating++;
  console.log("Generating audio for:", pending.text);
  worker.postMessage({
    type: 'init',
    text: pending.text,
    voiceId: VOICE,
  });
}, 100);
// Play the first ready item (shouldPlay set and audio generated),
// at most one per tick and never while another clip is playing.
setInterval(() => {
  if (playing) return;
  if (queue.length === 0) return;
  for (const item of queue) {
    if (item.shouldPlay && item.audio) {
      playing = true;
      const url = URL.createObjectURL(item.audio);
      const audio = new Audio();
      audio.src = url;
      // Release the object URL and the playback lock whether the clip
      // ends normally or fails — the old code leaked every object URL,
      // and a rejected play() left `playing` true, wedging the queue.
      const finish = () => {
        URL.revokeObjectURL(url);
        playing = false;
      };
      audio.onended = finish;
      audio.onerror = finish;
      audio.play().catch((error) => {
        console.error("Error playing TTS audio:", error);
        finish();
      });
      queue.splice(queue.indexOf(item), 1); // Remove item once playback starts
      return; // Exit after playing one item
    }
  }
}, 100);
export function queueSpeech(text: string) {
// const wav = await tts.predict({
// text,
// voiceId: VOICE,
// });
// const audio = new Audio();
// audio.src = URL.createObjectURL(wav);
// audio.play();
if (queue.some(item => item.text === text)) {
// console.warn("Text already in queue, not adding again:", text);
return;
}
console.log("Queuing text for speech:", text);
queue.push({
text,
shouldPlay: false
});
}
/**
 * Flags the queued item matching `text` for immediate playback. If the
 * text was never queued, it is added now with shouldPlay set — but its
 * audio still has to be generated first, so prefer queueSpeech ahead of time.
 */
export function speak(text: string) {
  const existing = queue.find((entry) => entry.text === text);
  if (existing) {
    existing.shouldPlay = true;
    return;
  }
  console.warn("Adding new item to play immediately. Consider queuing instead!");
  queue.push({ text, shouldPlay: true });
}

View File

@@ -0,0 +1,46 @@
import type { TextToSpeechPlugin } from "@capacitor-community/text-to-speech";
import { Capacitor } from "@capacitor/core";
import Duck from "../DuckPlugin";
// Selected backend: the Capacitor plugin object on native platforms, the
// sentinel "web" in browsers, or null before initTTS() has run.
export let tts: TextToSpeechPlugin | "web" | null = null;
/**
 * Selects a TTS backend: the Capacitor community plugin on native
 * platforms, the browser's Web Speech API (sentinel "web") otherwise.
 */
export async function initTTS() {
  if (!Capacitor.isNativePlatform()) {
    console.log("Using Web TTS");
    tts = "web";
    return;
  }
  console.log("Using Capacitor TTS");
  const module = await import("@capacitor-community/text-to-speech");
  tts = module.TextToSpeech;
}
/**
 * Speaks `text` aloud, ducking other audio for the duration.
 *
 * Uses the Capacitor TTS plugin on native platforms and the Web Speech
 * API in browsers; lazily initializes the backend on first call.
 */
export default async function say(text: string) {
  if (!tts) {
    await initTTS();
  }
  Duck.duck();
  try {
    if (tts !== "web") {
      try {
        await tts?.speak({
          text: text,
          lang: "deu-default", // TODO: make this configurable
        });
      } catch (e) {
        // Debug alert(e) removed; log with context instead.
        console.error("Error speaking text", e);
      }
    } else {
      // speechSynthesis.speak() returns immediately, so wrap it in a
      // promise and resolve on end/error — otherwise we would unduck
      // while the utterance is still being spoken.
      await new Promise<void>((resolve) => {
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.lang = "de-DE";
        utterance.onend = () => resolve();
        utterance.onerror = () => resolve();
        window.speechSynthesis.speak(utterance);
      });
    }
  } finally {
    // Always restore other apps' audio, even if speaking failed.
    Duck.unduck();
  }
}

View File

@@ -0,0 +1,16 @@
import * as tts from '@diffusionstudio/vits-web';
// Handles synthesis requests from the main thread: runs vits-web
// inference and posts the resulting audio blob back, tagged with the
// source text so the caller can match it to its queue entry.
async function main(event: MessageEvent<tts.InferenceConfg & { type: 'init' }>) {
  if (event.data?.type != 'init') return;
  const start = performance.now();
  try {
    const blob = await tts.predict({
      text: event.data.text,
      voiceId: event.data.voiceId,
    });
    console.log('Time taken:', performance.now() - start + ' ms');
    self.postMessage({ type: 'result', audio: blob, text: event.data.text });
  } catch (error) {
    // Without this, a rejected predict() dies as an unhandled rejection.
    // NOTE(review): the main thread only decrements its in-flight counter
    // on a 'result' message, so a failed prediction still stalls its
    // generator — consider posting an error message it can react to.
    console.error('TTS prediction failed for:', event.data.text, error);
  }
}

self.addEventListener('message', main);

View File

@@ -1,5 +1,6 @@
import { location } from "$lib/components/lnv/location.svelte";
import { map } from "$lib/components/lnv/map.svelte";
import say from "./TTS";
import type { ValhallaRequest } from "./ValhallaRequest";
import type { LngLatBoundsLike } from "maplibre-gl";
@@ -156,18 +157,21 @@ async function tickRoute() {
}
const bgi = currentManeuver.begin_shape_index;
const location = getUserLocation();
const loc = {
lat: location.lat,
lon: location.lng,
};
const polyline = decodePolyline(trip.legs[0].shape);
// Check if the user location is on the last point of the entire route
if (isOnPoint(location, polyline[polyline.length - 1])) {
if (isOnPoint(loc, polyline[polyline.length - 1])) {
console.log("Reached destination!");
stopNavigation();
return;
}
// Check if the user is on the route
if (!isOnShape(location, polyline)) {
if (!isOnShape(loc, polyline)) {
console.log("Off route!");
info.isOffRoute = true;
// TODO: Implement re-routing logic
@@ -180,20 +184,25 @@ async function tickRoute() {
currentManeuver.verbal_pre_transition_instruction &&
!hasAnnouncedPreInstruction
) {
const distanceToEnd = calculateDistance(location, polyline[bgi]);
const distanceToEnd = calculateDistance(loc, polyline[bgi]);
// console.log("Distance to end of current maneuver: ", distanceToEnd, " meters");
if (distanceToEnd <= 100) {
console.log("Speed: ", location.speed, " km/h");
const verbalDistance = verbalPreInstructionDistance(
location.speed || 50, // Assuming location has a speed property
);
if (distanceToEnd <= verbalDistance) {
hasAnnouncedPreInstruction = true;
console.log(
"[Verbal instruction] ",
currentManeuver.verbal_pre_transition_instruction,
);
say(currentManeuver.verbal_pre_transition_instruction);
}
}
// Check if the user is past the current maneuver
// Checks if the user is still on the current maneuver's polyline
if (!isOnShape(location, polyline.slice(bgi))) {
if (!isOnShape(loc, polyline.slice(bgi))) {
return; // User is not on the current maneuver's polyline, do not update
}
@@ -208,7 +217,7 @@ async function tickRoute() {
if (currentManeuver.verbal_post_transition_instruction) {
hasAnnouncedPreInstruction = false;
const distanceToEnd = calculateDistance(
location,
loc,
polyline[
trip.legs[0].maneuvers[routing.currentTripInfo.maneuverIdx + 1]
.begin_shape_index
@@ -219,6 +228,7 @@ async function tickRoute() {
"[Verbal instruction] ",
currentManeuver.verbal_post_transition_instruction,
);
say(currentManeuver.verbal_post_transition_instruction);
}
}
@@ -232,9 +242,16 @@ async function tickRoute() {
info.currentManeuver = trip.legs[0].maneuvers[info.maneuverIdx];
// queueSpeech(info.currentManeuver.verbal_pre_transition_instruction || "");
// queueSpeech(info.currentManeuver.verbal_post_transition_instruction || "");
// TODO: verbal instructions
}
/**
 * Distance (meters) before a maneuver at which the verbal pre-transition
 * instruction should be announced, scaled linearly with the current
 * speed (km/h) so faster travel triggers the announcement earlier.
 */
function verbalPreInstructionDistance(speed: number): number {
  const announceDistance = speed * 2.222 + 37.144;
  return announceDistance;
}
export function stopNavigation() {
if (routing.currentTripInfo.int) {
clearInterval(routing.currentTripInfo.int);
@@ -245,20 +262,20 @@ export function stopNavigation() {
removeAllRoutes();
}
/**
 * Current user position as a WorldLocation, read from the shared reactive
 * `location` store (note the lng → lon field rename).
 */
function getUserLocation(): WorldLocation {
  const { lat, lng } = location;
  return { lat, lon: lng };
}
// function getUserLocation(): WorldLocation {
// // return geolocate.currentLocation!;
// return {
// lat: location.lat,
// lon: location.lng,
// };
// // const lnglat = window.geolocate._userLocationDotMarker.getLngLat();
// // return { lat: lnglat.lat, lon: lnglat.lng };
// // console.log(map.value!)
// // return {
// // lat: 0,
// // lon: 0
// // }
// }
function isOnLine(
location: WorldLocation,