feat: TTS with audio ducking
This commit is contained in:
@@ -27,6 +27,7 @@
|
||||
import * as Popover from "../ui/popover";
|
||||
import { routing } from "$lib/services/navigation/routing.svelte";
|
||||
import InRouteSidebar from "./sidebar/InRouteSidebar.svelte";
|
||||
import say from "$lib/services/navigation/TTS";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const views: Record<string, Component<any>> = {
|
||||
@@ -197,6 +198,15 @@
|
||||
>
|
||||
Join Remote Location
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
onclick={async () => {
|
||||
// await say("This is a test of the text to speech system.");
|
||||
await say("Dies ist ein Test des Text-zu-Sprache-Systems.");
|
||||
}}
|
||||
>
|
||||
Test TTS
|
||||
</Button>
|
||||
</div>
|
||||
</Popover.Content>
|
||||
</Popover.Root>
|
||||
|
||||
10
src/lib/services/DuckPlugin.ts
Normal file
10
src/lib/services/DuckPlugin.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { registerPlugin } from "@capacitor/core";
|
||||
|
||||
/**
 * Contract of the "Duck" Capacitor plugin.
 *
 * Methods invoked through a `registerPlugin` proxy are asynchronous, so both
 * calls are typed as returning a promise. Callers can then await them and
 * handle rejections (for example when the plugin is unavailable on the
 * current platform).
 */
export interface DuckPlugin {
	/** Presumably lowers the volume of other audio while speech plays — confirm against the native implementation. */
	duck: () => Promise<void>;
	/** Presumably restores the volume changed by `duck` — confirm against the native implementation. */
	unduck: () => Promise<void>;
}

// Proxy object for the plugin registered under the name "Duck".
const Duck = registerPlugin<DuckPlugin>("Duck");

export default Duck;
|
||||
135
src/lib/services/navigation/TTS.ML.ts
Normal file
135
src/lib/services/navigation/TTS.ML.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import * as tts from '@diffusionstudio/vits-web';
|
||||
import TTSWorker from './TTSWorker.ts?worker';
|
||||
|
||||
// Voice model used for synthesis. Alternative: "en_US-hfc_female-medium".
const VOICE = "de_DE-thorsten-medium";

/**
 * Downloads the model for {@link VOICE} and logs the progress of each
 * downloaded URL as a rounded percentage.
 */
export async function downloadVoice(): Promise<void> {
	await tts.download(VOICE, ({ url, loaded, total }) => {
		const percent = Math.round(loaded * 100 / total);
		console.log(`Downloading ${url} - ${percent}%`);
	});
}
|
||||
|
||||
/** One utterance tracked by the speech queue. */
interface TTSItem {
	/** Text to synthesize; also used to match worker results back to the item. */
	text: string;
	/** Synthesized audio, present once generation has finished. */
	audio?: Blob;
	/** Whether the audio should be played as soon as it is available. */
	shouldPlay?: boolean;
}
|
||||
|
||||
// Utterances waiting for generation and/or playback.
const queue: Array<TTSItem> = [];
// True while an audio element started by the playback loop is still playing.
let playing = false;
// Number of synthesis requests sent to the worker that have not returned yet.
let generating = 0;
// Worker that performs the synthesis.
const worker = new TTSWorker();
|
||||
|
||||
/**
 * Receives synthesized audio from the worker and attaches it to the queued
 * item with the same text.
 */
worker.addEventListener('message', (event: MessageEvent<{ type: 'result', audio: Blob, text: string }>) => {
	if (event.data.type !== 'result') return;

	const { audio, text } = event.data;

	// A result always ends one in-flight request, even if the item is no
	// longer queued. Otherwise the counter would never return to zero and the
	// generation loop would stall permanently.
	generating = Math.max(0, generating - 1);

	const item = queue.find(entry => entry.text === text);
	if (item) {
		item.audio = audio;
	}
});
|
||||
|
||||
/**
 * Generation loop: every 100 ms, if no synthesis request is in flight, sends
 * the first queued item that has no audio yet to the worker.
 *
 * Only one item is dispatched per idle tick, so audio really is generated one
 * at a time instead of posting every pending item in a single tick.
 */
setInterval(() => {
	// Wait for the outstanding request to come back first.
	if (generating !== 0) return;

	const pending = queue.find(item => !item.audio);
	if (!pending) return;

	generating++;
	console.log("Generating audio for:", pending.text);
	worker.postMessage({
		type: 'init',
		text: pending.text,
		voiceId: VOICE
	});
}, 100);
|
||||
|
||||
/**
 * Playback loop: every 100 ms, if nothing is playing, plays the first queued
 * item that is flagged `shouldPlay` and already has audio, and removes it from
 * the queue.
 */
setInterval(() => {
	if (playing) return;

	const next = queue.find(item => item.shouldPlay && item.audio);
	if (!next?.audio) return;

	// Remove the item up front so it cannot be picked twice.
	queue.splice(queue.indexOf(next), 1);
	playing = true;

	const url = URL.createObjectURL(next.audio);
	const audio = new Audio();
	audio.src = url;

	// Release the object URL and free the player exactly once, whichever of
	// "ended", "error" or a rejected play() comes first. Without this, a failed
	// playback would leave `playing` stuck at true and block all later speech.
	let finished = false;
	const finish = () => {
		if (finished) return;
		finished = true;
		URL.revokeObjectURL(url);
		playing = false;
	};

	audio.onended = finish;
	audio.onerror = finish;
	audio.play().catch((error: unknown) => {
		console.error("Error playing audio:", error);
		finish();
	});
}, 100);
|
||||
|
||||
/**
 * Adds `text` to the queue so its audio can be generated ahead of time.
 * The item is not played until `speak` is called for the same text.
 * Does nothing if the text is already queued.
 */
export function queueSpeech(text: string) {
	const alreadyQueued = queue.some(entry => entry.text === text);
	if (alreadyQueued) return;

	console.log("Queuing text for speech:", text);
	const entry: TTSItem = { text, shouldPlay: false };
	queue.push(entry);
}
|
||||
|
||||
/**
 * Marks `text` for playback. If the text is already queued, its item is
 * flagged to play; otherwise a new item flagged for playback is appended and
 * a warning is logged.
 */
export function speak(text: string) {
	const queued = queue.find(entry => entry.text === text);
	if (queued) {
		queued.shouldPlay = true;
		return;
	}

	console.warn("Adding new item to play immediately. Consider queuing instead!");
	const entry: TTSItem = { text, shouldPlay: true };
	queue.push(entry);
}
|
||||
46
src/lib/services/navigation/TTS.ts
Normal file
46
src/lib/services/navigation/TTS.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import type { TextToSpeechPlugin } from "@capacitor-community/text-to-speech";
|
||||
import { Capacitor } from "@capacitor/core";
|
||||
import Duck from "../DuckPlugin";
|
||||
|
||||
/**
 * Active speech backend: the Capacitor plugin on native platforms, the marker
 * "web" when the browser speech API is used, or null before initialization.
 */
export let tts: TextToSpeechPlugin | "web" | null = null;

/**
 * Selects the speech backend for the current platform and stores it in `tts`.
 * The Capacitor plugin module is imported dynamically, only on native platforms.
 */
export async function initTTS() {
	if (!Capacitor.isNativePlatform()) {
		console.log("Using Web TTS");
		tts = "web";
		return;
	}

	console.log("Using Capacitor TTS");
	const nativeModule = await import("@capacitor-community/text-to-speech");
	tts = nativeModule.TextToSpeech;
}
|
||||
|
||||
/**
 * Speaks `text` with the active backend, initializing it on first use.
 *
 * On native platforms the Duck plugin is asked to duck before speaking and to
 * unduck afterwards. The returned promise resolves once speech has finished
 * (or failed) on both backends, so unducking never happens while the
 * utterance is still playing.
 *
 * @param text Text to speak.
 */
export default async function say(text: string) {
	if (!tts) {
		await initTTS();
	}

	// No web implementation of Duck is registered in the visible code, so it is
	// only called on native platforms — TODO confirm if a web implementation is added.
	const canDuck = Capacitor.isNativePlatform();

	if (canDuck) {
		try {
			await Duck.duck();
		} catch (e) {
			// Ducking is best-effort; speech must still happen if it fails.
			console.error("Error ducking audio", e);
		}
	}

	try {
		if (tts !== "web") {
			await tts?.speak({
				text: text,
				lang: "deu-default", // TODO: make this configurable
			});
		} else {
			// speechSynthesis.speak() returns immediately, so wait for the
			// utterance to end (or fail) before continuing.
			await new Promise<void>((resolve) => {
				const utterance = new SpeechSynthesisUtterance(text);
				utterance.lang = "de-DE";
				utterance.onend = () => resolve();
				utterance.onerror = () => resolve();
				window.speechSynthesis.speak(utterance);
			});
		}
	} catch (e) {
		console.error("Error speaking text", e);
		alert(e);
	} finally {
		// Always restore the other audio, even when speaking failed.
		if (canDuck) {
			try {
				await Duck.unduck();
			} catch (e) {
				console.error("Error unducking audio", e);
			}
		}
	}
}
|
||||
16
src/lib/services/navigation/TTSWorker.ts
Normal file
16
src/lib/services/navigation/TTSWorker.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import * as tts from '@diffusionstudio/vits-web';
|
||||
|
||||
/**
 * Worker message handler: synthesizes the text of an 'init' request with the
 * requested voice and posts the audio back as a 'result' message carrying the
 * original text. Messages of any other type are ignored.
 *
 * NOTE(review): if `tts.predict` rejects, no message is posted back, so the
 * sender never learns that the request failed.
 */
async function main(event: MessageEvent<tts.InferenceConfg & { type: 'init' }>) {
	const request = event.data;
	if (request?.type !== 'init') return;

	const startedAt = performance.now();
	const { text, voiceId } = request;
	const blob = await tts.predict({ text, voiceId });
	console.log('Time taken:', performance.now() - startedAt + ' ms');

	self.postMessage({ type: 'result', audio: blob, text });
}

self.addEventListener('message', main);
|
||||
@@ -1,5 +1,6 @@
|
||||
import { location } from "$lib/components/lnv/location.svelte";
|
||||
import { map } from "$lib/components/lnv/map.svelte";
|
||||
import say from "./TTS";
|
||||
import type { ValhallaRequest } from "./ValhallaRequest";
|
||||
import type { LngLatBoundsLike } from "maplibre-gl";
|
||||
|
||||
@@ -156,18 +157,21 @@ async function tickRoute() {
|
||||
}
|
||||
|
||||
const bgi = currentManeuver.begin_shape_index;
|
||||
const location = getUserLocation();
|
||||
const loc = {
|
||||
lat: location.lat,
|
||||
lon: location.lng,
|
||||
};
|
||||
const polyline = decodePolyline(trip.legs[0].shape);
|
||||
|
||||
// Check if the user location is on the last point of the entire route
|
||||
if (isOnPoint(location, polyline[polyline.length - 1])) {
|
||||
if (isOnPoint(loc, polyline[polyline.length - 1])) {
|
||||
console.log("Reached destination!");
|
||||
stopNavigation();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if the user is on the route
|
||||
if (!isOnShape(location, polyline)) {
|
||||
if (!isOnShape(loc, polyline)) {
|
||||
console.log("Off route!");
|
||||
info.isOffRoute = true;
|
||||
// TODO: Implement re-routing logic
|
||||
@@ -180,20 +184,25 @@ async function tickRoute() {
|
||||
currentManeuver.verbal_pre_transition_instruction &&
|
||||
!hasAnnouncedPreInstruction
|
||||
) {
|
||||
const distanceToEnd = calculateDistance(location, polyline[bgi]);
|
||||
const distanceToEnd = calculateDistance(loc, polyline[bgi]);
|
||||
// console.log("Distance to end of current maneuver: ", distanceToEnd, " meters");
|
||||
if (distanceToEnd <= 100) {
|
||||
console.log("Speed: ", location.speed, " km/h");
|
||||
const verbalDistance = verbalPreInstructionDistance(
|
||||
location.speed || 50, // Assuming location has a speed property
|
||||
);
|
||||
if (distanceToEnd <= verbalDistance) {
|
||||
hasAnnouncedPreInstruction = true;
|
||||
console.log(
|
||||
"[Verbal instruction] ",
|
||||
currentManeuver.verbal_pre_transition_instruction,
|
||||
);
|
||||
say(currentManeuver.verbal_pre_transition_instruction);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the user is past the current maneuver
|
||||
// Checks if the user is still on the current maneuver's polyline
|
||||
if (!isOnShape(location, polyline.slice(bgi))) {
|
||||
if (!isOnShape(loc, polyline.slice(bgi))) {
|
||||
return; // User is not on the current maneuver's polyline, do not update
|
||||
}
|
||||
|
||||
@@ -208,7 +217,7 @@ async function tickRoute() {
|
||||
if (currentManeuver.verbal_post_transition_instruction) {
|
||||
hasAnnouncedPreInstruction = false;
|
||||
const distanceToEnd = calculateDistance(
|
||||
location,
|
||||
loc,
|
||||
polyline[
|
||||
trip.legs[0].maneuvers[routing.currentTripInfo.maneuverIdx + 1]
|
||||
.begin_shape_index
|
||||
@@ -219,6 +228,7 @@ async function tickRoute() {
|
||||
"[Verbal instruction] ",
|
||||
currentManeuver.verbal_post_transition_instruction,
|
||||
);
|
||||
say(currentManeuver.verbal_post_transition_instruction);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -232,9 +242,16 @@ async function tickRoute() {
|
||||
|
||||
info.currentManeuver = trip.legs[0].maneuvers[info.maneuverIdx];
|
||||
|
||||
// queueSpeech(info.currentManeuver.verbal_pre_transition_instruction || "");
|
||||
// queueSpeech(info.currentManeuver.verbal_post_transition_instruction || "");
|
||||
|
||||
// TODO: verbal instructions
|
||||
}
|
||||
|
||||
/**
 * Distance threshold at which the verbal pre-transition instruction is
 * announced, as a linear function of the current speed.
 *
 * The call site logs the speed as km/h and compares the result against a
 * distance it describes in meters — units assumed from that, TODO confirm.
 *
 * @param speed Current speed.
 * @returns `speed * 2.222 + 37.144`.
 */
function verbalPreInstructionDistance(speed: number): number {
	const speedTerm = speed * 2.222;
	return speedTerm + 37.144;
}
|
||||
|
||||
export function stopNavigation() {
|
||||
if (routing.currentTripInfo.int) {
|
||||
clearInterval(routing.currentTripInfo.int);
|
||||
@@ -245,20 +262,20 @@ export function stopNavigation() {
|
||||
removeAllRoutes();
|
||||
}
|
||||
|
||||
function getUserLocation(): WorldLocation {
|
||||
// return geolocate.currentLocation!;
|
||||
return {
|
||||
lat: location.lat,
|
||||
lon: location.lng,
|
||||
};
|
||||
// const lnglat = window.geolocate._userLocationDotMarker.getLngLat();
|
||||
// return { lat: lnglat.lat, lon: lnglat.lng };
|
||||
// console.log(map.value!)
|
||||
// return {
|
||||
// lat: 0,
|
||||
// lon: 0
|
||||
// }
|
||||
}
|
||||
// function getUserLocation(): WorldLocation {
|
||||
// // return geolocate.currentLocation!;
|
||||
// return {
|
||||
// lat: location.lat,
|
||||
// lon: location.lng,
|
||||
// };
|
||||
// // const lnglat = window.geolocate._userLocationDotMarker.getLngLat();
|
||||
// // return { lat: lnglat.lat, lon: lnglat.lng };
|
||||
// // console.log(map.value!)
|
||||
// // return {
|
||||
// // lat: 0,
|
||||
// // lon: 0
|
||||
// // }
|
||||
// }
|
||||
|
||||
function isOnLine(
|
||||
location: WorldLocation,
|
||||
|
||||
Reference in New Issue
Block a user