Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion packages/api/src/hooks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ import useUIState from './useUIState';
import useUserID from './useUserID';
import useUsername from './useUsername';
import useVoiceSelector from './useVoiceSelector';
import useVoiceActivities from './useVoiceActivities';

export { useBuildRenderActivityCallback } from '@msinternal/botframework-webchat-api-middleware';
export { useSuggestedActionsHooks } from '@msinternal/botframework-webchat-redux-store';
Expand Down Expand Up @@ -148,5 +149,6 @@ export {
useUIState,
useUserID,
useUsername,
useVoiceSelector
useVoiceSelector,
useVoiceActivities
};
31 changes: 31 additions & 0 deletions packages/api/src/hooks/internal/useStateRef.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { useCallback, useRef, useState } from 'react';

import type { Dispatch, MutableRefObject, SetStateAction } from 'react';

/**
 * A `useState` variant that also exposes the current value through a mutable ref.
 *
 * The ref always holds the latest value synchronously (useful inside callbacks
 * that would otherwise close over a stale state value), while the returned value
 * and a forced re-render keep the component tree in sync.
 *
 * @param initialValue Seed value for both the rendered value and the ref.
 * @returns A frozen tuple of `[value, setValue, valueRef]`.
 */
export default function useStateRef<T>(
  initialValue?: T
): readonly [T, Dispatch<SetStateAction<T>>, MutableRefObject<T>] {
  // The state value itself is never read; setting a fresh object is only a
  // re-render trigger after the ref has been updated in place.
  const [, triggerRender] = useState<unknown>();
  const ref: MutableRefObject<T> = useRef<T>(initialValue);

  const setValue: Dispatch<SetStateAction<T>> = useCallback(
    (action: SetStateAction<T>) => {
      const previous = ref.current;
      // Support both the direct-value and updater-function forms of SetStateAction.
      const next = action instanceof Function ? action(previous) : action;

      if (previous !== next) {
        ref.current = next;

        triggerRender({});
      }
    },
    [triggerRender, ref]
  );

  return Object.freeze([ref.current, setValue, ref]) as readonly [
    T,
    Dispatch<SetStateAction<T>>,
    MutableRefObject<T>
  ];
}
3 changes: 3 additions & 0 deletions packages/api/src/hooks/useSpeechToSpeech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Public surface for the speech-to-speech hook: re-export the provider-local
// implementation so consumers import it from `hooks/` like every other hook.
export { default } from '../providers/SpeechToSpeech/useSpeechToSpeech';
6 changes: 6 additions & 0 deletions packages/api/src/hooks/useVoiceActivities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { type WebChatActivity } from 'botframework-webchat-core';
import { useSelector } from './internal/WebChatReduxContext';

/**
 * Selects the list of voice activities from the Web Chat Redux store.
 *
 * @returns A one-element tuple wrapping the `voiceActivities` slice, following
 *          the Web Chat convention of hooks returning tuple-shaped results.
 */
export default function useVoiceActivities(): [WebChatActivity[]] {
  const activities = useSelector(({ voiceActivities }) => voiceActivities);

  return [activities];
}
144 changes: 144 additions & 0 deletions packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import React, { useCallback, useEffect, useMemo, useRef, type ReactNode } from 'react';
import { isVoiceActivity, WebChatActivity } from 'botframework-webchat-core';
import { useAudioPlayer } from './private/useAudioPlayer';
import { useRecorder } from './private/useRecorder';
import { useDebouncedNotifications, usePostActivity, useVoiceActivities } from '../../hooks';
import SpeechToSpeechContext from './private/Context';
import { SpeechState } from './types/SpeechState';
import useStateRef from '../../hooks/internal/useStateRef';

// Composer that wires speech-to-speech state together: it consumes voice
// activities from the store, forwards recorded audio chunks to the bot, plays
// incoming audio, and publishes { recording, setRecording, speechState } via
// SpeechToSpeechContext.
export const SpeechToSpeechComposer: React.FC<{ readonly children: ReactNode }> = ({ children }) => {
  const [voiceActivities] = useVoiceActivities();
  const postActivity = usePostActivity();
  // NOTE(review): `connectivitystatus` (all lowercase) must match the key used
  // by the debounced notifications — confirm against useDebouncedNotifications.
  const [{ connectivitystatus }] = useDebouncedNotifications();
  // Index of the first activity in `voiceActivities` not yet handled; each
  // effect run processes only the tail appended since the previous run.
  const lastProcessedIndexRef = useRef(0);
  const [speechState, setSpeechState] = useStateRef<SpeechState>('idle');

  // config received from server on session init (only once), for now ccv2 and mmrt runs on different sample rate and chunk interval.
  // we will read those config, free form object as unsure of what all session config would be needed in future.
  const [serverConfig, setServerConfig] = useStateRef<Record<string, unknown> | null>(null);
  const { playAudio, stopAudio, isPlaying } = useAudioPlayer(serverConfig);

  const isConnected = useMemo(() => connectivitystatus?.message === 'connected', [connectivitystatus]);

  // Posts one recorded audio chunk to the bot as a 'stream.chunk' event.
  // NOTE(review): the `as any` cast suggests the event payload is not covered
  // by the postActivity typings — consider widening the activity type instead.
  const sendAudioChunk = useCallback(
    (base64: string, timestamp: string) => {
      postActivity({
        type: 'event',
        name: 'stream.chunk',
        value: { voice: { contentUrl: base64, timestamp } }
      } as any);
    },
    [postActivity]
  );

  const { recording, setRecording: baseSetRecording } = useRecorder(sendAudioChunk, serverConfig);

  // Dispatches a single incoming voice activity by its event name.
  const handleVoiceActivity = useCallback(
    (activity: WebChatActivity) => {
      if (!isVoiceActivity(activity)) {
        return;
      }

      const { name, value } = activity;
      const { voice } = value;

      switch (name) {
        // TODO - this will be commandResult activity and not event, need to think on handling of command and commandResult activities.
        case 'session.init': {
          // One-time server-provided session configuration (sample rate, chunk interval, ...).
          setServerConfig(value.session?.config as Record<string, unknown>);
          break;
        }

        case 'session.update': {
          // Server-driven state transitions for the bot side of the conversation.
          switch (voice.bot_state) {
            case 'voice.request.detected':
              // User started speaking: barge-in — cut bot playback and listen.
              stopAudio();
              setSpeechState('listening');
              break;

            case 'voice.request.processing':
              setSpeechState('processing');
              break;

            default:
              break;
          }
          break;
        }

        case 'stream.chunk': {
          // Incoming bot audio chunk; `contentUrl` carries the audio payload.
          if (voice.contentUrl) {
            playAudio(voice.contentUrl);
          }
          break;
        }

        default:
          break;
      }
    },
    [playAudio, setServerConfig, setSpeechState, stopAudio]
  );

  // Processes activities appended since the last run, then reconciles
  // `speechState` with the audio player's playing flag.
  useEffect(() => {
    const startIndex = lastProcessedIndexRef.current;
    if (!voiceActivities.length || startIndex >= voiceActivities.length) {
      return;
    }

    for (let i = startIndex; i < voiceActivities.length; i++) {
      // eslint-disable-next-line security/detect-object-injection
      const activity = voiceActivities[i];

      // Skip processing the activity if it's from the user as we want to process only incoming voice activities.
      // we may receive (optional) config from server as soon as socket is established
      // at that time recording would be off but we still want to process to read config and act on it.
      if (
        activity.from?.role === 'user' ||
        (!recording && isVoiceActivity(activity) && activity.name !== 'session.init')
      ) {
        continue;
      }

      handleVoiceActivity(activity);
    }

    // NOTE(review): these transitions only run when new activities arrive; if
    // playback ends without a new activity, `speechState` may lag — confirm.
    if (isPlaying && speechState !== 'bot_speaking') {
      setSpeechState('bot_speaking');
    } else if (!isPlaying && speechState === 'bot_speaking') {
      setSpeechState('listening');
    }

    // Mark everything seen so far as processed.
    lastProcessedIndexRef.current = voiceActivities.length;
  }, [handleVoiceActivity, isPlaying, recording, setSpeechState, speechState, voiceActivities]);

  // Public toggle: starts/stops recording, keeping `speechState` and any
  // in-flight bot audio consistent. No-op while disconnected.
  const setRecording = useCallback(
    async (shouldRecord: boolean) => {
      if (!isConnected) {
        return;
      }

      if (shouldRecord) {
        setSpeechState('listening');
      } else {
        // Stopping the mic also cancels any bot audio still playing.
        stopAudio();
        setSpeechState('idle');
      }

      await baseSetRecording(shouldRecord);
    },
    [isConnected, baseSetRecording, setSpeechState, stopAudio]
  );

  const contextValue = useMemo(
    () => ({
      recording,
      setRecording,
      speechState
    }),
    [recording, setRecording, speechState]
  );

  return <SpeechToSpeechContext.Provider value={contextValue}>{children}</SpeechToSpeechContext.Provider>;
};
14 changes: 14 additions & 0 deletions packages/api/src/providers/SpeechToSpeech/private/Context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { createContext } from 'react';
import { SpeechState } from '../types/SpeechState';

// Shape of the speech-to-speech context exposed to consumers.
type SpeechToSpeechContextType = {
  // True while the microphone is actively recording.
  recording: boolean;
  // Starts/stops recording; implemented by SpeechToSpeechComposer.
  setRecording: (recording: boolean) => void;
  // Current phase of the speech interaction (e.g. idle/listening/processing).
  speechState: SpeechState;
};

// NOTE(review): the default is `undefined!` (non-null assertion), so consuming
// this context outside a <SpeechToSpeechComposer> yields `undefined` at runtime
// despite the non-optional type — consumer hooks should guard for that.
const SpeechToSpeechContext = createContext<SpeechToSpeechContextType>(undefined!);

export default SpeechToSpeechContext;

export type { SpeechToSpeechContextType };
Loading
Loading