EchoTrio
 
Loading...
Searching...
No Matches
Actor.cs
Go to the documentation of this file.
1using ElevenLabs;
2using ElevenLabs.TextToSpeech;
3using Newtonsoft.Json;
4using Newtonsoft.Json.Linq;
5using OpenAI;
6using OpenAI.Responses;
7using System;
8using System.Collections.Generic;
9using System.Threading;
10using System.Threading.Tasks;
11using UnityEngine;
12
13namespace EchoTrio {
14 /// The actors are the OpenAI Response model which chats with the user.
15 public class Actor {
/// A single turn of output from the actor: the reply text, its emotion, the synthesised speech, and any reasoning summaries.
[System.Serializable]
public class Response {
    public string message = null;                        // The actor's reply text; null if no message item was produced.
    public Emotion emotion = Emotion.Neutral;            // Selected via the set_emotion function call; stays Neutral if the model never calls it.
    public AudioClip audioClip = null;                   // The reply synthesised as speech; null if text-to-speech failed.
    public List<string> reasonings = new List<string>(); // Reasoning summary texts returned by the model, if any.
}
23
/// Settings for the OpenAI Responses API, derived from the actor's config.
private class OpenAISettings {
    public OpenAI.Models.Model model = OpenAI.Models.Model.GPT5;
    public ReasoningEffort reasoningEffort = ReasoningEffort.Low; // Must be at least ReasoningEffort.Low in order to use file search or web search.
    public List<string> include = new List<string>();
    public List<OpenAI.Tool> tools = new List<OpenAI.Tool>();

    /// <param name="config">The actor's configuration, determining which tools and reasoning level are enabled.</param>
    public OpenAISettings(ActorConfig config) {
        // Choose the minimum reasoning effort that is required to run the enabled features.
        reasoningEffort = config.IsAnyFeatureEnabled(ActorConfig.Feature.WebSearch | ActorConfig.Feature.FileSearch | ActorConfig.Feature.Reasoning) ? ReasoningEffort.Low : ReasoningEffort.Minimal;

        // TODO: Change from WebSearchPreviewTool to WebSearchTool once the com.openai.unity library supports it.
        if (config.AreAllFeaturesEnabled(ActorConfig.Feature.WebSearch)) {
            tools.Add(new WebSearchPreviewTool(SearchContextSize.Low)); // User Location: Optional Free Text City, ISO 3166-1 Country Code, Free Text State/Region, IANA Time Zone
            include.Add("web_search_call.action.sources");
        }

        if (config.AreAllFeaturesEnabled(ActorConfig.Feature.FileSearch)) {
            // Cache the ID so the config is queried once, and reject null as well as empty
            // (the previous `!= string.Empty` check let a null ID through to FileSearchTool).
            string vectorStoreId = config.GetOpenAIVectorStoreID();
            if (!string.IsNullOrEmpty(vectorStoreId)) {
                tools.Add(new FileSearchTool(vectorStoreId, maxNumberOfResults: 2));
                include.Add("file_search_call.results");
            } else {
                Debug.LogWarning($"Actor {config.GetPersona()} has file search enabled but an empty file search vector store ID was provided!");
            }
        }
    }
}
50
51 private class ElevenLabsSettings {
52 public ElevenLabs.Models.Model expressionModel = new("eleven_v3");
53 public ElevenLabs.Models.Model fastModel = ElevenLabs.Models.Model.FlashV2_5;
54 public string languageCode = "en"; // ISO 639 Language Code
55 public string voiceId = string.Empty;
56
60 }
61 }
62
63 // OpenAI Internal Variables
64 private OpenAIClient openAIApi = null;
66
67 // ElevenLabs Internal Variables
68 private ElevenLabsClient elevenLabsApi = null;
70
71 // Internal Variables
72 /// The conversation history from this actor's point of view.
73 private List<IResponseItem> conversation = new List<IResponseItem>();
74
75 // Public Properties
76 public string Persona { private set; get; }
77 public bool EnableDebug { get; set; } = false;
78
/// Build an actor from its config, creating the OpenAI and ElevenLabs API clients.
/// <param name="config">The actor's configuration; values may be overridden by ActorOverrides.ini.</param>
public Actor(ActorConfig config) {
    // Check for config overrides.
    config = config.Override();
    Persona = config.GetPersona().ToString();

    // Initialise OpenAI
    openAISettings = new OpenAISettings(config);

    // NOTE(review): EnableDebug defaults to false here, so the clients are created with debugging
    // off even if EnableDebug is set later — confirm this is intended.
    openAIApi = new OpenAIClient(Authentication.GetOpenAIAuthentication()) { EnableDebug = this.EnableDebug };

    // Initialise ElevenLabs
    // NOTE(review): elevenLabsSettings is never assigned in this view of the constructor — a line
    // appears to have been lost in extraction; confirm against the original file.
    elevenLabsApi = new ElevenLabsClient(Authentication.GetElevenLabsAuthentication()) { EnableDebug = this.EnableDebug };
}
95
/// Append a system message to the actor's conversation history.
/// Kept for backward compatibility: the name is misspelled ("Messsage"); prefer AddSystemMessage.
/// <param name="message">The message to append.</param>
public void AddSystemMesssage(string message) {
    AddSystemMessage(message);
}

/// Append a system message to the actor's conversation history.
/// <param name="message">The message to append.</param>
public void AddSystemMessage(string message) {
    conversation.Add(new Message(OpenAI.Role.System, message));
}
101
/// Append a user message to the actor's conversation history.
/// <param name="message">The message to append.</param>
public void AddUserMessage(string message) {
    Message userMessage = new Message(OpenAI.Role.User, message);
    conversation.Add(userMessage);
}
107
/// Append an assistant message to the actor's conversation history. This means that we can make the actor think it said something, even if it did not.
/// Used for scripted speech, where we force the actor to say something the designer wrote.
/// <param name="message">The message to append.</param>
/// <param name="emotion">The emotion of the message.</param>
/// <param name="cancellationToken">Cancellation token used to cancel any async actions when the program shuts down.</param>
/// <returns>The actor's response.</returns>
public async Task<Actor.Response> InsertResponse(string message, Emotion emotion, CancellationToken cancellationToken) {
    // Assistant messages must now be of type output_text, so this relies on a custom Message class
    // (the upstream package's creator rejected a pull request adding one).
    conversation.Add(new Message(OpenAI.Role.Assistant, message));

    // Synthesise the scripted line so it plays back exactly like a generated reply.
    AudioClip scriptedClip = await GetAudioClipAsync(message, cancellationToken);

    Actor.Response scriptedResponse = new Actor.Response();
    scriptedResponse.message = message;
    scriptedResponse.emotion = emotion;
    scriptedResponse.audioClip = scriptedClip;
    return scriptedResponse;
}
123
/// Request the actor to generate a response based on the conversation history.
/// Message and reasoning items are recorded into the conversation; function calls are answered
/// and the method recurses so the model can produce its follow-up reply.
/// <param name="cancellationToken">Cancellation token used to cancel any async actions when the program shuts down.</param>
/// <returns>The actor's response, or null if the request failed.</returns>
public async Task<Actor.Response> GetResponse(CancellationToken cancellationToken) {
    try {
        // Request a response from OpenAI.
        CreateResponseRequest request = new CreateResponseRequest(
            input: conversation,
            model: openAISettings.model,
            tools: openAISettings.tools,
            reasoning: new Reasoning(openAISettings.reasoningEffort, OpenAI.ReasoningSummary.Auto),
            maxToolCalls: 0 < openAISettings.tools.Count ? openAISettings.tools.Count : null, // One call per tool; null (no limit) when no tools are registered.
            include: openAISettings.include);
        OpenAI.Responses.Response response = await openAIApi.ResponsesEndpoint.CreateModelResponseAsync(request, cancellationToken: cancellationToken);

        // Walk the output items, building up the actor's response.
        Actor.Response actorResponse = new Actor.Response();
        for (int i = 0; i < response.Output.Count; ++i) {
            IResponseItem responseItem = response.Output[i];
            switch (responseItem) {
                case OpenAI.Responses.Message message:
                    conversation.Add(message);
                    actorResponse.message = message.ToString();
                    actorResponse.audioClip = await GetAudioClipAsync(message.ToString(), cancellationToken);
                    break;
                case OpenAI.Responses.ReasoningItem reasoningItem:
                    conversation.Add(reasoningItem);
                    foreach (OpenAI.Responses.ReasoningSummary reasoningSummary in reasoningItem.Summary) {
                        actorResponse.reasonings.Add(reasoningSummary.Text);
                    }
                    break;
                case OpenAI.Responses.WebSearchToolCall webSearchToolCall:
                    Debug.Log("Actor " + Persona + " Searched Web");
                    break;
                case OpenAI.Responses.FileSearchToolCall fileSearchToolCall:
                    Debug.Log("Actor " + Persona + " Searched Files");
                    break;
                case OpenAI.Responses.FunctionToolCall functionToolCall:
                    Debug.Log("Actor " + Persona + " Function Call: " + functionToolCall.Name + ", Arguments: " + functionToolCall.Arguments.ToString());

                    // Handle function calls.
                    string output = string.Empty;
                    if (functionToolCall.Name == "set_emotion") {
                        output = ParseEmotion(functionToolCall.Arguments.ToString());
                        actorResponse.emotion = output.ToEmotion();
                    }

                    // Return the function call output to the model and request the follow-up response.
                    conversation.Add(functionToolCall);
                    conversation.Add(new FunctionToolCallOutput(functionToolCall, output));
                    return await GetResponse(cancellationToken);
                default:
                    Debug.LogWarning("Actor.GetResponse: Unhandled " + responseItem.GetType().Name + " Received");
                    break;
            }
        }
        return actorResponse;
    } catch (Exception e) {
        // LogException surfaces the failure as an error with a full stack trace, rather than a plain info log.
        Debug.LogException(e);
        return null;
    }
}
187
188 // Internal Functions
/// Convert text to speech via ElevenLabs and return it as a Unity AudioClip.
/// <param name="text">The text to speak.</param>
/// <param name="cancellationToken">Cancellation token used to cancel any async actions when the program shuts down.</param>
/// <returns>The generated clip, or null if the request failed.</returns>
private async Task<AudioClip> GetAudioClipAsync(string text, CancellationToken cancellationToken) {
    try {
        // The voice is identified by the configured voice ID; the persona string is only a display name.
        ElevenLabs.Voices.Voice voice = new ElevenLabs.Voices.Voice(elevenLabsSettings.voiceId, this.Persona);
        // NOTE(review): a request argument appears to have been lost from this view of the call —
        // presumably the model selection (expressionModel vs fastModel, perhaps keyed off
        // ContainsAudioTags, which is otherwise unused here); confirm against the original file.
        TextToSpeechRequest request = new TextToSpeechRequest(
            voice, text,
            languageCode: elevenLabsSettings.languageCode,
            outputFormat: OutputFormat.PCM_24000); // Output format must be PCM, because that's what the AudioClip convertor in VoiceClip expects.
        ElevenLabs.VoiceClip voiceClip = await elevenLabsApi.TextToSpeechEndpoint.TextToSpeechAsync(request, cancellationToken);
        return voiceClip.AudioClip;
    } catch (Exception e) {
        Debug.Log(e);
    }
    return null;
}
204
/// Check whether the text contains square brackets, which delimit ElevenLabs audio tags (e.g. "[laughs]").
/// <param name="text">The text to inspect.</param>
/// <returns>True if the text contains '[' or ']'.</returns>
private bool ContainsAudioTags(string text) {
    // Equivalent to the previous Regex.IsMatch(text, @"[\[\]]") but avoids building a regex on every call.
    return text.IndexOfAny(new[] { '[', ']' }) >= 0;
}
208
209 // Tools
/// Create a function following OpenAI's JSON Schema for the actor to select its emotion for the current response.
/// OpenAI API on function calling: https://platform.openai.com/docs/guides/function-calling
/// <returns>The function's JSON Object.</returns>
private OpenAI.Function BuildSetEmotionTool() {
    // Collect every emotion name, using Emotion.Num as the sentinel for the enum's size.
    string[] emotionNames = new string[(int)Emotion.Num];
    for (int index = 0; index < emotionNames.Length; ++index) {
        emotionNames[index] = ((Emotion)index).ToString();
    }

    var schema = new {
        type = "object",
        properties = new {
            emotion = new {
                type = "string",
                description = "The emotion of your reply.",
                @enum = emotionNames // Adding an enum means that the AI can only pick from this set of values. (Well, the AI still sometimes hallucinates invalid values.)
            }
        },
        required = new[] { "emotion" }
    };
    return new OpenAI.Function(
        "set_emotion",
        "Set the emotion of your current reply. The selected emotion is returned.",
        JToken.Parse(JsonConvert.SerializeObject(schema, Formatting.Indented)));
}
234
/// Extract the "emotion" argument from a set_emotion function call's JSON arguments.
/// <param name="args">The raw JSON arguments string produced by the model.</param>
/// <returns>The emotion name, or string.Empty when the argument is missing or the JSON is malformed.</returns>
private string ParseEmotion(string args) {
    if (string.IsNullOrEmpty(args)) { return string.Empty; }
    try {
        JToken parsedArgs = JToken.Parse(args);
        JToken emotionToken = parsedArgs?["emotion"];
        return emotionToken != null ? emotionToken.ToString() : string.Empty;
    } catch (JsonException e) {
        // The model occasionally produces malformed arguments (see BuildSetEmotionTool's note on
        // hallucinated values); treat those as "no emotion" rather than letting the parse throw.
        Debug.LogWarning("Actor.ParseEmotion: Invalid JSON arguments: " + e.Message);
        return string.Empty;
    }
}
241 }
242}
ActorConfig Override()
Overrides any actor config if value is set in ActorOverrides.ini.
Definition: ActorConfig.cs:67
bool IsAnyFeatureEnabled(Feature features)
Definition: ActorConfig.cs:47
bool AreAllFeaturesEnabled(Feature features)
Definition: ActorConfig.cs:45
Persona GetPersona()
Definition: ActorConfig.cs:43
Feature
Flags to enable or disable actor features.
Definition: ActorConfig.cs:18
string GetOpenAIVectorStoreID()
Definition: ActorConfig.cs:51
string GetInstructions()
Definition: ActorConfig.cs:53
string GetElevenLabsVoiceID()
Definition: ActorConfig.cs:49
ElevenLabs.Models.Model expressionModel
Definition: Actor.cs:52
ElevenLabsSettings(ActorConfig config)
Definition: Actor.cs:57
ElevenLabs.Models.Model fastModel
Definition: Actor.cs:53
ReasoningEffort reasoningEffort
Definition: Actor.cs:26
OpenAI.Models.Model model
Definition: Actor.cs:25
List< OpenAI.Tool > tools
Definition: Actor.cs:28
List< string > include
Definition: Actor.cs:27
OpenAISettings(ActorConfig config)
Definition: Actor.cs:30
AudioClip audioClip
Definition: Actor.cs:20
List< string > reasonings
Definition: Actor.cs:21
The actors are the OpenAI Response model which chats with the user.
Definition: Actor.cs:15
OpenAI.Function BuildSetEmotionTool()
Definition: Actor.cs:213
bool ContainsAudioTags(string text)
Definition: Actor.cs:205
void AddSystemMesssage(string message)
Definition: Actor.cs:98
bool EnableDebug
Definition: Actor.cs:77
async Task< AudioClip > GetAudioClipAsync(string text, CancellationToken cancellationToken)
Definition: Actor.cs:189
string ParseEmotion(string args)
Definition: Actor.cs:235
async Task< Actor.Response > InsertResponse(string message, Emotion emotion, CancellationToken cancellationToken)
Definition: Actor.cs:114
ElevenLabsSettings elevenLabsSettings
Definition: Actor.cs:69
ElevenLabsClient elevenLabsApi
Definition: Actor.cs:68
OpenAISettings openAISettings
Definition: Actor.cs:65
void AddUserMessage(string message)
Definition: Actor.cs:104
Actor(ActorConfig config)
Definition: Actor.cs:79
OpenAIClient openAIApi
Definition: Actor.cs:64
List< IResponseItem > conversation
The conversation history from this actor's point of view.
Definition: Actor.cs:73
async Task< Actor.Response > GetResponse(CancellationToken cancellationToken)
Definition: Actor.cs:127
Helper class to load the authentication file and retrieve API keys.
static ElevenLabsAuthentication GetElevenLabsAuthentication()
static OpenAIAuthentication GetOpenAIAuthentication()
Emotion
Definition: Emotion.cs:4
Persona
Personas the actors will role-play.
Definition: Persona.cs:5