ReportingExamples.Example02_SamplingAndEvaluatingMultipleResponses.cs
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.Extensions.AI.Evaluation;
using Microsoft.Extensions.AI.Evaluation.Reporting;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Reporting;

public partial class ReportingExamples
{
    private static IEnumerable<object[]> Iterations
    {
        get
        {
            for (int i = 1; i <= 3; ++i)
            {
                yield return new object[] { i.ToString() };
            }
        }
    }
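
    /// Note: because of the <see cref="DynamicDataAttribute"/> below, MSTest invokes the test method once for each
    /// object[] yielded by <see cref="Iterations"/> above, passing the contained string ("1", "2" or "3") as the
    /// 'iterationName' argument. Each invocation therefore records a separate iteration under the same scenario.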
    [TestMethod]
    [DynamicData(nameof(Iterations))]
    public async Task Example02_SamplingAndEvaluatingMultipleResponses(string iterationName)
    {
        /// This example is similar to the previous one in
        /// <see cref="Example01_SamplingAndEvaluatingSingleResponse"/>, the only difference being that we sample
        /// and evaluate multiple responses (instead of just one).

        /// Use <see cref="s_defaultReportingConfiguration"/> to create a <see cref="ScenarioRun"/> with
        /// <see cref="ScenarioRun.ScenarioName"/> set to the fully qualified name of the current test method, and
        /// with <see cref="ScenarioRun.IterationName"/> set to the number representing the current iteration.
        await using ScenarioRun scenarioRun =
            await s_defaultReportingConfiguration.CreateScenarioRunAsync(this.ScenarioName, iterationName);

        /// Get an LLM response to be evaluated for the current iteration. As previously explained in the comments
        /// in <see cref="Example01_SamplingAndEvaluatingSingleResponse"/>, the response to be evaluated will be
        /// fetched directly from the LLM in the very first run of each individual iteration, and from the
        /// (disk-based) response cache in every subsequent run of the same iteration until the cached entries
        /// expire (in 14 days by default).
        var (messages, modelResponse) = await GetAstronomyConversationAsync(
            chatClient: scenarioRun.ChatConfiguration!.ChatClient,
            astronomyQuestion: "How far is the planet Jupiter from the Earth at its closest and furthest points?");

        /// Run the evaluators configured in <see cref="s_defaultReportingConfiguration"/> against the response.
        /// Again, the evaluation will be performed using the LLM in the very first run of each individual
        /// iteration, and fetched from the (disk-based) response cache in every subsequent run of the same
        /// iteration until the cached entries expire (in 14 days by default).
        EvaluationResult result = await scenarioRun.EvaluateAsync(messages, modelResponse);

        /// Run some basic validation on the evaluation result.
        Validate(result);

        /// At this point, the <see cref="scenarioRun"/> object will be disposed and the evaluation result for the
        /// current iteration above will be stored to the (disk-based) result store. You can inspect how the
        /// result for each iteration is stored by navigating to the directory that you specified via the
        /// 'EVAL_SAMPLE_STORAGE_ROOT_PATH' environment variable.
    }
}
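
// Note: `s_defaultReportingConfiguration`, `ScenarioName`, `GetAstronomyConversationAsync` and `Validate` are
// defined in other files of this partial class. For reference, here is a minimal sketch of how a disk-based
// reporting configuration such as `s_defaultReportingConfiguration` could be created; the evaluator and the
// `chatConfiguration` variable below are illustrative assumptions, not necessarily what this sample uses:
//
//     ReportingConfiguration reportingConfiguration =
//         DiskBasedReportingConfiguration.Create(
//             storageRootPath: Environment.GetEnvironmentVariable("EVAL_SAMPLE_STORAGE_ROOT_PATH")!,
//             evaluators: [new CoherenceEvaluator()],       // any set of IEvaluator implementations
//             chatConfiguration: chatConfiguration,         // wraps the IChatClient used to talk to the LLM
//             enableResponseCaching: true);                 // enables the disk-based response cache used above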