Skip to content

Commit 27c745e

Browse files
Cristian PopCIPop
authored and committed
Stress/perf test automation: devops yaml, fault injection via Windows
Firewall, adding KPI and exception faulting.
1 parent 9b8897c commit 27c745e

27 files changed

+866
-324
lines changed

e2e/stress/IoTClientPerf/Configuration.Stress.cs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
33

44
using System;
5+
using System.Globalization;
56
using System.Security.Cryptography.X509Certificates;
67

78
namespace Microsoft.Azure.Devices.E2ETests
@@ -25,6 +26,36 @@ public static partial class Stress
2526

2627
private static Lazy<X509Certificate2> s_cert = new Lazy<X509Certificate2>(() => { return Configuration.IoTHub.GetCertificateWithPrivateKey(); });
2728

29+
/// <summary>
30+
/// Gets the import export BLOB URI.
31+
/// </summary>
32+
public static string ImportExportBlobUri => GetValue("IOTHUB_IMPORTEXPORT_BLOB_URI");
33+
34+
/// <summary>
35+
/// Gets the connected devices percentage expected by the runner after the test ended.
36+
/// </summary>
37+
public static long? ConnectedDevicesPercentage => ParseNullable(GetValue("IOTHUB_PERF_CONNECTED_PERCENTAGE", ""));
38+
39+
/// <summary>
40+
/// Gets the TCP connections percentage expected by the runner after the test ended.
41+
/// </summary>
42+
public static long? TcpConnectionsPercentage => ParseNullable(GetValue("IOTHUB_PERF_TCP_PERCENTAGE", ""));
43+
44+
/// <summary>
45+
/// Gets the requests per second minimum average after the test ended.
46+
/// </summary>
47+
public static long? RequestsPerSecondMinAvg => ParseNullable(GetValue("IOTHUB_PERF_RPS_MIN_AVG", ""));
48+
49+
/// <summary>
50+
/// Gets the requests per second maximum standard deviation after the test ended.
51+
/// </summary>
52+
public static long? RequestsPerSecondMaxStd => ParseNullable(GetValue("IOTHUB_PERF_RPS_MAX_STD", ""));
53+
54+
/// <summary>
55+
/// Gets the maximum GC memory, in bytes, allowed after the test ended.
56+
/// </summary>
57+
public static long? GCMemoryBytes => ParseNullable(GetValue("IOTHUB_PERF_GC_MEM_BYTES_MAX", ""));
58+
2859
public static string GetDeviceNameById(int id, string authType)
2960
{
3061
return $"{NamePrefix}_{authType}_{id}";
@@ -43,6 +74,12 @@ public static string GetConnectionStringById(int id, string authType)
4374
public static string Key2 => s_key2.Value;
4475

4576
public static X509Certificate2 Certificate => s_cert.Value;
77+
78+
/// <summary>
/// Parses an optional integer setting.
/// </summary>
/// <param name="s">The raw setting text, e.g. an empty string when the setting is not configured.</param>
/// <returns>
/// The parsed value, or <c>null</c> when <paramref name="s"/> is null, empty or not a valid integer.
/// </returns>
private static long? ParseNullable(string s)
{
    // Settings are machine-readable values: parse them with the invariant culture so the
    // result does not depend on the regional settings of the machine running the test.
    if (long.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out long value))
    {
        return value;
    }

    return null;
}
4683
}
4784
}
4885
}

e2e/stress/IoTClientPerf/IoTClientPerf.csproj

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,27 @@
22

33
<PropertyGroup>
44
<OutputType>Exe</OutputType>
5-
<TargetFramework>netcoreapp2.1</TargetFramework>
5+
<TargetFramework>netcoreapp2.2</TargetFramework>
66
<RootNamespace>Microsoft.Azure.Devices.E2ETests</RootNamespace>
7+
<RootDir>$(MSBuildProjectDirectory)\..\..\..</RootDir>
8+
<CommonTest>$(RootDir)\common\test</CommonTest>
79
</PropertyGroup>
810

911
<ItemGroup>
10-
<Compile Include="..\..\..\common\test\Configuration.cs" Link="Configuration.cs" />
11-
<Compile Include="..\..\..\common\test\Configuration.IoTHub.cs" Link="Configuration.IoTHub.cs" />
12+
<Compile Include="$(CommonTest)\Configuration.cs" Link="Configuration.cs" />
13+
<Compile Include="$(CommonTest)\Configuration.IoTHub.cs" Link="Configuration.IoTHub.cs" />
1214
</ItemGroup>
1315

14-
<ItemGroup>
15-
<ProjectReference Include="..\..\..\iothub\device\src\Microsoft.Azure.Devices.Client.csproj" />
16-
<ProjectReference Include="..\..\..\iothub\service\src\Microsoft.Azure.Devices.csproj" />
17-
<ProjectReference Include="..\..\..\shared\src\Microsoft.Azure.Devices.Shared.csproj" />
16+
<ItemGroup Condition=" '$(AZURE_IOT_LOCALPACKAGES)' == '' ">
17+
<ProjectReference Include="$(RootDir)\iothub\device\src\Microsoft.Azure.Devices.Client.csproj" />
18+
<ProjectReference Include="$(RootDir)\iothub\service\src\Microsoft.Azure.Devices.csproj" />
19+
<ProjectReference Include="$(RootDir)\shared\src\Microsoft.Azure.Devices.Shared.csproj" />
20+
</ItemGroup>
21+
22+
<ItemGroup Condition=" '$(AZURE_IOT_LOCALPACKAGES)' != '' ">
23+
<PackageReference Include="Microsoft.Azure.Devices" Version="1.*" />
24+
<PackageReference Include="Microsoft.Azure.Devices.Shared" Version="1.*" />
25+
<PackageReference Include="Microsoft.Azure.Devices.Client" Version="1.*" />
1826
</ItemGroup>
1927

2028
</Project>

e2e/stress/IoTClientPerf/ParallelRun.cs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Diagnostics;
7+
using System.IO;
8+
using System.Runtime.ExceptionServices;
79
using System.Threading;
810
using System.Threading.Tasks;
911

@@ -43,10 +45,15 @@ public ParallelRun(
4345

4446
public async Task RunAsync(bool runOnce, CancellationToken ct)
4547
{
46-
int cursor_left, cursor_top;
47-
cursor_left = Console.CursorLeft;
48-
cursor_top = Console.CursorTop;
48+
int cursor_left = 0, cursor_top = 0;
4949

50+
try
51+
{
52+
cursor_left = Console.CursorLeft;
53+
cursor_top = Console.CursorTop;
54+
}
55+
catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ }
56+
5057
int actualParallel = Math.Min(_parallelOperations, _tests.Length);
5158
int currentInstance = 0;
5259

@@ -82,6 +89,14 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
8289
break;
8390
case TaskStatus.Faulted:
8491
statInterimFaulted++;
92+
foreach (Exception ex in finished.Exception.InnerExceptions)
93+
{
94+
if (ex is ParallelRunFatalException)
95+
{
96+
// Crash the process to simplify analysis. Recover original stack.
97+
((ParallelRunFatalException)ex).ThrowInner();
98+
}
99+
}
85100
break;
86101
case TaskStatus.RanToCompletion:
87102
statInterimCompleted++;
@@ -103,9 +118,13 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
103118
double statInterimSeconds = statInterimSw.Elapsed.TotalSeconds;
104119
statTotalCompleted += statInterimCompleted;
105120

106-
Console.SetCursorPosition(cursor_left, cursor_top);
107-
cursor_left = Console.CursorLeft;
108-
cursor_top = Console.CursorTop;
121+
try
122+
{
123+
Console.SetCursorPosition(cursor_left, cursor_top);
124+
cursor_left = Console.CursorLeft;
125+
cursor_top = Console.CursorTop;
126+
}
127+
catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ }
109128

110129
_updateStatistics(statInterimCompleted, statInterimFaulted, statInterimCancelled, statInterimSeconds);
111130
if (drain) Console.Write("Waiting for tasks to finish...\r");
@@ -142,4 +161,19 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
142161
}
143162
}
144163
}
164+
165+
/// <summary>
/// Wraps a captured exception that a test task reports as fatal.
/// The captured exception can be rethrown with its original stack trace via <see cref="ThrowInner"/>.
/// </summary>
public class ParallelRunFatalException : Exception
{
    private readonly ExceptionDispatchInfo _exceptionDispatchInfo;

    /// <summary>
    /// Creates the wrapper around a captured exception.
    /// </summary>
    /// <param name="innerExceptionDispatchInfo">The captured exception to rethrow later.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="innerExceptionDispatchInfo"/> is null.
    /// </exception>
    public ParallelRunFatalException(ExceptionDispatchInfo innerExceptionDispatchInfo)
        : base(
            // Surface the captured exception through Message/InnerException so that the
            // wrapper is still meaningful if it is logged instead of being unwrapped.
            innerExceptionDispatchInfo?.SourceException.Message,
            innerExceptionDispatchInfo?.SourceException)
    {
        // Fail at construction time rather than with a NullReferenceException in ThrowInner.
        _exceptionDispatchInfo = innerExceptionDispatchInfo
            ?? throw new ArgumentNullException(nameof(innerExceptionDispatchInfo));
    }

    /// <summary>
    /// Rethrows the captured exception, preserving its original stack trace.
    /// </summary>
    public void ThrowInner()
    {
        _exceptionDispatchInfo.Throw();
    }
}
145179
}

e2e/stress/IoTClientPerf/PerfTestRunner.cs

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ public class PerfTestRunner
2525
private readonly int _timeSeconds;
2626
private readonly Func<PerfScenarioConfig, PerfScenario> _scenarioFactory;
2727

28-
private PerfScenario[] _tests;
29-
private Stopwatch _sw = new Stopwatch();
28+
private readonly PerfScenario[] _tests;
29+
private readonly Stopwatch _sw = new Stopwatch();
3030

3131
public PerfTestRunner(
3232
ResultWriter writer,
@@ -80,8 +80,9 @@ private void FilterTcpStatistics()
8080
}
8181
}
8282

83-
public async Task RunTestAsync()
83+
public async Task<int> RunTestAsync()
8484
{
85+
int ret = 0;
8586
_sw.Restart();
8687

8788
try
@@ -92,14 +93,16 @@ public async Task RunTestAsync()
9293
catch (OperationCanceledException)
9394
{
9495
Console.WriteLine($"Setup FAILED (timeout:{_sw.Elapsed})");
96+
ret = 1;
97+
return ret;
9598
}
9699

97100
_sw.Restart();
98101
Console.WriteLine();
99-
102+
100103
try
101104
{
102-
await LoopAsync().ConfigureAwait(false);
105+
ret = await LoopAsync().ConfigureAwait(false);
103106
}
104107
catch (OperationCanceledException)
105108
{
@@ -111,16 +114,22 @@ public async Task RunTestAsync()
111114

112115
await TeardownAllAsync().ConfigureAwait(false);
113116
Console.WriteLine("Done. ");
117+
118+
return ret;
114119
}
115120

116-
private async Task LoopAsync()
121+
private async Task<int> LoopAsync()
117122
{
118123
using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(_timeSeconds)))
119124
{
120125
ulong statTotalCompleted = 0;
121126
ulong statTotalFaulted = 0;
122127
ulong statTotalCancelled = 0;
123128
double statTotalSeconds = 0.0;
129+
int cpuLoad = 0;
130+
long memoryBytes = 0, gcBytes = 0, tcpConn = 0, devConn = 0;
131+
double avgRps = 0.0, stdDevRps = 0.0;
132+
124133
List<double> statRps = new List<double>();
125134

126135
var runner = new ParallelRun(
@@ -145,23 +154,61 @@ private async Task LoopAsync()
145154
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;
146155
double totalTransferPerSec = totalRequestsPerSec * _messageSizeBytes;
147156

148-
(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
157+
(avgRps, stdDevRps) = CalculateAvgAndStDev(statRps);
149158
double avgBps = avgRps * _messageSizeBytes;
150159
double stdDevBps = stdDevRps * _messageSizeBytes;
151-
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
152-
160+
SystemMetrics.GetMetrics(out cpuLoad, out memoryBytes, out gcBytes, out tcpConn, out devConn);
161+
153162
Console.WriteLine($"[{_sw.Elapsed}] Loop Statistics:");
154163
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
155-
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(avgRps)}/s ");
164+
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(stdDevBps)}/s ");
156165
Console.WriteLine($"Connected : {devConn,10:N0} ");
157-
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
166+
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
158167
Console.WriteLine("----");
159168
Console.WriteLine($"TOTALs: ");
160169
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
161170
Console.WriteLine($"Data : {GetHumanReadableBytes(statTotalCompleted * (ulong)_messageSizeBytes)} ");
162171
});
163172

164173
await runner.RunAsync(runOnce: false, ct: cts.Token).ConfigureAwait(false);
174+
175+
Console.WriteLine();
176+
int ret = 0;
177+
float? expectedDeviceConn = (float)_n * Configuration.Stress.ConnectedDevicesPercentage / 100;
178+
float? expectedTcpConn = (float)_poolSize * Configuration.Stress.TcpConnectionsPercentage / 100;
179+
180+
if (expectedDeviceConn.HasValue && (devConn < expectedDeviceConn))
181+
{
182+
Console.Error.WriteLine($"FAILED KPI: Connected Devices. Expected: >{expectedDeviceConn}; Actual: {devConn}.");
183+
ret = 1;
184+
}
185+
186+
if (expectedTcpConn.HasValue && (tcpConn != expectedTcpConn)) // Ensure all are connected and no connection leaks exist.
187+
{
188+
Console.Error.WriteLine($"FAILED KPI: TCP Connections. Expected: ={expectedTcpConn}; Actual: {tcpConn}.");
189+
ret = 2;
190+
}
191+
192+
if (Configuration.Stress.RequestsPerSecondMinAvg.HasValue && (avgRps < Configuration.Stress.RequestsPerSecondMinAvg))
193+
{
194+
Console.Error.WriteLine($"FAILED KPI: RPS Average. Expected: >{Configuration.Stress.RequestsPerSecondMinAvg}; Actual: {avgRps}.");
195+
ret = 3;
196+
}
197+
198+
if (Configuration.Stress.RequestsPerSecondMaxStd.HasValue && (stdDevRps > Configuration.Stress.RequestsPerSecondMaxStd))
199+
{
200+
Console.Error.WriteLine($"FAILED KPI: RPS StdDev. Expected: <{Configuration.Stress.RequestsPerSecondMaxStd}; Actual: {stdDevRps}.");
201+
ret = 4;
202+
}
203+
204+
if (Configuration.Stress.GCMemoryBytes.HasValue && (gcBytes > Configuration.Stress.GCMemoryBytes))
205+
{
206+
Console.Error.WriteLine($"FAILED KPI: GC Memory. Expected: <{GetHumanReadableBytes(Configuration.Stress.GCMemoryBytes.Value)}; Actual: {GetHumanReadableBytes(gcBytes)}.");
207+
ret = 5;
208+
}
209+
210+
if (ret != 0) Console.WriteLine("^^^^^^^^^^^^^^^^^^^\n");
211+
return ret;
165212
}
166213
}
167214

@@ -211,12 +258,12 @@ private async Task SetupAllAsync()
211258
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;
212259

213260
(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
214-
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
261+
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
215262

216263
Console.WriteLine($"[{_sw.Elapsed}] Setup Statistics:");
217264
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
218265
Console.WriteLine($"Connected : {devConn,10:N0} ");
219-
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
266+
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
220267
Console.WriteLine("----");
221268
Console.WriteLine($"TOTALs: ");
222269
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
@@ -257,13 +304,13 @@ private async Task TeardownAllAsync()
257304
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;
258305

259306
(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
260-
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
307+
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
261308

262309

263310
Console.WriteLine($"[{_sw.Elapsed}] Teardown Statistics:");
264311
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
265312
Console.WriteLine($"Connected : {devConn,10:N0} ");
266-
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
313+
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
267314
Console.WriteLine("----");
268315
Console.WriteLine($"TOTALs: ");
269316
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");

0 commit comments

Comments
 (0)