scale-agentex-python/.github/workflows/agentex-tutorials-test.yml at main · scaleapi/scale-agentex-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# Workflow display name shown in the GitHub Actions UI.
name: Test Tutorial Agents

# Triggers: pull requests targeting main, pushes to main, and manual runs.
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  # Discovers every tutorial directory (one containing a manifest.yaml) under
  # examples/tutorials and publishes the list as a compact JSON array, which
  # the test job expands into its matrix.
  find-tutorials:
    runs-on: ubuntu-latest
    outputs:
      tutorials: ${{ steps.get-tutorials.outputs.tutorials }}
    steps:
      - name: Checkout agentex-python repo
        uses: actions/checkout@v4

      - name: Find all tutorials
        id: get-tutorials
        run: |
          set -euo pipefail
          cd examples/tutorials

          # Find all tutorials with a manifest.yaml (paths relative to examples/tutorials)
          all_tutorials=$(find . -name "manifest.yaml" -exec dirname {} \; | sort | sed 's|^\./||')

          # Convert the newline-separated list to a compact JSON array, dropping empty entries
          tutorials=$(printf '%s\n' "$all_tutorials" | jq -R -s -c 'split("\n") | map(select(length > 0))')

          # Count from the JSON array: `wc -l` on an empty list would still report 1.
          tutorial_count=$(printf '%s' "$tutorials" | jq 'length')

          # An empty matrix makes the dependent job fail with an opaque
          # "matrix vector does not contain any values" error, so fail here
          # with an explicit message instead.
          if [ "$tutorial_count" -eq 0 ]; then
            echo "::error::No tutorials with a manifest.yaml were found under examples/tutorials"
            exit 1
          fi

          echo "tutorials=$tutorials" >> "$GITHUB_OUTPUT"
          echo "All tutorials found: $tutorial_count"
          echo "Final tutorial list: $tutorials"

  # Runs one job per tutorial path emitted by find-tutorials.
  test-tutorial:
    name: test-${{ matrix.tutorial }}
    needs: find-tutorials
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      # Let the remaining tutorials keep running when one of them fails.
      fail-fast: false
      matrix:
        tutorial: ${{ fromJson(needs.find-tutorials.outputs.tutorials) }}

    steps:
      - name: Checkout agentex-python repo
        uses: actions/checkout@v4

      # Run the upstream uv installer, then add $HOME/.local/bin to PATH for
      # the steps that follow.
      - name: Install UV
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      # Pulls the AgentEx server image, retrying with a linearly growing
      # back-off (10s, 20s, ...) to ride out transient registry failures.
      - name: Pull latest AgentEx image
        run: |
          echo "🐳 Pulling latest Scale AgentEx Docker image..."
          max_attempts=3
          for attempt in $(seq 1 "$max_attempts"); do
            echo "Attempt $attempt of $max_attempts..."
            if docker pull ghcr.io/scaleapi/scale-agentex/agentex:latest; then
              echo "✅ Successfully pulled AgentEx Docker image"
              exit 0
            fi
            # Only back off when another attempt follows; sleeping after the
            # final failure just delays the error.
            if [ "$attempt" -lt "$max_attempts" ]; then
              echo "❌ Pull failed, waiting before retry..."
              sleep $((attempt * 10))
            fi
          done
          echo "❌ Failed to pull image after $max_attempts attempts"
          exit 1

      # Check out the server repository next to the SDK sources; later steps
      # use its docker-compose.yml under scale-agentex/agentex.
      - name: Checkout scale-agentex repo
        uses: actions/checkout@v4
        with:
          path: scale-agentex
          repository: scaleapi/scale-agentex

      - name: Configure Docker Compose for pulled image and host networking
        run: |
          cd scale-agentex/agentex
          echo "🔧 Configuring AgentEx container to use pulled image and host networking..."

          # yq performs the in-place YAML edits below
          yq_target=/usr/local/bin/yq
          sudo wget -qO "$yq_target" https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
          sudo chmod +x "$yq_target"

          # Single pass over docker-compose.yml:
          #   1. point the agentex service at the pulled image,
          #   2. drop its build section so compose does not rebuild it,
          #   3. map host.docker.internal to the host gateway.
          yq eval -i '
            .services.agentex.image = "ghcr.io/scaleapi/scale-agentex/agentex:latest" |
            del(.services.agentex.build) |
            .services.agentex.extra_hosts = ["host.docker.internal:host-gateway"]
          ' docker-compose.yml

          echo "✅ Configured docker-compose to use pulled image with host access"

      # Brings up the compose stack and blocks until the AgentEx HTTP endpoint
      # on localhost:5003 answers. The step fails, with container status and
      # logs, if the server never becomes reachable, rather than letting the
      # tutorial test run against a dead server.
      - name: Start AgentEx Server
        run: |
          cd scale-agentex/agentex
          echo "🚀 Starting AgentEx server and dependencies..."

          # Start all services
          docker compose up -d

          echo "⏳ Waiting for dependencies to be healthy..."

          # Wait for at least one service to report healthy (best effort).
          # -w stops "unhealthy" from satisfying the match.
          deps_healthy=false
          for i in {1..30}; do
            if docker compose ps | grep -qw "healthy"; then
              echo "✅ Dependencies are healthy"
              deps_healthy=true
              break
            fi
            echo "  Attempt $i/30: Waiting for services..."
            sleep 5
          done
          if [ "$deps_healthy" != "true" ]; then
            echo "⚠️ No service reported healthy after 30 attempts; continuing to the AgentEx readiness check"
          fi

          # Wait specifically for AgentEx server to be ready
          echo "⏳ Waiting for AgentEx server to be ready..."
          server_ready=false
          for i in {1..30}; do
            if curl -s --max-time 5 http://localhost:5003/health >/dev/null 2>&1; then
              echo "✅ AgentEx server is ready"
              server_ready=true
              break
            fi
            echo "  Attempt $i/30: Waiting for AgentEx server..."
            sleep 5
          done

          # Timing out must be fatal: everything after this step needs the server.
          if [ "$server_ready" != "true" ]; then
            echo "❌ AgentEx server did not become ready after 30 attempts"
            docker compose ps || true
            docker compose logs --tail=100 || true
            exit 1
          fi

      # Builds the SDK with `uv build` from the repository root and lists
      # dist/ so the produced artifacts are visible in the job log.
      - name: Build AgentEx SDK
        run: |
          echo "🔨 Building AgentEx SDK wheel..."
          uv build
          echo "✅ SDK built successfully"
          ls -la dist/

      # Runs the tutorial's test through run_agent_test.sh against the local
      # AgentEx server started earlier in this job.
      - name: Test Tutorial
        id: run-test
        working-directory: ./examples/tutorials
        env:
          OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
          HEALTH_CHECK_PORT: "8080" # Use non-privileged port for temporal worker health checks
          # The tutorial path comes from directory names in the checked-out
          # tree. Passing it through the environment, instead of expanding the
          # expression inside the script, keeps shell metacharacters in a
          # directory name from being executed.
          MATRIX_TUTORIAL: ${{ matrix.tutorial }}
        run: |
          echo "Testing tutorial: $MATRIX_TUTORIAL"
          AGENTEX_API_BASE_URL="http://localhost:5003" \
            ./run_agent_test.sh --build-cli "$MATRIX_TUTORIAL"

      # Diagnostic dump that runs only when an earlier step failed: prints any
      # log/output files found under the tutorial directory and lists the
      # python processes that are still running.
      - name: Print agent logs on failure
        if: failure()
        working-directory: ./examples/tutorials
        env:
          # Read from the environment so the path is never parsed as shell code.
          MATRIX_TUTORIAL: ${{ matrix.tutorial }}
        run: |
          echo "🚨 Test failed for tutorial: $MATRIX_TUTORIAL"
          echo "📋 Printing agent logs..."

          # Look for agent log files in the tutorial directory
          if find "$MATRIX_TUTORIAL" -name "*.log" -type f 2>/dev/null | grep -q .; then
            echo "Found agent log files:"
            find "$MATRIX_TUTORIAL" -name "*.log" -type f -exec echo "=== {} ===" \; -exec cat {} \;
          else
            echo "No .log files found, checking for other common log locations..."
          fi

          # Check for any output files or dumps.
          # The -name alternatives must be grouped: without the parentheses,
          # -exec binds only to the last -name test and just *.out files are
          # printed. -type f keeps cat away from matching directories.
          if find "$MATRIX_TUTORIAL" -type f \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) 2>/dev/null | grep -q .; then
            echo "Found other output files:"
            find "$MATRIX_TUTORIAL" -type f \( -name "agent_output*" -o -name "debug*" -o -name "*.out" \) -exec echo "=== {} ===" \; -exec cat {} \;
          fi

          # List python processes that might still be running. The [p] bracket
          # keeps grep from matching its own command line, so the fallback
          # message can actually be reached.
          echo "🔍 Checking for running python processes..."
          ps aux | grep '[p]ython' || echo "No python processes found"

      # Persists this matrix job's outcome as two small files, result-<safe>.txt
      # and tutorial-<safe>.txt, so the summary job can rebuild a per-tutorial
      # table from the uploaded artifacts. <safe> is the tutorial path with
      # '/' replaced by '_' and spaces removed.
      - name: Record test result
        id: test-result
        if: always()
        env:
          # Workflow values are read from the environment so they are never
          # parsed as shell code.
          MATRIX_TUTORIAL: ${{ matrix.tutorial }}
          RUN_TEST_OUTCOME: ${{ steps.run-test.outcome }}
        run: |
          # Create results directory
          mkdir -p test-results

          # Anything other than an explicit success (failure, cancelled,
          # skipped) is recorded as failed.
          if [ "$RUN_TEST_OUTCOME" = "success" ]; then
            result="passed"
          else
            result="failed"
          fi

          # Create a safe filename from tutorial path
          safe_name=$(printf '%s\n' "$MATRIX_TUTORIAL" | tr '/' '_' | tr -d ' ')

          {
            echo "result=${result}"
            echo "tutorial=${MATRIX_TUTORIAL}"
            echo "safe_name=${safe_name}"
          } >> "$GITHUB_OUTPUT"

          # Save result to file for artifact upload
          printf '%s\n' "$result" > "test-results/result-${safe_name}.txt"
          printf '%s\n' "$MATRIX_TUTORIAL" > "test-results/tutorial-${safe_name}.txt"

      # One artifact per tutorial, named after the safe name computed above.
      - name: Upload test result
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-result-${{ steps.test-result.outputs.safe_name }}
          path: test-results/
          retention-days: 1

  # Aggregates the per-tutorial result artifacts into a single report.
  test-summary:
    name: Test Summary
    # Run even when upstream jobs failed or were cancelled.
    if: always()
    needs:
      - find-tutorials
      - test-tutorial
    runs-on: ubuntu-latest
    steps:
      # Merge every test-result-* artifact into all-results/. A failure here
      # does not stop the job (continue-on-error).
      - name: Download all test results
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          pattern: test-result-*
          merge-multiple: true
          path: all-results/

      # Writes a markdown report to the step summary. With result artifacts
      # present it emits one table row per tutorial plus aggregate counts;
      # without them it falls back to the overall status of the test job.
      - name: Generate Test Summary
        env:
          # Read from the environment instead of being expanded inside the
          # script, so quotes in a tutorial path cannot break the shell.
          TUTORIALS_JSON: ${{ needs.find-tutorials.outputs.tutorials }}
          OVERALL_RESULT: ${{ needs.test-tutorial.result }}
        run: |
          {
            echo "# 🧪 Tutorial Tests Summary"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          # Initialize counters
          passed_count=0
          failed_count=0
          skipped_count=0
          total_count=0

          # All tutorials that were supposed to run; empty list when
          # find-tutorials produced no output.
          tutorials="${TUTORIALS_JSON:-[]}"

          # Decide once whether any result artifacts were downloaded.
          have_results=false
          if [ -d "all-results" ] && [ -n "$(ls -A all-results 2>/dev/null)" ]; then
            have_results=true
          fi

          if [ "$have_results" = "true" ]; then
            echo "📊 Processing individual test results from artifacts..."

            {
              echo "## Test Results"
              echo ""
              echo "| Tutorial | Status | Result |"
              echo "|----------|--------|--------|"
            } >> "$GITHUB_STEP_SUMMARY"

            # Process each result file
            for result_file in all-results/result-*.txt; do
              # An unmatched glob stays literal, so skip anything that is not a file.
              [ -f "$result_file" ] || continue

              # Extract the safe name from the filename
              safe_name=$(basename "$result_file" .txt)
              safe_name="${safe_name#result-}"

              # The companion file holds the original tutorial path
              tutorial_file="all-results/tutorial-${safe_name}.txt"
              [ -f "$tutorial_file" ] || continue

              tutorial_name=$(cat "$tutorial_file")
              result=$(cat "$result_file")
              total_count=$((total_count + 1))

              if [ "$result" = "passed" ]; then
                echo "| \`$tutorial_name\` | ✅ | Passed |" >> "$GITHUB_STEP_SUMMARY"
                passed_count=$((passed_count + 1))
              else
                echo "| \`$tutorial_name\` | ❌ | Failed |" >> "$GITHUB_STEP_SUMMARY"
                failed_count=$((failed_count + 1))
              fi
            done

            # Check for any tutorials that didn't have results (skipped/cancelled).
            # The loop reads from process substitution, not a pipe: a piped
            # `while` runs in a subshell and would lose the counter updates.
            while IFS= read -r expected_tutorial; do
              [ -n "$expected_tutorial" ] || continue
              safe_expected=$(printf '%s\n' "$expected_tutorial" | tr '/' '_' | tr -d ' ')
              if [ ! -f "all-results/result-${safe_expected}.txt" ]; then
                echo "| \`$expected_tutorial\` | ⏭️ | Skipped/Cancelled |" >> "$GITHUB_STEP_SUMMARY"
                skipped_count=$((skipped_count + 1))
                total_count=$((total_count + 1))
              fi
            done < <(printf '%s' "$tutorials" | jq -r '.[]')

            {
              echo ""
              echo "## Summary Statistics"
              echo ""
              echo "- **Total Tests:** $total_count"
              echo "- **Passed:** $passed_count ✅"
              echo "- **Failed:** $failed_count ❌"
              echo "- **Skipped:** $skipped_count ⏭️"
              echo ""

              if [ "$failed_count" -gt 0 ]; then
                echo "⚠️ **Some tests failed.** Check individual job logs for details."
                echo ""
                echo "💡 **Tip:** Look for the 'Print agent logs on failure' step in failed jobs for debugging information."
              elif [ "$passed_count" -gt 0 ] && [ "$skipped_count" -eq 0 ]; then
                # Only claim full success when nothing was skipped or cancelled.
                echo "🎉 **All tests passed!**"
              else
                echo "ℹ️ **Tests were cancelled or skipped.**"
              fi
            } >> "$GITHUB_STEP_SUMMARY"

          else
            # No artifacts: report the overall job status instead. The text is
            # sent to both the job log and the step summary, so the summary
            # page is not left with only a heading.
            {
              echo "⚠️ No individual test results found. This could mean:"
              echo "- Test jobs were cancelled before completion"
              echo "- Artifacts failed to upload"
              echo "- No tutorials were found to test"
              echo ""

              overall_result="$OVERALL_RESULT"
              echo "Overall job status: **$overall_result**"
              echo ""

              case "$overall_result" in
                success)
                  echo "✅ All tests appear to have passed based on job status."
                  ;;
                failure)
                  echo "❌ Some tests appear to have failed based on job status."
                  echo ""
                  echo "💡 **Tip:** Check individual job logs for specific failure details."
                  ;;
                cancelled)
                  echo "⏭️ Tests were cancelled."
                  ;;
                *)
                  echo "❓ Test status is unclear: $overall_result"
                  ;;
              esac

              # No detailed breakdown without individual results; just the expected count
              tutorial_count=$(printf '%s' "$tutorials" | jq -r 'length')
              echo ""
              echo "Expected tutorial count: $tutorial_count"
            } | tee -a "$GITHUB_STEP_SUMMARY"
          fi

      # Fail this job whenever the test-tutorial job ended in anything other
      # than success.
      - name: Fail if tests failed
        if: needs.test-tutorial.result != 'success'
        run: |
          echo "❌ Test jobs did not succeed. Result: ${{ needs.test-tutorial.result }}"
          exit 1