diff --git a/.github/workflows/repo-assist.lock.yml b/.github/workflows/repo-assist.lock.yml index 5e9da92e..bb338ffa 100644 --- a/.github/workflows/repo-assist.lock.yml +++ b/.github/workflows/repo-assist.lock.yml @@ -398,6 +398,57 @@ jobs: persist-credentials: false - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Start DIFC proxy for pre-agent gh calls + env: + GH_TOKEN: ${{ github.token }} + run: | + PROXY_LOG_DIR=/tmp/gh-aw/proxy-logs + mkdir -p "$PROXY_LOG_DIR" + + POLICY='{"allow-only":{"repos":["github/*"],"min-integrity":"approved"}}' + + docker run -d --name awmg-proxy --network host \ + -e GH_TOKEN \ + -e DEBUG='*' \ + -v "$PROXY_LOG_DIR:$PROXY_LOG_DIR" \ + ghcr.io/github/gh-aw-mcpg:v0.1.23 proxy \ + --policy "$POLICY" \ + --listen 0.0.0.0:18443 \ + --log-dir "$PROXY_LOG_DIR" \ + --tls --tls-dir "$PROXY_LOG_DIR/proxy-tls" \ + --guards-mode filter + + # Wait for TLS cert to be generated + CA_INSTALLED=false + PROXY_READY=false + for i in $(seq 1 30); do + if [ -f "$PROXY_LOG_DIR/proxy-tls/ca.crt" ]; then + # Add proxy CA to system trust store once so gh CLI (Go) trusts it + if [ "$CA_INSTALLED" = "false" ]; then + sudo cp "$PROXY_LOG_DIR/proxy-tls/ca.crt" /usr/local/share/ca-certificates/awmg-proxy.crt + sudo update-ca-certificates + CA_INSTALLED=true + fi + # Health check against proxy's /health endpoint (/api/v3/health after prefix strip) + if curl -sf "https://localhost:18443/api/v3/health" -o /dev/null 2>/dev/null; then + echo "DIFC proxy ready on port 18443" + echo "GH_HOST=localhost:18443" >> "$GITHUB_ENV" + # Add git remote matching proxy host so gh CLI can resolve repo context + git remote add proxy "https://localhost:18443/${GITHUB_REPOSITORY}.git" || true + echo "Git remotes:" + git remote -v + PROXY_READY=true + break + fi + fi + sleep 1 + done + + if [ "$PROXY_READY" = "false" ]; then + echo "::warning::DIFC proxy failed to start, falling back to direct API access" + docker logs 
awmg-proxy 2>&1 | tail -20 || true + docker rm -f awmg-proxy 2>/dev/null || true + fi - name: Configure gh CLI for GitHub Enterprise run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh env: @@ -405,7 +456,14 @@ jobs: - env: GH_TOKEN: ${{ github.token }} name: Fetch repo data for task weighting - run: "mkdir -p /tmp/gh-aw\n\n# Fetch open issues with labels (up to 500)\ngh issue list --state open --limit 500 --json number,labels > /tmp/gh-aw/issues.json\n\n# Fetch open PRs with titles (up to 200)\ngh pr list --state open --limit 200 --json number,title > /tmp/gh-aw/prs.json\n\n# Compute task weights and select two tasks for this run\npython3 - << 'EOF'\nimport json, random, os\n\nwith open('/tmp/gh-aw/issues.json') as f:\n issues = json.load(f)\nwith open('/tmp/gh-aw/prs.json') as f:\n prs = json.load(f)\n\nopen_issues = len(issues)\nunlabelled = sum(1 for i in issues if not i.get('labels'))\nrepo_assist_prs = sum(1 for p in prs if p['title'].startswith('[Repo Assist]'))\nother_prs = sum(1 for p in prs if not p['title'].startswith('[Repo Assist]'))\n\ntask_names = {\n 1: 'Issue Labelling',\n 2: 'Issue Investigation and Comment',\n 3: 'Issue Investigation and Fix',\n 4: 'Engineering Investments',\n 5: 'Coding Improvements',\n 6: 'Maintain Repo Assist PRs',\n 7: 'Stale PR Nudges',\n 8: 'Performance Improvements',\n 9: 'Testing Improvements',\n 10: 'Take the Repository Forward',\n}\n\nweights = {\n 1: 1 + 3 * unlabelled,\n 2: 3 + 1 * open_issues,\n 3: 3 + 0.7 * open_issues,\n 4: 5 + 0.2 * open_issues,\n 5: 5 + 0.1 * open_issues,\n 6: float(repo_assist_prs),\n 7: 0.1 * other_prs,\n 8: 3 + 0.05 * open_issues,\n 9: 3 + 0.05 * open_issues,\n 10: 3 + 0.05 * open_issues,\n}\n\n# Seed with run ID for reproducibility within a run\nrun_id = int(os.environ.get('GITHUB_RUN_ID', '0'))\nrng = random.Random(run_id)\n\ntask_ids = list(weights.keys())\ntask_weights = [weights[t] for t in task_ids]\n\n# Weighted sample without replacement (pick 2 distinct tasks)\nchosen, seen 
= [], set()\nfor t in rng.choices(task_ids, weights=task_weights, k=30):\n if t not in seen:\n seen.add(t)\n chosen.append(t)\n if len(chosen) == 2:\n break\n\nprint('=== Repo Assist Task Selection ===')\nprint(f'Open issues : {open_issues}')\nprint(f'Unlabelled issues : {unlabelled}')\nprint(f'Repo Assist PRs : {repo_assist_prs}')\nprint(f'Other open PRs : {other_prs}')\nprint()\nprint('Task weights:')\nfor t, w in weights.items():\n tag = ' <-- SELECTED' if t in chosen else ''\n print(f' Task {t:2d} ({task_names[t]}): weight {w:6.1f}{tag}')\nprint()\nprint(f'Selected tasks for this run: Task {chosen[0]} ({task_names[chosen[0]]}) and Task {chosen[1]} ({task_names[chosen[1]]})')\n\nresult = {\n 'open_issues': open_issues, 'unlabelled_issues': unlabelled,\n 'repo_assist_prs': repo_assist_prs, 'other_prs': other_prs,\n 'task_names': task_names,\n 'weights': {str(k): round(v, 2) for k, v in weights.items()},\n 'selected_tasks': chosen,\n}\nwith open('/tmp/gh-aw/task_selection.json', 'w') as f:\n json.dump(result, f, indent=2)\nEOF\n" + run: "mkdir -p /tmp/gh-aw\n\n# Fetch open issues with labels (up to 500)\n# Fallback to empty array if DIFC proxy filters all data\ngh issue list -R $GITHUB_REPOSITORY --state open --limit 500 --json number,labels > /tmp/gh-aw/issues.json 2>/dev/null || echo '[]' > /tmp/gh-aw/issues.json\n\n# Fetch open PRs with titles (up to 200)\ngh pr list -R $GITHUB_REPOSITORY --state open --limit 200 --json number,title > /tmp/gh-aw/prs.json 2>/dev/null || echo '[]' > /tmp/gh-aw/prs.json\n\n# Compute task weights and select two tasks for this run\npython3 - << 'EOF'\nimport json, random, os\n\nwith open('/tmp/gh-aw/issues.json') as f:\n issues = json.load(f)\nwith open('/tmp/gh-aw/prs.json') as f:\n prs = json.load(f)\n\nopen_issues = len(issues)\nunlabelled = sum(1 for i in issues if not i.get('labels'))\nrepo_assist_prs = sum(1 for p in prs if p['title'].startswith('[Repo Assist]'))\nother_prs = sum(1 for p in prs if not 
p['title'].startswith('[Repo Assist]'))\n\ntask_names = {\n 1: 'Issue Labelling',\n 2: 'Issue Investigation and Comment',\n 3: 'Issue Investigation and Fix',\n 4: 'Engineering Investments',\n 5: 'Coding Improvements',\n 6: 'Maintain Repo Assist PRs',\n 7: 'Stale PR Nudges',\n 8: 'Performance Improvements',\n 9: 'Testing Improvements',\n 10: 'Take the Repository Forward',\n}\n\nweights = {\n 1: 1 + 3 * unlabelled,\n 2: 3 + 1 * open_issues,\n 3: 3 + 0.7 * open_issues,\n 4: 5 + 0.2 * open_issues,\n 5: 5 + 0.1 * open_issues,\n 6: float(repo_assist_prs),\n 7: 0.1 * other_prs,\n 8: 3 + 0.05 * open_issues,\n 9: 3 + 0.05 * open_issues,\n 10: 3 + 0.05 * open_issues,\n}\n\n# Seed with run ID for reproducibility within a run\nrun_id = int(os.environ.get('GITHUB_RUN_ID', '0'))\nrng = random.Random(run_id)\n\ntask_ids = list(weights.keys())\ntask_weights = [weights[t] for t in task_ids]\n\n# Weighted sample without replacement (pick 2 distinct tasks)\nchosen, seen = [], set()\nfor t in rng.choices(task_ids, weights=task_weights, k=30):\n if t not in seen:\n seen.add(t)\n chosen.append(t)\n if len(chosen) == 2:\n break\n\nprint('=== Repo Assist Task Selection ===')\nprint(f'Open issues : {open_issues}')\nprint(f'Unlabelled issues : {unlabelled}')\nprint(f'Repo Assist PRs : {repo_assist_prs}')\nprint(f'Other open PRs : {other_prs}')\nprint()\nprint('Task weights:')\nfor t, w in weights.items():\n tag = ' <-- SELECTED' if t in chosen else ''\n print(f' Task {t:2d} ({task_names[t]}): weight {w:6.1f}{tag}')\nprint()\nprint(f'Selected tasks for this run: Task {chosen[0]} ({task_names[chosen[0]]}) and Task {chosen[1]} ({task_names[chosen[1]]})')\n\nresult = {\n 'open_issues': open_issues, 'unlabelled_issues': unlabelled,\n 'repo_assist_prs': repo_assist_prs, 'other_prs': other_prs,\n 'task_names': task_names,\n 'weights': {str(k): round(v, 2) for k, v in weights.items()},\n 'selected_tasks': chosen,\n}\nwith open('/tmp/gh-aw/task_selection.json', 'w') as f:\n json.dump(result, f, 
indent=2)\nEOF\n" + - name: Dump proxy logs (debug) + if: always() + run: | + echo "=== Proxy container logs ===" + docker logs awmg-proxy 2>&1 | tail -80 || true + echo "=== Proxy log file ===" + cat /tmp/gh-aw/proxy-logs/proxy.log 2>/dev/null | tail -50 || true # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) @@ -450,7 +508,7 @@ jobs: - name: Install AWF binary run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.24.3 - name: Download container images - run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.24.3 ghcr.io/github/gh-aw-firewall/api-proxy:0.24.3 ghcr.io/github/gh-aw-firewall/squid:0.24.3 ghcr.io/github/gh-aw-mcpg:v0.1.19 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.24.3 ghcr.io/github/gh-aw-firewall/api-proxy:0.24.3 ghcr.io/github/gh-aw-firewall/squid:0.24.3 ghcr.io/github/gh-aw-mcpg:v0.1.23 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine - name: Write Safe Outputs Config run: | mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs @@ -798,7 +856,7 @@ jobs: export DEBUG="*" export GH_AW_ENGINE="copilot" - export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e 
GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.19' + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.23' mkdir -p /home/runner/.copilot cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh @@ -815,7 +873,7 @@ jobs: }, "guard-policies": { "allow-only": { - "min-integrity": "merged", + 
/// Trusted bots (logins are compared case-insensitively):
/// - dependabot[bot]: GitHub dependency updater
/// - github-actions[bot]: GitHub Actions workflow actor (GITHUB_TOKEN)
/// - github-actions: same actor, reported without the [bot] suffix by some APIs
/// - app/github-actions: same actor, reported with an app/ prefix by the gh CLI
/// - github-merge-queue[bot]: GitHub merge queue automation
/// - copilot: GitHub Copilot AI assistant
pub fn is_trusted_first_party_bot(username: &str) -> bool {
    // Normalise once, then test membership in the fixed allow-list.
    let normalized = username.to_lowercase();
    matches!(
        normalized.as_str(),
        "dependabot[bot]"
            | "github-actions[bot]"
            | "github-actions"
            | "app/github-actions"
            | "github-merge-queue[bot]"
            | "copilot"
    )
}
assert!(!is_trusted_first_party_bot("github-actions")); + assert!(is_trusted_first_party_bot("github-actions")); + assert!(is_trusted_first_party_bot("app/github-actions")); assert!(!is_trusted_first_party_bot("")); } @@ -893,6 +894,16 @@ mod tests { writer_integrity(repo, &ctx) ); + // github-actions without [bot] suffix (as returned by some APIs) + let actions_no_bot_issue = json!({ + "user": {"login": "github-actions"}, + "author_association": "NONE" + }); + assert_eq!( + issue_integrity(&actions_no_bot_issue, repo, false, &ctx), + writer_integrity(repo, &ctx) + ); + // Non-trusted bot still gets none integrity on public repo let renovate_issue = json!({ "user": {"login": "renovate[bot]"}, diff --git a/internal/proxy/graphql.go b/internal/proxy/graphql.go index ea05b150..cbaccd53 100644 --- a/internal/proxy/graphql.go +++ b/internal/proxy/graphql.go @@ -35,6 +35,10 @@ type graphqlPattern struct { // graphqlPatterns is the ordered list of GraphQL operation → tool name mappings. var graphqlPatterns = []graphqlPattern{ + // Schema introspection queries (safe read-only metadata, no repo data) + {queryPattern: regexp.MustCompile(`(?i)__type\s*\(`), toolName: "graphql_introspection"}, + {queryPattern: regexp.MustCompile(`(?i)__schema\b`), toolName: "graphql_introspection"}, + // Issue operations (singular before plural — more specific first) {queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bissue\s*\(`), toolName: "issue_read"}, {queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bissues\s*[\({]`), toolName: "list_issues"}, @@ -170,7 +174,9 @@ func extractOwnerRepo(variables map[string]interface{}, query string) (string, s } // IsGraphQLPath returns true if the request path is the GraphQL endpoint. +// Accepts /graphql (after prefix strip), /api/v3/graphql (before strip), +// and /api/graphql (GHES-style path used by gh CLI with GH_HOST). 
func IsGraphQLPath(path string) bool {
	// At most one trailing slash is ignored, so "/graphql/" matches while
	// "/graphql//" does not.
	switch strings.TrimSuffix(path, "/") {
	case "/graphql", "/api/v3/graphql", "/api/graphql":
		return true
	default:
		return false
	}
}
labeledData.(*difc.CollectionLabeledData); ok { filtered := s.evaluator.FilterCollection( @@ -220,19 +235,36 @@ func (h *proxyHandler) handleWithDIFC(w http.ResponseWriter, r *http.Request, pa return } - finalData, err = filtered.ToResult() - if err != nil { - logHandler.Printf("[DIFC] Phase 5 ToResult failed: %v", err) + // For GraphQL: if nothing was filtered, return original response body + // to preserve the exact response format (ToResult transforms the structure) + if graphQLBody != nil && filtered.GetFilteredCount() == 0 { + useOriginalBody = true + } else if graphQLBody != nil { + // GraphQL with filtered items: return valid empty GraphQL response + // (ToResult returns an array which breaks gh CLI's GraphQL parser) + logHandler.Printf("[DIFC] GraphQL response: %d/%d items filtered, returning empty GraphQL response", + filtered.GetFilteredCount(), filtered.TotalCount) h.writeEmptyResponse(w, resp, responseData) return + } else { + finalData, err = filtered.ToResult() + if err != nil { + logHandler.Printf("[DIFC] Phase 5 ToResult failed: %v", err) + h.writeEmptyResponse(w, resp, responseData) + return + } } } else { // Simple labeled data — already passed coarse check - finalData, err = labeledData.ToResult() - if err != nil { - logHandler.Printf("[DIFC] Phase 5 ToResult failed: %v", err) - h.writeEmptyResponse(w, resp, responseData) - return + if graphQLBody != nil { + useOriginalBody = true + } else { + finalData, err = labeledData.ToResult() + if err != nil { + logHandler.Printf("[DIFC] Phase 5 ToResult failed: %v", err) + h.writeEmptyResponse(w, resp, responseData) + return + } } } } else { @@ -253,17 +285,21 @@ func (h *proxyHandler) handleWithDIFC(w http.ResponseWriter, r *http.Request, pa } // Write the filtered response - filteredJSON, err := json.Marshal(finalData) - if err != nil { - http.Error(w, "failed to serialize filtered response", http.StatusInternalServerError) - return + if useOriginalBody { + // GraphQL: return original upstream response 
// truncateForLog returns s unchanged when it is at most maxLen bytes long;
// otherwise it returns a prefix of s of at most maxLen bytes followed by "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is a
// continuation byte, the cut is moved back to the start of that rune, so the
// logged text stays valid UTF-8 whenever s was. A negative maxLen is treated
// as 0 instead of panicking on the slice expression.
func truncateForLog(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// A UTF-8 rune has at most three continuation bytes (10xxxxxx), so at most
	// three steps back reach its lead byte. The bound also keeps malformed
	// input from collapsing the prefix to nothing.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "..."
}