-
Notifications
You must be signed in to change notification settings - Fork 9
Implement URL normalization function and enhance SSRF detection #252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
de1fa00
d42f497
7bb57a4
6d5e7f4
d3f66d0
53fb8c7
976677b
f0bb7cc
a7eb170
ed1eed5
e13b062
95cfe5f
70d5d3d
0ef59f9
9e0a174
04c34e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,21 +1,21 @@ | ||
| module main | ||
|
|
||
| go 1.23.0 | ||
| go 1.24.0 | ||
|
|
||
| toolchain go1.23.3 | ||
| toolchain go1.24.8 | ||
|
|
||
| require ( | ||
| github.com/stretchr/testify v1.9.0 | ||
| google.golang.org/grpc v1.74.2 | ||
| google.golang.org/grpc v1.76.0 | ||
| google.golang.org/protobuf v1.36.6 | ||
| ) | ||
|
|
||
| require ( | ||
| github.com/davecgh/go-spew v1.1.1 // indirect | ||
| github.com/pmezard/go-difflib v1.0.0 // indirect | ||
| golang.org/x/net v0.40.0 // indirect | ||
| golang.org/x/sys v0.33.0 // indirect | ||
| golang.org/x/text v0.25.0 // indirect | ||
| google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect | ||
| golang.org/x/net v0.42.0 // indirect | ||
| golang.org/x/sys v0.34.0 // indirect | ||
| golang.org/x/text v0.27.0 // indirect | ||
| google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b // indirect | ||
| gopkg.in/yaml.v3 v3.0.1 // indirect | ||
| ) | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| package helpers | ||
|
|
||
| import ( | ||
| "net/url" | ||
| "regexp" | ||
| "strings" | ||
| ) | ||
|
|
||
| // remove all control characters (< 32) and 0x7f(DEL) + whitespace | ||
| func removeCTLByte(urlStr string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'urlStr' is reassigned/modified inside removeCTLByte, harming clarity and risking logic errors when mutating while iterating. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| for i := 0; i < len(urlStr); i++ { | ||
| if urlStr[i] <= ' ' || urlStr[i] == 0x7f { | ||
| urlStr = urlStr[:i] + urlStr[i+1:] | ||
| } | ||
| } | ||
| return urlStr | ||
| } | ||
|
|
||
| func removeUserInfo(raw string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'raw' is reused/reassigned within removeUserInfo instead of using distinct local variables, reducing clarity. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| schemeEnd := strings.Index(raw, "://") | ||
| if schemeEnd == -1 { | ||
| // No scheme, can't safely identify authority | ||
| return raw | ||
| } | ||
|
|
||
| scheme := raw[:schemeEnd+3] | ||
| rest := raw[schemeEnd+3:] | ||
|
|
||
| // Authority is up to first '/', '?', or '#' (https://datatracker.ietf.org/doc/html/rfc3986#section-3.2) | ||
| authorityEnd := len(rest) | ||
| for _, sep := range []string{"/", "?", "#"} { | ||
| if idx := strings.Index(rest, sep); idx != -1 && idx < authorityEnd { | ||
| authorityEnd = idx | ||
| } | ||
| } | ||
|
|
||
| authority := rest[:authorityEnd] | ||
| path := rest[authorityEnd:] | ||
|
|
||
| // Remove userinfo if present | ||
| if at := strings.LastIndex(authority, "@"); at != -1 { | ||
| authority = authority[at+1:] | ||
| } | ||
|
|
||
| return scheme + authority + path | ||
| } | ||
|
|
||
| func UnescapeUrl(urlStr string) string { | ||
| unescapedUrl, err := url.QueryUnescape(urlStr) | ||
| if err != nil { | ||
| return urlStr | ||
| } | ||
| return unescapedUrl | ||
| } | ||
|
|
||
| // ConvertIPv6Mapped converts IPv6-mapped IPv4 (only if it contains ::ffff:) | ||
| // Example: "http://[::ffff:10.0.0.1]" -> "http://10.0.0.1" | ||
| func convertIPv6Mapped(input string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'input' is reassigned multiple times within convertIPv6Mapped instead of using separate local variables. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| // Return immediately if not IPv6-mapped form | ||
| if !strings.Contains(input, "::ffff:") { | ||
| return input | ||
| } | ||
|
|
||
| // Extract URL scheme if present (http://, https://, etc.) | ||
| scheme := "" | ||
| if strings.Contains(input, "://") { | ||
| parts := strings.SplitN(input, "://", 2) | ||
| scheme = parts[0] + "://" | ||
| input = parts[1] | ||
| } | ||
|
|
||
| // Strip brackets | ||
| input = strings.TrimPrefix(input, "[") | ||
| input = strings.TrimSuffix(input, "]") | ||
|
|
||
| // Replace ::ffff:x.x.x.x -> x.x.x.x | ||
| re := regexp.MustCompile(`::ffff:(\d+\.\d+\.\d+\.\d+)`) | ||
| ip := re.ReplaceAllString(input, "$1") | ||
|
|
||
| return scheme + ip | ||
| } | ||
|
|
||
| func NormalizeRawUrl(urlStr string) string { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'urlStr' is repeatedly reassigned in NormalizeRawUrl rather than using a new local variable for the normalized value. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| urlStr = UnescapeUrl(urlStr) | ||
| urlStr = removeCTLByte(urlStr) | ||
| urlStr = FixURL(urlStr) | ||
| urlStr = removeUserInfo(urlStr) | ||
| urlStr = convertIPv6Mapped(urlStr) | ||
| return urlStr | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| package helpers | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestNormalizeRequestUrl(t *testing.T) { | ||
| tests := []struct { | ||
| input string | ||
| expected string | ||
| }{ | ||
| {"http://localhost:4000", "http://localhost:4000"}, | ||
| {"http://localhost:4000 ", "http://localhost:4000"}, | ||
| {"http://localhost:4000" + "\x00", "http://localhost:4000"}, | ||
| {"http://\\@localhost:4000", "http://localhost:4000"}, | ||
| {"http://127.1.1.1:4000\\\\\\@127.0.0.1:80/", "http://127.0.0.1:80/"}, | ||
| {"https:/localhost:4000", "https://localhost:4000"}, | ||
| {"http://127%2E0%2E0%2E1:4000", "http://127.0.0.1:4000"}, | ||
| } | ||
| for _, test := range tests { | ||
| result := NormalizeRawUrl(test.input) | ||
| if result != test.expected { | ||
| t.Errorf("For input '%s', expected %v but got %v", test.input, test.expected, result) | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,16 @@ | ||
| package helpers | ||
|
|
||
| import ( | ||
| "net" | ||
| "net/url" | ||
| "strconv" | ||
|
|
||
| "golang.org/x/net/idna" | ||
| ) | ||
|
|
||
| func TryParseURL(input string) *url.URL { | ||
| parsedURL, err := url.ParseRequestURI(input) | ||
| if err != nil { | ||
| parsedURL, err := url.Parse(input) | ||
| if err != nil || parsedURL.Host == "" { | ||
| return nil | ||
| } | ||
|
|
||
|
|
@@ -17,5 +19,26 @@ func TryParseURL(input string) *url.URL { | |
| if err == nil { | ||
| parsedURL.Host = parsedHost | ||
| } | ||
| // If the port is not present, we need to add it based on the scheme | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment states the mechanical action (adding a port) rather than explaining why default ports are added; replace with a goal/rationale comment. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| if parsedURL.Port() == "" { | ||
| port := 0 | ||
| switch parsedURL.Scheme { | ||
| case "http": | ||
| port = 80 | ||
| case "https": | ||
| port = 443 | ||
| } | ||
| parsedURL.Host = parsedURL.Host + ":" + strconv.Itoa(int(port)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Defaulting to port 0 and appending ":0" for unknown schemes results in invalid host values (e.g., example.com:0). Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| } | ||
| host, port, err := net.SplitHostPort(parsedURL.Host) | ||
| if err == nil { | ||
| ip := net.ParseIP(host) | ||
| if ip != nil { | ||
| parsedURL.Host = ip.String() + ":" + port | ||
| } else { | ||
| parsedURL.Host = host + ":" + port | ||
| } | ||
| } | ||
|
|
||
| return parsedURL | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,28 +2,50 @@ package ssrf | |
|
|
||
| import ( | ||
| "main/helpers" | ||
| "main/log" | ||
| "net/url" | ||
| "strconv" | ||
| "strings" | ||
| ) | ||
|
|
||
| func findHostnameInUserInput(userInput string, hostname string, port uint32) bool { | ||
| func getVariants(userInput string) []string { | ||
| variants := []string{userInput, "http://" + userInput, "https://" + userInput} | ||
| decodedUserInput, err := url.QueryUnescape(userInput) | ||
| if err == nil && decodedUserInput != userInput { | ||
| variants = append(variants, decodedUserInput, "http://"+decodedUserInput, "https://"+decodedUserInput) | ||
| } | ||
| return variants | ||
| } | ||
|
|
||
| func findHostnameInUserInput(userInput string, hostname string, port uint32) bool { | ||
| log.Debugf("findHostnameInUserInput: userInput: %s, hostname: %s, port: %d", userInput, hostname, port) | ||
| if len(userInput) <= 1 { | ||
| return false | ||
| } | ||
| // if hostname contains : we need to add the [ and ] to the hostname (ipv6) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function parameter 'hostname' is reassigned (modified) in-place; avoid reassigning function arguments. Details✨ AI Reasoning 🔧 How do I fix it? More info - Comment |
||
| if strings.Contains(hostname, ":") { | ||
| hostname = "[" + hostname + "]" | ||
| } | ||
|
|
||
| hostnameURL := helpers.TryParseURL("http://" + hostname) | ||
| hostnameURL := helpers.TryParseURL("http://" + hostname + ":" + strconv.Itoa(int(port))) | ||
| if hostnameURL == nil { | ||
| return false | ||
| } | ||
|
|
||
| userInput = helpers.ExtractResourceOrOriginal(userInput) | ||
| variants := []string{userInput, "http://" + userInput, "https://" + userInput} | ||
| userInput = helpers.NormalizeRawUrl(userInput) | ||
|
|
||
| variants := getVariants(userInput) | ||
|
|
||
| for _, variant := range variants { | ||
| userInputURL := helpers.TryParseURL(variant) | ||
| if userInputURL == nil { | ||
| continue | ||
| } | ||
|
|
||
| // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 | ||
| // "The host subcomponent is case-insensitive." | ||
| if userInputURL != nil && strings.EqualFold(userInputURL.Hostname(), hostnameURL.Hostname()) { | ||
| if strings.EqualFold(userInputURL.Hostname(), hostnameURL.Hostname()) { | ||
| userPort := helpers.GetPortFromURL(userInputURL) | ||
|
|
||
| if port == 0 { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment describes what the code does (removing control chars and whitespace) instead of explaining why URL normalization is required or what invariants it preserves.
Details
✨ AI Reasoning
1) The added comment on line 52-53 describes the exact string transformations performed (removing control characters, replacing @, removing whitespace) rather than explaining why these transformations are necessary or any constraints around them.
2) This harms maintainability because such 'what' comments can become outdated when the helper implementation changes and do not convey the rationale behind normalizing the URL before parsing.
3) The violation is introduced by this PR because the comment was added in the changed lines and is not present before.
🔧 How do I fix it?
Write comments that explain the purpose, reasoning, or business logic behind the code using words like 'because', 'so that', or 'in order to'.
More info - Comment
@AikidoSec feedback: [FEEDBACK]to get better review comments in the future.