diff --git a/entity/change/change.go b/entity/change/change.go index bf24dc4c..ff86c272 100644 --- a/entity/change/change.go +++ b/entity/change/change.go @@ -14,7 +14,7 @@ // Package change holds the shared code-change identity used across SubmitQueue, // Stovepipe, and other repo-local domains. A Change names code to act on via -// provider URIs; the URI parsers live in the github and phabricator subpackages. +// provider URIs; the URI parsers live in the github, phabricator, and git subpackages. package change // Change represents a code change identified by URIs from a code change provider (e.g., GitHub Pull Request, Phabricator Diff). diff --git a/entity/change/changeutil/BUILD.bazel b/entity/change/changeutil/BUILD.bazel new file mode 100644 index 00000000..46c750d5 --- /dev/null +++ b/entity/change/changeutil/BUILD.bazel @@ -0,0 +1,15 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "changeutil", + srcs = ["hex.go"], + importpath = "github.com/uber/submitqueue/entity/change/changeutil", + visibility = ["//visibility:public"], +) + +go_test( + name = "changeutil_test", + srcs = ["hex_test.go"], + embed = [":changeutil"], + deps = ["@com_github_stretchr_testify//assert"], +) diff --git a/entity/change/changeutil/hex.go b/entity/change/changeutil/hex.go new file mode 100644 index 00000000..d33fe915 --- /dev/null +++ b/entity/change/changeutil/hex.go @@ -0,0 +1,33 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// IsFullHex reports whether s consists of exactly length bytes, each of which
// is a lowercase hexadecimal digit ('0'-'9' or 'a'-'f'). Uppercase digits and
// any other byte cause it to return false.
//
// Providers that pin a change to a commit by SHA (github, git) use this to
// validate the SHA segment of a change URI.
func IsFullHex(s string, length int) bool {
	if len(s) != length {
		return false
	}
	for _, b := range []byte(s) {
		switch {
		case '0' <= b && b <= '9':
			// Decimal digit: accepted.
		case 'a' <= b && b <= 'f':
			// Lowercase hex letter: accepted.
		default:
			return false
		}
	}
	return true
}
+ +package changeutil + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsFullHex(t *testing.T) { + tests := []struct { + name string + s string + length int + want bool + }{ + {name: "valid 40-char SHA", s: "abcdef0123456789abcdef0123456789abcdef01", length: 40, want: true}, + {name: "valid custom length", s: "abc123", length: 6, want: true}, + {name: "too short", s: "abc", length: 40, want: false}, + {name: "too long", s: "abcdef0123456789abcdef0123456789abcdef0101", length: 40, want: false}, + {name: "uppercase rejected", s: "ABCDEF0123456789ABCDEF0123456789ABCDEF01", length: 40, want: false}, + {name: "non-hex rejected", s: "zzzzzz0123456789abcdef0123456789abcdef01", length: 40, want: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, IsFullHex(tt.s, tt.length)) + }) + } +} diff --git a/entity/change/git/BUILD.bazel b/entity/change/git/BUILD.bazel new file mode 100644 index 00000000..de6889d9 --- /dev/null +++ b/entity/change/git/BUILD.bazel @@ -0,0 +1,19 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "git", + srcs = ["change_id.go"], + importpath = "github.com/uber/submitqueue/entity/change/git", + visibility = ["//visibility:public"], + deps = ["//entity/change/changeutil"], +) + +go_test( + name = "git_test", + srcs = ["change_id_test.go"], + embed = [":git"], + deps = [ + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/entity/change/git/change_id.go b/entity/change/git/change_id.go new file mode 100644 index 00000000..aa68e30b --- /dev/null +++ b/entity/change/git/change_id.go @@ -0,0 +1,119 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package git parses change IDs that use the git:// URI scheme. +package git + +import ( + "fmt" + "net/url" + "strings" + + "github.com/uber/submitqueue/entity/change/changeutil" +) + +// scheme is the canonical URI scheme for git-backed change identifiers. +const scheme = "git" + +// refPrefix is the namespace every fully-qualified git ref lives under +// (refs/heads/, refs/tags/, ...). Disambiguates between branches and tags +// of the same name. +const refPrefix = "refs/" + +// changeIDFormat is the expected format for change IDs, included in error messages. +const changeIDFormat = "git://{remote}/{repo}/{ref}/{commit_sha}" + +// shaLength is the length of a git commit SHA. +const shaLength = 40 + +// ChangeID represents a parsed git:// change identifier. +// Format: git://{remote}/{repo}/{ref}/{commit_sha} +// +// Ref is a fully-qualified, percent-encoded git ref so that branches, tags, and +// ref names containing slashes all fit a single path segment unambiguously. +type ChangeID struct { + // Scheme captures the URI scheme (always "git" in current implementation). + Scheme string + // Remote is the host (or host:port) of the git remote the repository lives + // on (e.g. "git.example.com" or "git.example.com:9418"). + Remote string + // Repo is the path to the repository on the remote and may contain slashes + // (e.g. "uber/monorepo" or "team/group/repo.git"). + Repo string + // Ref is the fully-qualified git ref the change landed on, decoded from the + // URI (e.g. "refs/heads/main", "refs/tags/v1.0"). 
+ Ref string + // CommitSHA is a commit that ref has pointed to at some point in time. + CommitSHA string +} + +// ParseChangeID parses a raw change ID string into a ChangeID. +// Expected format: git://{remote}/{repo}/{ref}/{commit_sha}, where {ref} is a +// fully-qualified, percent-encoded git ref (e.g. "refs%2Fheads%2Fmain"). +func ParseChangeID(raw string) (ChangeID, error) { + u, err := url.Parse(raw) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: %w (expected format: %s)", raw, err, changeIDFormat) + } + if u.Scheme != scheme { + return ChangeID{}, fmt.Errorf("invalid change ID %q: scheme must be %q, got %q (expected format: %s)", raw, scheme, u.Scheme, changeIDFormat) + } + if u.Host == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: missing remote (expected format: %s)", raw, changeIDFormat) + } + + // Split on the escaped path so the percent-encoded ref stays a single + // segment (url.URL.Path decodes %2F to "/", which would split it apart). + segments := strings.Split(strings.TrimPrefix(u.EscapedPath(), "/"), "/") + // Need at least 3 segments: {repo}/{ref}/{commit_sha}. 
+ if len(segments) < 3 { + return ChangeID{}, fmt.Errorf("invalid change ID %q: need at least repo/ref/sha, got %d path segments (expected format: %s)", raw, len(segments), changeIDFormat) + } + + sha := segments[len(segments)-1] + encodedRef := segments[len(segments)-2] + repo := strings.Join(segments[:len(segments)-2], "/") + + if sha == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: empty commit SHA (expected format: %s)", raw, changeIDFormat) + } + if !changeutil.IsFullHex(sha, shaLength) { + return ChangeID{}, fmt.Errorf("invalid change ID %q: commit SHA %q must be %d lowercase hex characters (expected format: %s)", raw, sha, shaLength, changeIDFormat) + } + + ref, err := url.PathUnescape(encodedRef) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: ref %q is not valid percent-encoding: %w (expected format: %s)", raw, encodedRef, err, changeIDFormat) + } + if !strings.HasPrefix(ref, refPrefix) || ref == refPrefix { + return ChangeID{}, fmt.Errorf("invalid change ID %q: ref %q must be a fully-qualified git ref (e.g. refs/heads/main, refs/tags/v1.0) (expected format: %s)", raw, ref, changeIDFormat) + } + + if repo == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: empty repo (expected format: %s)", raw, changeIDFormat) + } + + return ChangeID{ + Scheme: u.Scheme, + Remote: u.Host, + Repo: repo, + Ref: ref, + CommitSHA: sha, + }, nil +} + +// String returns the string representation of the change ID. +func (c ChangeID) String() string { + return fmt.Sprintf("%s://%s/%s/%s/%s", c.Scheme, c.Remote, c.Repo, url.PathEscape(c.Ref), c.CommitSHA) +} diff --git a/entity/change/git/change_id_test.go b/entity/change/git/change_id_test.go new file mode 100644 index 00000000..f8bbdc76 --- /dev/null +++ b/entity/change/git/change_id_test.go @@ -0,0 +1,148 @@ +// Copyright (c) 2025 Uber Technologies, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseChangeID(t *testing.T) { + sha := "c3a4d5e6f7890123456789abcdef0123456789ab" + + tests := []struct { + name string + raw string + want ChangeID + wantErr bool + }{ + { + name: "branch ref", + raw: "git://git.example.com/uber/monorepo/refs%2Fheads%2Fmain/" + sha, + want: ChangeID{ + Scheme: "git", + Remote: "git.example.com", + Repo: "uber/monorepo", + Ref: "refs/heads/main", + CommitSHA: sha, + }, + }, + { + name: "host with port", + raw: "git://git.example.com:9418/uber/monorepo/refs%2Fheads%2Fmain/" + sha, + want: ChangeID{ + Scheme: "git", + Remote: "git.example.com:9418", + Repo: "uber/monorepo", + Ref: "refs/heads/main", + CommitSHA: sha, + }, + }, + { + name: "single-segment repo path", + raw: "git://git.example.com/monorepo/refs%2Fheads%2Fmain/" + sha, + want: ChangeID{ + Scheme: "git", + Remote: "git.example.com", + Repo: "monorepo", + Ref: "refs/heads/main", + CommitSHA: sha, + }, + }, + { + name: "branch ref with slash", + raw: "git://git.example.com/uber/monorepo/refs%2Fheads%2Ffeature%2Fx/" + sha, + want: ChangeID{ + Scheme: "git", + Remote: "git.example.com", + Repo: "uber/monorepo", + Ref: "refs/heads/feature/x", + CommitSHA: sha, + }, + }, + { + name: "tag ref", + raw: "git://git.example.com/uber/monorepo/refs%2Ftags%2Fv1.0/" + sha, + want: ChangeID{ + 
Scheme: "git", + Remote: "git.example.com", + Repo: "uber/monorepo", + Ref: "refs/tags/v1.0", + CommitSHA: sha, + }, + }, + { + name: "nested repo path", + raw: "git://git.example.com/uber/deepteam/monorepo/refs%2Fheads%2Fmain/" + sha, + want: ChangeID{ + Scheme: "git", + Remote: "git.example.com", + Repo: "uber/deepteam/monorepo", + Ref: "refs/heads/main", + CommitSHA: sha, + }, + }, + { + name: "wrong scheme", + raw: "github://git.example.com/uber/monorepo/refs%2Fheads%2Fmain/" + sha, + wantErr: true, + }, + { + name: "missing host", + raw: "git:///uber/monorepo/refs%2Fheads%2Fmain/" + sha, + wantErr: true, + }, + { + name: "missing commit SHA", + raw: "git://git.example.com/uber/monorepo/refs%2Fheads%2Fmain", + wantErr: true, + }, + { + name: "abbreviated SHA", + raw: "git://git.example.com/uber/monorepo/refs%2Fheads%2Fmain/deadbeef", + wantErr: true, + }, + { + name: "unqualified ref", + raw: "git://git.example.com/uber/monorepo/main/" + sha, + wantErr: true, + }, + { + name: "malformed percent-encoding", + raw: "git://git.example.com/uber/monorepo/refs%2/" + sha, + wantErr: true, + }, + { + name: "empty repo path", + raw: "git://git.example.com//refs%2Fheads%2Fmain/" + sha, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseChangeID(tt.raw) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.raw, got.String()) + }) + } +} diff --git a/entity/change/github/BUILD.bazel b/entity/change/github/BUILD.bazel index 85b9c172..9311d029 100644 --- a/entity/change/github/BUILD.bazel +++ b/entity/change/github/BUILD.bazel @@ -5,6 +5,7 @@ go_library( srcs = ["change_id.go"], importpath = "github.com/uber/submitqueue/entity/change/github", visibility = ["//visibility:public"], + deps = ["//entity/change/changeutil"], ) go_test( diff --git a/entity/change/github/change_id.go b/entity/change/github/change_id.go index 
c5556ec5..3bfca416 100644 --- a/entity/change/github/change_id.go +++ b/entity/change/github/change_id.go @@ -18,6 +18,8 @@ import ( "fmt" "strconv" "strings" + + "github.com/uber/submitqueue/entity/change/changeutil" ) // changeIDFormat is the expected format for change IDs, included in error messages. @@ -91,7 +93,7 @@ func ParseChangeID(raw string) (ChangeID, error) { if sha == "" { return ChangeID{}, fmt.Errorf("invalid change ID %q: empty head commit SHA (expected format: %s)", raw, changeIDFormat) } - if !isFullHexSHA(sha) { + if !changeutil.IsFullHex(sha, shaLength) { return ChangeID{}, fmt.Errorf("invalid change ID %q: head commit SHA %q must be %d lowercase hex characters (expected format: %s)", raw, sha, shaLength, changeIDFormat) } @@ -133,17 +135,3 @@ func (c ChangeID) String() string { func (c ChangeID) OwnerRepo() string { return fmt.Sprintf("%s/%s", c.Org, c.Repo) } - -// isFullHexSHA reports whether s is exactly shaLength lowercase hex characters. -func isFullHexSHA(s string) bool { - if len(s) != shaLength { - return false - } - for i := 0; i < len(s); i++ { - c := s[i] - if !(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'f') { - return false - } - } - return true -}