From 09663493543a2ce15fc90fbb3808dda5b87335e5 Mon Sep 17 00:00:00 2001
From: Lunny Xiao <xiaolunwen@gmail.com>
Date: Wed, 30 Jun 2021 15:23:49 +0800
Subject: [PATCH] Make the github migration less rate limit waiting to get
 comment per page from repository but not per issue (#16070)

* Make the github migration less rate limit waiting to get comment per page from repository but not per issue

* Fix lint

* adjust Downloader interface

* Fix missed reviews

* Fix test

* Remove unused struct
---
 modules/migrations/base/downloader.go       | 10 ++-
 modules/migrations/base/null_downloader.go  |  9 ++-
 modules/migrations/base/retry_downloader.go |  7 +-
 modules/migrations/gitea_downloader.go      | 16 ++--
 modules/migrations/gitea_downloader_test.go |  4 +-
 modules/migrations/github.go                | 87 ++++++++++++++++++++-
 modules/migrations/github_test.go           |  4 +-
 modules/migrations/gitlab.go                |  7 +-
 modules/migrations/gitlab_test.go           |  4 +-
 modules/migrations/gogs.go                  |  7 +-
 modules/migrations/gogs_test.go             |  4 +-
 modules/migrations/migrate.go               | 77 ++++++++++++------
 modules/migrations/restore.go               | 14 ++--
 13 files changed, 194 insertions(+), 56 deletions(-)

diff --git a/modules/migrations/base/downloader.go b/modules/migrations/base/downloader.go
index 919f4b52a0..2388b2dd6e 100644
--- a/modules/migrations/base/downloader.go
+++ b/modules/migrations/base/downloader.go
@@ -11,6 +11,13 @@ import (
 	"code.gitea.io/gitea/modules/structs"
 )
 
+// GetCommentOptions represents an options for get comment
+type GetCommentOptions struct {
+	IssueNumber int64
+	Page        int
+	PageSize    int
+}
+
 // Downloader downloads the site repo informations
 type Downloader interface {
 	SetContext(context.Context)
@@ -20,7 +27,8 @@ type Downloader interface {
 	GetReleases() ([]*Release, error)
 	GetLabels() ([]*Label, error)
 	GetIssues(page, perPage int) ([]*Issue, bool, error)
-	GetComments(issueNumber int64) ([]*Comment, error)
+	GetComments(opts GetCommentOptions) ([]*Comment, bool, error)
+	SupportGetRepoComments() bool
 	GetPullRequests(page, perPage int) ([]*PullRequest, bool, error)
 	GetReviews(pullRequestNumber int64) ([]*Review, error)
 	FormatCloneURL(opts MigrateOptions, remoteAddr string) (string, error)
diff --git a/modules/migrations/base/null_downloader.go b/modules/migrations/base/null_downloader.go
index a93c20339b..53a536709d 100644
--- a/modules/migrations/base/null_downloader.go
+++ b/modules/migrations/base/null_downloader.go
@@ -51,8 +51,8 @@ func (n NullDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
 }
 
 // GetComments returns comments according issueNumber
-func (n NullDownloader) GetComments(issueNumber int64) ([]*Comment, error) {
-	return nil, &ErrNotSupported{Entity: "Comments"}
+func (n NullDownloader) GetComments(GetCommentOptions) ([]*Comment, bool, error) {
+	return nil, false, &ErrNotSupported{Entity: "Comments"}
 }
 
 // GetPullRequests returns pull requests according page and perPage
@@ -80,3 +80,8 @@ func (n NullDownloader) FormatCloneURL(opts MigrateOptions, remoteAddr string) (
 	}
 	return remoteAddr, nil
 }
+
+// SupportGetRepoComments return true if it supports get repo comments
+func (n NullDownloader) SupportGetRepoComments() bool {
+	return false
+}
diff --git a/modules/migrations/base/retry_downloader.go b/modules/migrations/base/retry_downloader.go
index 82a038b98b..e6c80038f1 100644
--- a/modules/migrations/base/retry_downloader.go
+++ b/modules/migrations/base/retry_downloader.go
@@ -150,18 +150,19 @@ func (d *RetryDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
 }
 
 // GetComments returns a repository's comments with retry
-func (d *RetryDownloader) GetComments(issueNumber int64) ([]*Comment, error) {
+func (d *RetryDownloader) GetComments(opts GetCommentOptions) ([]*Comment, bool, error) {
 	var (
 		comments []*Comment
+		isEnd    bool
 		err      error
 	)
 
 	err = d.retry(func() error {
-		comments, err = d.Downloader.GetComments(issueNumber)
+		comments, isEnd, err = d.Downloader.GetComments(opts)
 		return err
 	})
 
-	return comments, err
+	return comments, isEnd, err
 }
 
 // GetPullRequests returns a repository's pull requests with retry
diff --git a/modules/migrations/gitea_downloader.go b/modules/migrations/gitea_downloader.go
index 40820ae375..665466ffef 100644
--- a/modules/migrations/gitea_downloader.go
+++ b/modules/migrations/gitea_downloader.go
@@ -435,37 +435,37 @@ func (g *GiteaDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, err
 }
 
 // GetComments returns comments according issueNumber
-func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) {
+func (g *GiteaDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
 	var allComments = make([]*base.Comment, 0, g.maxPerPage)
 
 	// for i := 1; ; i++ {
 	// make sure gitea can shutdown gracefully
 	select {
 	case <-g.ctx.Done():
-		return nil, nil
+		return nil, false, nil
 	default:
 	}
 
-	comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, index, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{
+	comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, opts.IssueNumber, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{
 		// PageSize: g.maxPerPage,
 		// Page:     i,
 	}})
 	if err != nil {
-		return nil, fmt.Errorf("error while listing comments for issue #%d. Error: %v", index, err)
+		return nil, false, fmt.Errorf("error while listing comments for issue #%d. Error: %v", opts.IssueNumber, err)
 	}
 
 	for _, comment := range comments {
 		reactions, err := g.getCommentReactions(comment.ID)
 		if err != nil {
-			log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)
+			log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)
 			if err2 := models.CreateRepositoryNotice(
-				fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil {
+				fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil {
 				log.Error("create repository notice failed: ", err2)
 			}
 		}
 
 		allComments = append(allComments, &base.Comment{
-			IssueIndex:  index,
+			IssueIndex:  opts.IssueNumber,
 			PosterID:    comment.Poster.ID,
 			PosterName:  comment.Poster.UserName,
 			PosterEmail: comment.Poster.Email,
@@ -481,7 +481,7 @@ func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) {
 	//		break
 	//	}
 	//}
-	return allComments, nil
+	return allComments, true, nil
 }
 
 // GetPullRequests returns pull requests according page and perPage
diff --git a/modules/migrations/gitea_downloader_test.go b/modules/migrations/gitea_downloader_test.go
index babf038280..f62b19897c 100644
--- a/modules/migrations/gitea_downloader_test.go
+++ b/modules/migrations/gitea_downloader_test.go
@@ -224,7 +224,9 @@ func TestGiteaDownloadRepo(t *testing.T) {
 		Closed:    &closed2,
 	}, issues[1])
 
-	comments, err := downloader.GetComments(4)
+	comments, _, err := downloader.GetComments(base.GetCommentOptions{
+		IssueNumber: 4,
+	})
 	assert.NoError(t, err)
 	assert.Len(t, comments, 2)
 	assert.EqualValues(t, 1598975370, comments[0].Created.Unix())
diff --git a/modules/migrations/github.go b/modules/migrations/github.go
index 8a3f5d34c7..9b897662d0 100644
--- a/modules/migrations/github.go
+++ b/modules/migrations/github.go
@@ -11,6 +11,7 @@ import (
 	"io"
 	"net/http"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 
@@ -450,8 +451,22 @@ func (g *GithubDownloaderV3) GetIssues(page, perPage int) ([]*base.Issue, bool,
 	return allIssues, len(issues) < perPage, nil
 }
 
+// SupportGetRepoComments return true if it supports get repo comments
+func (g *GithubDownloaderV3) SupportGetRepoComments() bool {
+	return true
+}
+
 // GetComments returns comments according issueNumber
-func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, error) {
+func (g *GithubDownloaderV3) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
+	if opts.IssueNumber > 0 {
+		comments, err := g.getComments(opts.IssueNumber)
+		return comments, false, err
+	}
+
+	return g.GetAllComments(opts.Page, opts.PageSize)
+}
+
+func (g *GithubDownloaderV3) getComments(issueNumber int64) ([]*base.Comment, error) {
 	var (
 		allComments = make([]*base.Comment, 0, g.maxPerPage)
 		created     = "created"
@@ -519,6 +534,75 @@ func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, er
 	return allComments, nil
 }
 
+// GetAllComments returns repository comments according page and perPageSize
+func (g *GithubDownloaderV3) GetAllComments(page, perPage int) ([]*base.Comment, bool, error) {
+	var (
+		allComments = make([]*base.Comment, 0, perPage)
+		created     = "created"
+		asc         = "asc"
+	)
+	opt := &github.IssueListCommentsOptions{
+		Sort:      &created,
+		Direction: &asc,
+		ListOptions: github.ListOptions{
+			Page:    page,
+			PerPage: perPage,
+		},
+	}
+
+	g.sleep()
+	comments, resp, err := g.client.Issues.ListComments(g.ctx, g.repoOwner, g.repoName, 0, opt)
+	if err != nil {
+		return nil, false, fmt.Errorf("error while listing repos: %v", err)
+	}
+	log.Trace("Request get comments %d/%d, but in fact get %d", perPage, page, len(comments))
+	g.rate = &resp.Rate
+	for _, comment := range comments {
+		var email string
+		if comment.User.Email != nil {
+			email = *comment.User.Email
+		}
+
+		// get reactions
+		var reactions []*base.Reaction
+		for i := 1; ; i++ {
+			g.sleep()
+			res, resp, err := g.client.Reactions.ListIssueCommentReactions(g.ctx, g.repoOwner, g.repoName, comment.GetID(), &github.ListOptions{
+				Page:    i,
+				PerPage: g.maxPerPage,
+			})
+			if err != nil {
+				return nil, false, err
+			}
+			g.rate = &resp.Rate
+			if len(res) == 0 {
+				break
+			}
+			for _, reaction := range res {
+				reactions = append(reactions, &base.Reaction{
+					UserID:   reaction.User.GetID(),
+					UserName: reaction.User.GetLogin(),
+					Content:  reaction.GetContent(),
+				})
+			}
+		}
+		idx := strings.LastIndex(*comment.IssueURL, "/")
+		issueIndex, _ := strconv.ParseInt((*comment.IssueURL)[idx+1:], 10, 64)
+		allComments = append(allComments, &base.Comment{
+			IssueIndex:  issueIndex,
+			PosterID:    *comment.User.ID,
+			PosterName:  *comment.User.Login,
+			PosterEmail: email,
+			Content:     *comment.Body,
+			Created:     *comment.CreatedAt,
+			Updated:     *comment.UpdatedAt,
+			Reactions:   reactions,
+		})
+	}
+
+	return allComments, len(allComments) < perPage, nil
+}
+
 // GetPullRequests returns pull requests according page and perPage
 func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) {
 	if perPage > g.maxPerPage {
@@ -539,6 +623,7 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq
 	if err != nil {
 		return nil, false, fmt.Errorf("error while listing repos: %v", err)
 	}
+	log.Trace("Request get pull requests %d/%d, but in fact get %d", perPage, page, len(prs))
 	g.rate = &resp.Rate
 	for _, pr := range prs {
 		var body string
diff --git a/modules/migrations/github_test.go b/modules/migrations/github_test.go
index 5bd980a9d8..e0ee2fea84 100644
--- a/modules/migrations/github_test.go
+++ b/modules/migrations/github_test.go
@@ -240,7 +240,9 @@ func TestGitHubDownloadRepo(t *testing.T) {
 	}, issues)
 
 	// downloader.GetComments()
-	comments, err := downloader.GetComments(2)
+	comments, _, err := downloader.GetComments(base.GetCommentOptions{
+		IssueNumber: 2,
+	})
 	assert.NoError(t, err)
 	assert.Len(t, comments, 2)
 	assert.EqualValues(t, []*base.Comment{
diff --git a/modules/migrations/gitlab.go b/modules/migrations/gitlab.go
index a697075ff8..c83989f771 100644
--- a/modules/migrations/gitlab.go
+++ b/modules/migrations/gitlab.go
@@ -430,7 +430,8 @@ func (g *GitlabDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, er
 
 // GetComments returns comments according issueNumber
 // TODO: figure out how to transfer comment reactions
-func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) {
+func (g *GitlabDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
+	var issueNumber = opts.IssueNumber
 	var allComments = make([]*base.Comment, 0, g.maxPerPage)
 
 	var page = 1
@@ -457,7 +458,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
 		}
 
 		if err != nil {
-			return nil, fmt.Errorf("error while listing comments: %v %v", g.repoID, err)
+			return nil, false, fmt.Errorf("error while listing comments: %v %v", g.repoID, err)
 		}
 		for _, comment := range comments {
 			// Flatten comment threads
@@ -490,7 +491,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
 		}
 		page = resp.NextPage
 	}
-	return allComments, nil
+	return allComments, true, nil
 }
 
 // GetPullRequests returns pull requests according page and perPage
diff --git a/modules/migrations/gitlab_test.go b/modules/migrations/gitlab_test.go
index 32ed6d807a..6a77ff3c23 100644
--- a/modules/migrations/gitlab_test.go
+++ b/modules/migrations/gitlab_test.go
@@ -204,7 +204,9 @@ func TestGitlabDownloadRepo(t *testing.T) {
 		},
 	}, issues)
 
-	comments, err := downloader.GetComments(2)
+	comments, _, err := downloader.GetComments(base.GetCommentOptions{
+		IssueNumber: 2,
+	})
 	assert.NoError(t, err)
 	assert.Len(t, comments, 4)
 	assert.EqualValues(t, []*base.Comment{
diff --git a/modules/migrations/gogs.go b/modules/migrations/gogs.go
index b616907938..d689b0da11 100644
--- a/modules/migrations/gogs.go
+++ b/modules/migrations/gogs.go
@@ -227,12 +227,13 @@ func (g *GogsDownloader) getIssues(page int, state string) ([]*base.Issue, bool,
 }
 
 // GetComments returns comments according issueNumber
-func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) {
+func (g *GogsDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
+	var issueNumber = opts.IssueNumber
 	var allComments = make([]*base.Comment, 0, 100)
 
 	comments, err := g.client.ListIssueComments(g.repoOwner, g.repoName, issueNumber)
 	if err != nil {
-		return nil, fmt.Errorf("error while listing repos: %v", err)
+		return nil, false, fmt.Errorf("error while listing repos: %v", err)
 	}
 	for _, comment := range comments {
 		if len(comment.Body) == 0 || comment.Poster == nil {
@@ -249,7 +250,7 @@ func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error)
 		})
 	}
 
-	return allComments, nil
+	return allComments, true, nil
 }
 
 // GetTopics return repository topics
diff --git a/modules/migrations/gogs_test.go b/modules/migrations/gogs_test.go
index d173952b06..4e384036d7 100644
--- a/modules/migrations/gogs_test.go
+++ b/modules/migrations/gogs_test.go
@@ -103,7 +103,9 @@ func TestGogsDownloadRepo(t *testing.T) {
 	}, issues)
 
 	// downloader.GetComments()
-	comments, err := downloader.GetComments(1)
+	comments, _, err := downloader.GetComments(base.GetCommentOptions{
+		IssueNumber: 1,
+	})
 	assert.NoError(t, err)
 	assert.Len(t, comments, 1)
 	assert.EqualValues(t, []*base.Comment{
diff --git a/modules/migrations/migrate.go b/modules/migrations/migrate.go
index 3cdf68ab62..0a507d9c33 100644
--- a/modules/migrations/migrate.go
+++ b/modules/migrations/migrate.go
@@ -292,6 +292,8 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
 		reviewBatchSize  = uploader.MaxBatchInsertSize("review")
 	)
 
+	supportAllComments := downloader.SupportGetRepoComments()
+
 	if opts.Issues {
 		log.Trace("migrating issues and comments")
 		messenger("repo.migrate.migrating_issues")
@@ -311,11 +313,13 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
 				return err
 			}
 
-			if opts.Comments {
+			if opts.Comments && !supportAllComments {
 				var allComments = make([]*base.Comment, 0, commentBatchSize)
 				for _, issue := range issues {
 					log.Trace("migrating issue %d's comments", issue.Number)
-					comments, err := downloader.GetComments(issue.Number)
+					comments, _, err := downloader.GetComments(base.GetCommentOptions{
+						IssueNumber: issue.Number,
+					})
 					if err != nil {
 						if !base.IsErrNotSupported(err) {
 							return err
@@ -366,30 +370,34 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
 			}
 
 			if opts.Comments {
-				// plain comments
-				var allComments = make([]*base.Comment, 0, commentBatchSize)
-				for _, pr := range prs {
-					log.Trace("migrating pull request %d's comments", pr.Number)
-					comments, err := downloader.GetComments(pr.Number)
-					if err != nil {
-						if !base.IsErrNotSupported(err) {
+				if !supportAllComments {
+					// plain comments
+					var allComments = make([]*base.Comment, 0, commentBatchSize)
+					for _, pr := range prs {
+						log.Trace("migrating pull request %d's comments", pr.Number)
+						comments, _, err := downloader.GetComments(base.GetCommentOptions{
+							IssueNumber: pr.Number,
+						})
+						if err != nil {
+							if !base.IsErrNotSupported(err) {
+								return err
+							}
+							log.Warn("migrating comments is not supported, ignored")
+						}
+
+						allComments = append(allComments, comments...)
+
+						if len(allComments) >= commentBatchSize {
+							if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil {
+								return err
+							}
+							allComments = allComments[commentBatchSize:]
+						}
+					}
+					if len(allComments) > 0 {
+						if err = uploader.CreateComments(allComments...); err != nil {
 							return err
 						}
-						log.Warn("migrating comments is not supported, ignored")
-					}
-
-					allComments = append(allComments, comments...)
-
-					if len(allComments) >= commentBatchSize {
-						if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil {
-							return err
-						}
-						allComments = allComments[commentBatchSize:]
-					}
-				}
-				if len(allComments) > 0 {
-					if err = uploader.CreateComments(allComments...); err != nil {
-						return err
 					}
 				}
 
@@ -439,6 +447,27 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
 		}
 	}
 
+	if opts.Comments && supportAllComments {
+		log.Trace("migrating comments")
+		for i := 1; ; i++ {
+			comments, isEnd, err := downloader.GetComments(base.GetCommentOptions{
+				Page:     i,
+				PageSize: commentBatchSize,
+			})
+			if err != nil {
+				return err
+			}
+
+			if err := uploader.CreateComments(comments...); err != nil {
+				return err
+			}
+
+			if isEnd {
+				break
+			}
+		}
+	}
+
 	return uploader.Finish()
 }
 
diff --git a/modules/migrations/restore.go b/modules/migrations/restore.go
index 5b44811d52..6177f80cbb 100644
--- a/modules/migrations/restore.go
+++ b/modules/migrations/restore.go
@@ -212,27 +212,27 @@ func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool,
 }
 
 // GetComments returns comments according issueNumber
-func (r *RepositoryRestorer) GetComments(issueNumber int64) ([]*base.Comment, error) {
+func (r *RepositoryRestorer) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
 	var comments = make([]*base.Comment, 0, 10)
-	p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", issueNumber))
+	p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", opts.IssueNumber))
 	_, err := os.Stat(p)
 	if err != nil {
 		if os.IsNotExist(err) {
-			return nil, nil
+			return nil, false, nil
 		}
-		return nil, err
+		return nil, false, err
 	}
 
 	bs, err := ioutil.ReadFile(p)
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
 
 	err = yaml.Unmarshal(bs, &comments)
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
-	return comments, nil
+	return comments, false, nil
 }
 
 // GetPullRequests returns pull requests according page and perPage