gitea source for verification 2026-05-22
This commit is contained in:
310
modules/indexer/issues/elasticsearch/elasticsearch.go
Normal file
310
modules/indexer/issues/elasticsearch/elasticsearch.go
Normal file
@@ -0,0 +1,310 @@
|
||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/graceful"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/olivere/elastic/v7"
|
||||
)
|
||||
|
||||
// issueIndexerLatestVersion is the index version handed to the inner
// Elasticsearch indexer together with defaultMapping (see NewIndexer).
const issueIndexerLatestVersion = 2

// Multi-match query types. Elasticsearch offers more, but only these 2 are
// used by Search.
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
const (
	esMultiMatchTypeBestFields   = "best_fields"
	esMultiMatchTypePhrasePrefix = "phrase_prefix"
)
|
||||
|
||||
var _ internal.Indexer = &Indexer{}
|
||||
|
||||
// Indexer implements Indexer interface
|
||||
type Indexer struct {
|
||||
inner *inner_elasticsearch.Indexer
|
||||
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new elasticsearch indexer
|
||||
func NewIndexer(url, indexerName string) *Indexer {
|
||||
inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
|
||||
indexer := &Indexer{
|
||||
inner: inner,
|
||||
Indexer: inner,
|
||||
}
|
||||
return indexer
|
||||
}
|
||||
|
||||
// defaultMapping is the Elasticsearch index definition (field name -> type)
// handed to the inner indexer by NewIndexer. Every field is indexed.
//
// NOTE(review): Elasticsearch's "integer" type is a signed 32-bit value, while
// document IDs are formatted from int64 on the Go side. Widening any field to
// "long" would presumably need a bump of issueIndexerLatestVersion so the
// index is rebuilt - confirm before changing.
const defaultMapping = `
{
	"mappings": {
		"properties": {
			"id": { "type": "integer", "index": true },
			"repo_id": { "type": "integer", "index": true },
			"is_public": { "type": "boolean", "index": true },

			"title": { "type": "text", "index": true },
			"content": { "type": "text", "index": true },
			"comments": { "type" : "text", "index": true },

			"is_pull": { "type": "boolean", "index": true },
			"is_closed": { "type": "boolean", "index": true },
			"is_archived": { "type": "boolean", "index": true },
			"label_ids": { "type": "integer", "index": true },
			"no_label": { "type": "boolean", "index": true },
			"milestone_id": { "type": "integer", "index": true },
			"project_id": { "type": "integer", "index": true },
			"project_board_id": { "type": "integer", "index": true },
			"poster_id": { "type": "integer", "index": true },
			"assignee_id": { "type": "integer", "index": true },
			"mention_ids": { "type": "integer", "index": true },
			"reviewed_ids": { "type": "integer", "index": true },
			"review_requested_ids": { "type": "integer", "index": true },
			"subscriber_ids": { "type": "integer", "index": true },
			"updated_unix": { "type": "integer", "index": true },

			"created_unix": { "type": "integer", "index": true },
			"deadline_unix": { "type": "integer", "index": true },
			"comment_count": { "type": "integer", "index": true }
		}
	}
}
`
|
||||
|
||||
// Index will save the index data
|
||||
func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
|
||||
if len(issues) == 0 {
|
||||
return nil
|
||||
} else if len(issues) == 1 {
|
||||
issue := issues[0]
|
||||
_, err := b.inner.Client.Index().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Id(strconv.FormatInt(issue.ID, 10)).
|
||||
BodyJson(issue).
|
||||
Do(ctx)
|
||||
return err
|
||||
}
|
||||
|
||||
reqs := make([]elastic.BulkableRequest, 0)
|
||||
for _, issue := range issues {
|
||||
reqs = append(reqs,
|
||||
elastic.NewBulkIndexRequest().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Id(strconv.FormatInt(issue.ID, 10)).
|
||||
Doc(issue),
|
||||
)
|
||||
}
|
||||
|
||||
_, err := b.inner.Client.Bulk().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Add(reqs...).
|
||||
Do(graceful.GetManager().HammerContext())
|
||||
return err
|
||||
}
|
||||
|
||||
// Delete deletes indexes by ids
|
||||
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
|
||||
if len(ids) == 0 {
|
||||
return nil
|
||||
} else if len(ids) == 1 {
|
||||
_, err := b.inner.Client.Delete().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Id(strconv.FormatInt(ids[0], 10)).
|
||||
Do(ctx)
|
||||
return err
|
||||
}
|
||||
|
||||
reqs := make([]elastic.BulkableRequest, 0)
|
||||
for _, id := range ids {
|
||||
reqs = append(reqs,
|
||||
elastic.NewBulkDeleteRequest().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Id(strconv.FormatInt(id, 10)),
|
||||
)
|
||||
}
|
||||
|
||||
_, err := b.inner.Client.Bulk().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Add(reqs...).
|
||||
Do(graceful.GetManager().HammerContext())
|
||||
return err
|
||||
}
|
||||
|
||||
// Search searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
query := elastic.NewBoolQuery()
|
||||
|
||||
if options.Keyword != "" {
|
||||
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||
if searchMode == indexer.SearchModeExact {
|
||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
|
||||
} else /* words */ {
|
||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.RepoIDs) > 0 {
|
||||
q := elastic.NewBoolQuery()
|
||||
q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
|
||||
if options.AllPublic {
|
||||
q.Should(elastic.NewTermQuery("is_public", true))
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
|
||||
if options.IsPull.Has() {
|
||||
query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
|
||||
}
|
||||
if options.IsClosed.Has() {
|
||||
query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
|
||||
}
|
||||
if options.IsArchived.Has() {
|
||||
query.Must(elastic.NewTermQuery("is_archived", options.IsArchived.Value()))
|
||||
}
|
||||
|
||||
if options.NoLabelOnly {
|
||||
query.Must(elastic.NewTermQuery("no_label", true))
|
||||
} else {
|
||||
if len(options.IncludedLabelIDs) > 0 {
|
||||
q := elastic.NewBoolQuery()
|
||||
for _, labelID := range options.IncludedLabelIDs {
|
||||
q.Must(elastic.NewTermQuery("label_ids", labelID))
|
||||
}
|
||||
query.Must(q)
|
||||
} else if len(options.IncludedAnyLabelIDs) > 0 {
|
||||
query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
|
||||
}
|
||||
if len(options.ExcludedLabelIDs) > 0 {
|
||||
q := elastic.NewBoolQuery()
|
||||
for _, labelID := range options.ExcludedLabelIDs {
|
||||
q.MustNot(elastic.NewTermQuery("label_ids", labelID))
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
}
|
||||
|
||||
if len(options.MilestoneIDs) > 0 {
|
||||
query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
|
||||
}
|
||||
|
||||
if options.ProjectID.Has() {
|
||||
query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
|
||||
}
|
||||
if options.ProjectColumnID.Has() {
|
||||
query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value()))
|
||||
}
|
||||
|
||||
if options.PosterID != "" {
|
||||
// "(none)" becomes 0, it means no poster
|
||||
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
|
||||
query.Must(elastic.NewTermQuery("poster_id", posterIDInt64))
|
||||
}
|
||||
|
||||
if options.AssigneeID != "" {
|
||||
if options.AssigneeID == "(any)" {
|
||||
q := elastic.NewRangeQuery("assignee_id")
|
||||
q.Gte(1)
|
||||
query.Must(q)
|
||||
} else {
|
||||
// "(none)" becomes 0, it means no assignee
|
||||
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
|
||||
query.Must(elastic.NewTermQuery("assignee_id", assigneeIDInt64))
|
||||
}
|
||||
}
|
||||
|
||||
if options.MentionID.Has() {
|
||||
query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
|
||||
}
|
||||
|
||||
if options.ReviewedID.Has() {
|
||||
query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
|
||||
}
|
||||
if options.ReviewRequestedID.Has() {
|
||||
query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
|
||||
}
|
||||
|
||||
if options.SubscriberID.Has() {
|
||||
query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
|
||||
}
|
||||
|
||||
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
|
||||
q := elastic.NewRangeQuery("updated_unix")
|
||||
if options.UpdatedAfterUnix.Has() {
|
||||
q.Gte(options.UpdatedAfterUnix.Value())
|
||||
}
|
||||
if options.UpdatedBeforeUnix.Has() {
|
||||
q.Lte(options.UpdatedBeforeUnix.Value())
|
||||
}
|
||||
query.Must(q)
|
||||
}
|
||||
|
||||
if options.SortBy == "" {
|
||||
options.SortBy = internal.SortByCreatedAsc
|
||||
}
|
||||
sortBy := []elastic.Sorter{
|
||||
parseSortBy(options.SortBy),
|
||||
elastic.NewFieldSort("id").Desc(),
|
||||
}
|
||||
|
||||
// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
|
||||
// TODO: make it configurable since it's configurable in elasticsearch
|
||||
const maxPageSize = 10000
|
||||
|
||||
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
|
||||
searchResult, err := b.inner.Client.Search().
|
||||
Index(b.inner.VersionedIndexName()).
|
||||
Query(query).
|
||||
SortBy(sortBy...).
|
||||
From(skip).Size(limit).
|
||||
Do(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := make([]internal.Match, 0, limit)
|
||||
for _, hit := range searchResult.Hits.Hits {
|
||||
id, _ := strconv.ParseInt(hit.Id, 10, 64)
|
||||
hits = append(hits, internal.Match{
|
||||
ID: id,
|
||||
})
|
||||
}
|
||||
|
||||
return &internal.SearchResult{
|
||||
Total: searchResult.TotalHits(),
|
||||
Hits: hits,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// toAnySlice copies s into a freshly allocated []any of the same length,
// keeping element order. The result is never nil, even for a nil or empty s.
func toAnySlice[T any](s []T) []any {
	boxed := make([]any, len(s))
	for i := range s {
		boxed[i] = s[i]
	}
	return boxed
}
|
||||
|
||||
func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
|
||||
field := strings.TrimPrefix(string(sortBy), "-")
|
||||
ret := elastic.NewFieldSort(field)
|
||||
if strings.HasPrefix(string(sortBy), "-") {
|
||||
ret.Desc()
|
||||
}
|
||||
return ret
|
||||
}
|
||||
40
modules/indexer/issues/elasticsearch/elasticsearch_test.go
Normal file
40
modules/indexer/issues/elasticsearch/elasticsearch_test.go
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package elasticsearch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestElasticsearchIndexer(t *testing.T) {
|
||||
// The elasticsearch instance started by pull-db-tests.yml > test-unit > services > elasticsearch
|
||||
url := "http://elastic:changeme@elasticsearch:9200"
|
||||
|
||||
if os.Getenv("CI") == "" {
|
||||
// Make it possible to run tests against a local elasticsearch instance
|
||||
url = os.Getenv("TEST_ELASTICSEARCH_URL")
|
||||
if url == "" {
|
||||
t.Skip("TEST_ELASTICSEARCH_URL not set and not running in CI")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
resp, err := http.Get(url)
|
||||
return err == nil && resp.StatusCode == http.StatusOK
|
||||
}, time.Minute, time.Second, "Expected elasticsearch to be up")
|
||||
|
||||
indexer := NewIndexer(url, fmt.Sprintf("test_elasticsearch_indexer_%d", time.Now().Unix()))
|
||||
defer indexer.Close()
|
||||
|
||||
tests.TestIndexer(t, indexer)
|
||||
}
|
||||
Reference in New Issue
Block a user