aboutsummaryrefslogtreecommitdiff
path: root/internal/fulltext
diff options
context:
space:
mode:
author Till <2353100+S7evinK@users.noreply.github.com> 2023-03-27 11:26:52 +0200
committer GitHub <noreply@github.com> 2023-03-27 11:26:52 +0200
commit e8b2162a01bf0e735869d5a2b9be258cb380255e (patch)
tree 993b887ed4f9d352f2a5e43d3bf5fa5e59255875 /internal/fulltext
parent aa1bda4c58d20e7d14267f9c87fab8efd7ae36ad (diff)
Add `/search` tests (#3025)
Diffstat (limited to 'internal/fulltext')
-rw-r--r-- internal/fulltext/bleve.go 47
-rw-r--r-- internal/fulltext/bleve_test.go 46
-rw-r--r-- internal/fulltext/bleve_wasm.go 5
3 files changed, 82 insertions, 16 deletions
diff --git a/internal/fulltext/bleve.go b/internal/fulltext/bleve.go
index dea7c504..f7412470 100644
--- a/internal/fulltext/bleve.go
+++ b/internal/fulltext/bleve.go
@@ -18,6 +18,7 @@
package fulltext
import (
+ "regexp"
"strings"
"github.com/blevesearch/bleve/v2"
@@ -60,6 +61,7 @@ type Indexer interface {
Index(elements ...IndexElement) error
Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error)
+ GetHighlights(result *bleve.SearchResult) []string
Close() error
}
@@ -124,6 +126,47 @@ func (f *Search) Delete(eventID string) error {
return f.FulltextIndex.Delete(eventID)
}
+// highlightMatcher captures the text enclosed by each <mark>...</mark> pair in
+// a fragment string. The match is non-greedy, so several marked terms in one
+// fragment are captured separately.
+var highlightMatcher = regexp.MustCompile("<mark>(.*?)</mark>")
+
+// GetHighlights extracts the highlights from a SearchResult.
+//
+// It scans the "Content" fragments of every hit and returns each distinct
+// marked term exactly once, in the order the terms are first encountered
+// (hit order, then fragment order, then position inside the fragment). The
+// order is therefore stable for a given result instead of depending on map
+// iteration order. Terms are compared byte-for-byte, so differently cased
+// variants are reported separately.
+//
+// The returned slice is never nil; a nil result or a result without any
+// marked terms yields an empty slice.
+func (f *Search) GetHighlights(result *bleve.SearchResult) []string {
+	highlights := []string{}
+	if result == nil {
+		return highlights
+	}
+
+	// seen only de-duplicates; the output order is carried by highlights.
+	seen := make(map[string]struct{})
+
+	for _, hit := range result.Hits {
+		// Indexing a nil or key-less Fragments map yields a nil slice, so hits
+		// without "Content" fragments are skipped without an explicit check.
+		for _, fragment := range hit.Fragments["Content"] {
+			for _, match := range highlightMatcher.FindAllStringSubmatch(fragment, -1) {
+				// match[0] is the whole "<mark>...</mark>" span; only the
+				// capture groups hold the highlighted text.
+				for _, term := range match[1:] {
+					if _, ok := seen[term]; ok {
+						continue
+					}
+					seen[term] = struct{}{}
+					highlights = append(highlights, term)
+				}
+			}
+		}
+	}
+
+	return highlights
+}
+
// Search searches the index given a search term, roomIDs and keys.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (*bleve.SearchResult, error) {
qry := bleve.NewConjunctionQuery()
@@ -163,6 +206,10 @@ func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, or
s.SortBy([]string{"-StreamPosition"})
}
+ // Highlight some words
+ s.Highlight = bleve.NewHighlight()
+ s.Highlight.Fields = []string{"Content"}
+
return f.FulltextIndex.Search(s)
}
diff --git a/internal/fulltext/bleve_test.go b/internal/fulltext/bleve_test.go
index bd8289d5..a77c2393 100644
--- a/internal/fulltext/bleve_test.go
+++ b/internal/fulltext/bleve_test.go
@@ -160,14 +160,16 @@ func TestSearch(t *testing.T) {
roomIndex []int
}
tests := []struct {
- name string
- args args
- wantCount int
- wantErr bool
+ name string
+ args args
+ wantCount int
+ wantErr bool
+ wantHighlights []string
}{
{
- name: "Can search for many results in one room",
- wantCount: 16,
+ name: "Can search for many results in one room",
+ wantCount: 16,
+ wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{0},
@@ -175,8 +177,9 @@ func TestSearch(t *testing.T) {
},
},
{
- name: "Can search for one result in one room",
- wantCount: 1,
+ name: "Can search for one result in one room",
+ wantCount: 1,
+ wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{16},
@@ -184,8 +187,9 @@ func TestSearch(t *testing.T) {
},
},
{
- name: "Can search for many results in multiple rooms",
- wantCount: 17,
+ name: "Can search for many results in multiple rooms",
+ wantCount: 17,
+ wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
roomIndex: []int{0, 16},
@@ -193,8 +197,9 @@ func TestSearch(t *testing.T) {
},
},
{
- name: "Can search for many results in all rooms, reversed",
- wantCount: 30,
+ name: "Can search for many results in all rooms, reversed",
+ wantCount: 30,
+ wantHighlights: []string{"lorem"},
args: args{
term: "lorem",
limit: 30,
@@ -202,8 +207,9 @@ func TestSearch(t *testing.T) {
},
},
{
- name: "Can search for specific search room name",
- wantCount: 1,
+ name: "Can search for specific search room name",
+ wantCount: 1,
+ wantHighlights: []string{"testing"},
args: args{
term: "testing",
roomIndex: []int{},
@@ -212,8 +218,9 @@ func TestSearch(t *testing.T) {
},
},
{
- name: "Can search for specific search room topic",
- wantCount: 1,
+ name: "Can search for specific search room topic",
+ wantCount: 1,
+ wantHighlights: []string{"fulltext"},
args: args{
term: "fulltext",
roomIndex: []int{},
@@ -222,6 +229,7 @@ func TestSearch(t *testing.T) {
},
},
}
+
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
f, ctx := mustOpenIndex(t, "")
@@ -238,6 +246,12 @@ func TestSearch(t *testing.T) {
t.Errorf("Search() error = %v, wantErr %v", err, tt.wantErr)
return
}
+
+ highlights := f.GetHighlights(got)
+ if !reflect.DeepEqual(highlights, tt.wantHighlights) {
+ t.Errorf("Search() got highligts = %v, want %v", highlights, tt.wantHighlights)
+ }
+
if !reflect.DeepEqual(len(got.Hits), tt.wantCount) {
t.Errorf("Search() got = %v, want %v", len(got.Hits), tt.wantCount)
}
diff --git a/internal/fulltext/bleve_wasm.go b/internal/fulltext/bleve_wasm.go
index 0053ed8c..12709900 100644
--- a/internal/fulltext/bleve_wasm.go
+++ b/internal/fulltext/bleve_wasm.go
@@ -33,6 +33,7 @@ type Indexer interface {
Index(elements ...IndexElement) error
Delete(eventID string) error
Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error)
+ GetHighlights(result SearchResult) []string
Close() error
}
@@ -71,3 +72,7 @@ func (f *Search) Delete(eventID string) error {
// Search is the stub implementation in bleve_wasm.go. It performs no lookup:
// every argument is ignored and the zero-value SearchResult is returned
// together with a nil error.
func (f *Search) Search(term string, roomIDs, keys []string, limit, from int, orderByStreamPos bool) (SearchResult, error) {
	var empty SearchResult
	return empty, nil
}
+
+// GetHighlights is the stub implementation in bleve_wasm.go. It ignores result
+// and always returns an empty, non-nil slice.
+func (f *Search) GetHighlights(result SearchResult) []string {
+	return make([]string, 0)
+}