Add support for optional LANG param

This commit is contained in:
Timur Garifulin 2021-01-20 00:37:13 +03:00
parent 1cfe7c425f
commit 883b9471ce
5 changed files with 145 additions and 19 deletions

View file

@ -2,12 +2,15 @@ package main
import (
"fmt"
"github.com/expectedsh/go-sonic/sonic"
)
const pswd = "SecretPassword"
func main() {
ingester, err := sonic.NewIngester("localhost", 1491, "SecretPassword")
ingester, err := sonic.NewIngester("localhost", 1491, pswd)
if err != nil {
panic(err)
}
@ -19,14 +22,33 @@ func main() {
{Object: "id:5hg67f8dg5", Text: "Spider man"},
{Object: "id:1m2n3b4vf6", Text: "Batman"},
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
})
}, sonic.LangAutoDetect)
search, err := sonic.NewSearch("localhost", 1491, "SecretPassword")
search, err := sonic.NewSearch("localhost", 1491, pswd)
if err != nil {
panic(err)
}
results, _ := search.Query("movies", "general", "man", 10, 0)
results, _ := search.Query("movies", "general", "man", 10, 0, sonic.LangAutoDetect)
fmt.Println(results)
// Search with LANG set to "none" and "eng"
_ = ingester.FlushCollection("movies")
_ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
{Object: "id:6ab56b4kk3", Text: "Star wars"},
{Object: "id:5hg67f8dg5", Text: "Spider man"},
{Object: "id:1m2n3b4vf6", Text: "Batman"},
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
}, sonic.LangNone)
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangNone)
fmt.Println(results)
// [id:68d96h5h9d0]
// English stop words should be recognized by Sonic now
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangEng)
fmt.Println(results)
// []
}

View file

@ -21,14 +21,14 @@ type IngestBulkError struct {
// Ingestable is used for altering the search index (push, pop and flush).
type Ingestable interface {
// Push search data in the index.
// Command syntax PUSH <collection> <bucket> <object> "<text>"
Push(collection, bucket, object, text string) (err error)
// Command syntax PUSH <collection> <bucket> <object> "<text>" [LANG(<locale>)]?
Push(collection, bucket, object, text string, lang Lang) (err error)
// BulkPush will execute N (parallelRoutines) goroutines at the same time to
// dispatch the records at best.
// If parallelRoutines <= 0; parallelRoutines will be equal to 1.
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) []IngestBulkError
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) []IngestBulkError
// Pop search data from the index.
// Command syntax POP <collection> <bucket> <object> "<text>".
@ -96,7 +96,7 @@ func NewIngester(host string, port int, password string) (Ingestable, error) {
}, nil
}
func (i ingesterChannel) Push(collection, bucket, object, text string) (err error) {
func (i ingesterChannel) Push(collection, bucket, object, text string, lang Lang) (err error) {
//
patterns := []struct {
Pattern string
@ -111,7 +111,8 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
chunks := splitText(text, i.cmdMaxBytes/2)
// split chunks with partial success will yield single error
for _, chunk := range chunks {
err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, chunk))
ff := fmt.Sprintf("%s %s %s %s \"%s\""+langFormat(lang), push, collection, bucket, object, chunk, lang)
err = i.write(ff)
if err != nil {
return err
@ -127,6 +128,13 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
return nil
}
// langFormat picks the format-string fragment that callers append to a Sonic
// command format. Callers always pass lang as the matching final argument.
//
// A non-empty lang yields " LANG(%s)". An empty lang yields a bare "%s", which
// renders the empty value as nothing while still consuming that argument, so
// fmt.Sprintf does not append an %!(EXTRA ...) marker to the command.
func langFormat(lang Lang) string {
	switch lang {
	case "":
		return "%s"
	default:
		return " LANG(%s)"
	}
}
// Ensure splitting on a valid leading byte
// Slicing the string directly is more efficient than converting to []byte and back because
// a string is immutable and a []byte isn't,
@ -148,7 +156,7 @@ func splitText(longString string, maxLen int) []string {
return splits
}
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError) {
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) (errs []IngestBulkError) {
if parallelRoutines <= 0 {
parallelRoutines = 1
}
@ -170,7 +178,7 @@ func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines in
addBulkError(&errs, rec, ErrClosed)
continue
}
err := newIngester.Push(collection, bucket, rec.Object, rec.Text)
err := newIngester.Push(collection, bucket, rec.Object, rec.Text, lang)
if err != nil {
addBulkError(&errs, rec, err)
}

View file

@ -23,7 +23,7 @@ func BenchmarkIngesterChannel_BulkPush2XMaxCPUs(b *testing.B) {
b.Log(e)
b.Fail()
}
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records)
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records, LangAutoDetect)
if len(be) > 0 {
b.Log(be, e)
b.Fail()
@ -44,7 +44,7 @@ func BenchmarkIngesterChannel_BulkPushMaxCPUs(b *testing.B) {
b.Log(e)
b.Fail()
}
be := ingester.BulkPush("test", "testMaxCpus", cpus, records)
be := ingester.BulkPush("test", "testMaxCpus", cpus, records, LangAutoDetect)
if len(be) > 0 {
b.Log(be, e)
b.Fail()
@ -63,7 +63,7 @@ func BenchmarkIngesterChannel_BulkPush10(b *testing.B) {
b.Log(e)
b.Fail()
}
be := ingester.BulkPush("test", "test10", 10, records)
be := ingester.BulkPush("test", "test10", 10, records, LangAutoDetect)
if len(be) > 0 {
b.Log(be, err)
b.Fail()
@ -102,7 +102,7 @@ func BenchmarkIngesterChannel_Push(b *testing.B) {
b.Fail()
}
for _, v := range records {
e := ingester.Push("test", "testBulk", v.Object, v.Text)
e := ingester.Push("test", "testBulk", v.Object, v.Text, LangAutoDetect)
if e != nil {
b.Log(e)
b.Fail()

96
sonic/lang.go Normal file
View file

@ -0,0 +1,96 @@
package sonic
// Lang is a language code passed to Sonic through the optional LANG(<locale>)
// parameter of the PUSH and QUERY commands.
type Lang string
// Language codes accepted by Push, BulkPush and Query.
// NOTE(review): the values look like ISO 639-3 three-letter codes; confirm the
// exact set against the language list supported by the target Sonic version.
const (
// LangAutoDetect is the empty value: no LANG(...) clause is added to the
// command, presumably leaving language detection to Sonic.
LangAutoDetect Lang = ""
// LangNone is sent as LANG(none).
// NOTE(review): presumably disables language-specific processing such as
// stop-word removal; confirm against the Sonic protocol documentation.
LangNone Lang = "none"
LangAfr Lang = "afr"
LangAka Lang = "aka"
LangAmh Lang = "amh"
LangAra Lang = "ara"
LangAzj Lang = "azj"
LangBel Lang = "bel"
LangBen Lang = "ben"
LangBho Lang = "bho"
LangBul Lang = "bul"
LangCat Lang = "cat"
LangCeb Lang = "ceb"
LangCes Lang = "ces"
LangCmn Lang = "cmn"
LangDan Lang = "dan"
LangDeu Lang = "deu"
LangEll Lang = "ell"
LangEng Lang = "eng"
LangEpo Lang = "epo"
LangEst Lang = "est"
LangFin Lang = "fin"
LangFra Lang = "fra"
LangGuj Lang = "guj"
LangHat Lang = "hat"
LangHau Lang = "hau"
LangHeb Lang = "heb"
LangHin Lang = "hin"
LangHrv Lang = "hrv"
LangHun Lang = "hun"
LangIbo Lang = "ibo"
LangIlo Lang = "ilo"
LangInd Lang = "ind"
LangIta Lang = "ita"
LangJav Lang = "jav"
LangJpn Lang = "jpn"
LangKan Lang = "kan"
LangKat Lang = "kat"
LangKhm Lang = "khm"
LangKin Lang = "kin"
LangKor Lang = "kor"
LangKur Lang = "kur"
LangLat Lang = "lat"
LangLav Lang = "lav"
LangLit Lang = "lit"
LangMai Lang = "mai"
LangMal Lang = "mal"
LangMar Lang = "mar"
LangMkd Lang = "mkd"
LangMlg Lang = "mlg"
LangMod Lang = "mod"
LangMya Lang = "mya"
LangNep Lang = "nep"
LangNld Lang = "nld"
LangNno Lang = "nno"
LangNob Lang = "nob"
LangNya Lang = "nya"
LangOri Lang = "ori"
LangOrm Lang = "orm"
LangPan Lang = "pan"
LangPes Lang = "pes"
LangPol Lang = "pol"
LangPor Lang = "por"
LangRon Lang = "ron"
LangRun Lang = "run"
LangRus Lang = "rus"
LangSin Lang = "sin"
LangSkr Lang = "skr"
LangSlk Lang = "slk"
LangSlv Lang = "slv"
LangSna Lang = "sna"
LangSom Lang = "som"
LangSpa Lang = "spa"
LangSrp Lang = "srp"
LangSwe Lang = "swe"
LangTam Lang = "tam"
LangTel Lang = "tel"
LangTgl Lang = "tgl"
LangTha Lang = "tha"
LangTir Lang = "tir"
LangTuk Lang = "tuk"
LangTur Lang = "tur"
LangUig Lang = "uig"
LangUkr Lang = "ukr"
LangUrd Lang = "urd"
LangUzb Lang = "uzb"
LangVie Lang = "vie"
LangYdd Lang = "ydd"
LangYor Lang = "yor"
LangZul Lang = "zul"
)

View file

@ -10,8 +10,8 @@ type Searchable interface {
// Query the database, return a list of object, represented as a string.
// Sonic default limit is 10.
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]?.
Query(collection, bucket, terms string, limit, offset int) (results []string, err error)
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]? [LANG(<locale>)]?.
Query(collection, bucket, terms string, limit, offset int, lang Lang) (results []string, err error)
// Suggest auto-completes word, return a list of words as a string.
// Command syntax SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?.
@ -53,8 +53,8 @@ func NewSearch(host string, port int, password string) (Searchable, error) {
}, nil
}
func (s searchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) {
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset))
func (s searchChannel) Query(collection, bucket, term string, limit, offset int, lang Lang) (results []string, err error) {
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)"+langFormat(lang), query, collection, bucket, term, limit, offset, lang))
if err != nil {
return nil, err
}