Add support for optional LANG param
This commit is contained in:
parent
1cfe7c425f
commit
883b9471ce
5 changed files with 145 additions and 19 deletions
|
|
@ -2,12 +2,15 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/expectedsh/go-sonic/sonic"
|
"github.com/expectedsh/go-sonic/sonic"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const pswd = "SecretPassword"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
ingester, err := sonic.NewIngester("localhost", 1491, "SecretPassword")
|
ingester, err := sonic.NewIngester("localhost", 1491, pswd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
@ -19,14 +22,33 @@ func main() {
|
||||||
{Object: "id:5hg67f8dg5", Text: "Spider man"},
|
{Object: "id:5hg67f8dg5", Text: "Spider man"},
|
||||||
{Object: "id:1m2n3b4vf6", Text: "Batman"},
|
{Object: "id:1m2n3b4vf6", Text: "Batman"},
|
||||||
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
|
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
|
||||||
})
|
}, sonic.LangAutoDetect)
|
||||||
|
|
||||||
search, err := sonic.NewSearch("localhost", 1491, "SecretPassword")
|
search, err := sonic.NewSearch("localhost", 1491, pswd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
results, _ := search.Query("movies", "general", "man", 10, 0)
|
results, _ := search.Query("movies", "general", "man", 10, 0, sonic.LangAutoDetect)
|
||||||
|
|
||||||
fmt.Println(results)
|
fmt.Println(results)
|
||||||
|
|
||||||
|
// Search with LANG set to "none" and "eng"
|
||||||
|
|
||||||
|
_ = ingester.FlushCollection("movies")
|
||||||
|
_ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
|
||||||
|
{Object: "id:6ab56b4kk3", Text: "Star wars"},
|
||||||
|
{Object: "id:5hg67f8dg5", Text: "Spider man"},
|
||||||
|
{Object: "id:1m2n3b4vf6", Text: "Batman"},
|
||||||
|
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
|
||||||
|
}, sonic.LangNone)
|
||||||
|
|
||||||
|
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangNone)
|
||||||
|
fmt.Println(results)
|
||||||
|
// [id:68d96h5h9d0]
|
||||||
|
|
||||||
|
// With LANG set to "eng", Sonic should now treat "this" and "is" as English stop words
|
||||||
|
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangEng)
|
||||||
|
fmt.Println(results)
|
||||||
|
// []
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,14 +21,14 @@ type IngestBulkError struct {
|
||||||
// Ingestable is used for altering the search index (push, pop and flush).
|
// Ingestable is used for altering the search index (push, pop and flush).
|
||||||
type Ingestable interface {
|
type Ingestable interface {
|
||||||
// Push search data in the index.
|
// Push search data in the index.
|
||||||
// Command syntax PUSH <collection> <bucket> <object> "<text>"
|
// Command syntax PUSH <collection> <bucket> <object> "<text>" [LANG(<locale>)]?
|
||||||
Push(collection, bucket, object, text string) (err error)
|
Push(collection, bucket, object, text string, lang Lang) (err error)
|
||||||
|
|
||||||
// BulkPush will execute N (parallelRoutines) goroutines at the same time to
|
// BulkPush will execute N (parallelRoutines) goroutines at the same time to
|
||||||
// dispatch the records at best.
|
// dispatch the records at best.
|
||||||
// If parallelRoutines <= 0; parallelRoutines will be equal to 1.
|
// If parallelRoutines <= 0; parallelRoutines will be equal to 1.
|
||||||
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
|
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
|
||||||
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) []IngestBulkError
|
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) []IngestBulkError
|
||||||
|
|
||||||
// Pop search data from the index.
|
// Pop search data from the index.
|
||||||
// Command syntax POP <collection> <bucket> <object> "<text>".
|
// Command syntax POP <collection> <bucket> <object> "<text>".
|
||||||
|
|
@ -96,7 +96,7 @@ func NewIngester(host string, port int, password string) (Ingestable, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i ingesterChannel) Push(collection, bucket, object, text string) (err error) {
|
func (i ingesterChannel) Push(collection, bucket, object, text string, lang Lang) (err error) {
|
||||||
//
|
//
|
||||||
patterns := []struct {
|
patterns := []struct {
|
||||||
Pattern string
|
Pattern string
|
||||||
|
|
@ -111,7 +111,8 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
|
||||||
chunks := splitText(text, i.cmdMaxBytes/2)
|
chunks := splitText(text, i.cmdMaxBytes/2)
|
||||||
// split chunks with partial success will yield single error
|
// split chunks with partial success will yield single error
|
||||||
for _, chunk := range chunks {
|
for _, chunk := range chunks {
|
||||||
err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, chunk))
|
ff := fmt.Sprintf("%s %s %s %s \"%s\""+langFormat(lang), push, collection, bucket, object, chunk, lang)
|
||||||
|
err = i.write(ff)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
@ -127,6 +128,13 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func langFormat(lang Lang) string {
|
||||||
|
if lang != "" {
|
||||||
|
return " LANG(%s)"
|
||||||
|
}
|
||||||
|
return "%s"
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure splitting on a valid leading byte
|
// Ensure splitting on a valid leading byte
|
||||||
// Slicing the string directly is more efficient than converting to []byte and back because
|
// Slicing the string directly is more efficient than converting to []byte and back because
|
||||||
// since a string is immutable and a []byte isn't,
|
// since a string is immutable and a []byte isn't,
|
||||||
|
|
@ -148,7 +156,7 @@ func splitText(longString string, maxLen int) []string {
|
||||||
return splits
|
return splits
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError) {
|
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) (errs []IngestBulkError) {
|
||||||
if parallelRoutines <= 0 {
|
if parallelRoutines <= 0 {
|
||||||
parallelRoutines = 1
|
parallelRoutines = 1
|
||||||
}
|
}
|
||||||
|
|
@ -170,7 +178,7 @@ func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines in
|
||||||
addBulkError(&errs, rec, ErrClosed)
|
addBulkError(&errs, rec, ErrClosed)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
err := newIngester.Push(collection, bucket, rec.Object, rec.Text)
|
err := newIngester.Push(collection, bucket, rec.Object, rec.Text, lang)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
addBulkError(&errs, rec, err)
|
addBulkError(&errs, rec, err)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ func BenchmarkIngesterChannel_BulkPush2XMaxCPUs(b *testing.B) {
|
||||||
b.Log(e)
|
b.Log(e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
}
|
}
|
||||||
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records)
|
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records, LangAutoDetect)
|
||||||
if len(be) > 0 {
|
if len(be) > 0 {
|
||||||
b.Log(be, e)
|
b.Log(be, e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
|
|
@ -44,7 +44,7 @@ func BenchmarkIngesterChannel_BulkPushMaxCPUs(b *testing.B) {
|
||||||
b.Log(e)
|
b.Log(e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
}
|
}
|
||||||
be := ingester.BulkPush("test", "testMaxCpus", cpus, records)
|
be := ingester.BulkPush("test", "testMaxCpus", cpus, records, LangAutoDetect)
|
||||||
if len(be) > 0 {
|
if len(be) > 0 {
|
||||||
b.Log(be, e)
|
b.Log(be, e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
|
|
@ -63,7 +63,7 @@ func BenchmarkIngesterChannel_BulkPush10(b *testing.B) {
|
||||||
b.Log(e)
|
b.Log(e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
}
|
}
|
||||||
be := ingester.BulkPush("test", "test10", 10, records)
|
be := ingester.BulkPush("test", "test10", 10, records, LangAutoDetect)
|
||||||
if len(be) > 0 {
|
if len(be) > 0 {
|
||||||
b.Log(be, err)
|
b.Log(be, err)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
|
|
@ -102,7 +102,7 @@ func BenchmarkIngesterChannel_Push(b *testing.B) {
|
||||||
b.Fail()
|
b.Fail()
|
||||||
}
|
}
|
||||||
for _, v := range records {
|
for _, v := range records {
|
||||||
e := ingester.Push("test", "testBulk", v.Object, v.Text)
|
e := ingester.Push("test", "testBulk", v.Object, v.Text, LangAutoDetect)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
b.Log(e)
|
b.Log(e)
|
||||||
b.Fail()
|
b.Fail()
|
||||||
|
|
|
||||||
96
sonic/lang.go
Normal file
96
sonic/lang.go
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
package sonic
|
||||||
|
|
||||||
|
// Lang is a locale code sent to Sonic through the optional LANG(<locale>)
// command parameter.
type Lang string

// Special Lang values.
const (
	// LangAutoDetect is the empty locale. It is the value to pass when no
	// explicit LANG(<locale>) should be requested.
	LangAutoDetect Lang = ""

	// LangNone is the literal locale "none".
	LangNone Lang = "none"
)

// Three-letter locale codes, each sent verbatim as the LANG(<locale>) value.
const (
	LangAfr Lang = "afr"
	LangAka Lang = "aka"
	LangAmh Lang = "amh"
	LangAra Lang = "ara"
	LangAzj Lang = "azj"
	LangBel Lang = "bel"
	LangBen Lang = "ben"
	LangBho Lang = "bho"
	LangBul Lang = "bul"
	LangCat Lang = "cat"
	LangCeb Lang = "ceb"
	LangCes Lang = "ces"
	LangCmn Lang = "cmn"
	LangDan Lang = "dan"
	LangDeu Lang = "deu"
	LangEll Lang = "ell"
	LangEng Lang = "eng"
	LangEpo Lang = "epo"
	LangEst Lang = "est"
	LangFin Lang = "fin"
	LangFra Lang = "fra"
	LangGuj Lang = "guj"
	LangHat Lang = "hat"
	LangHau Lang = "hau"
	LangHeb Lang = "heb"
	LangHin Lang = "hin"
	LangHrv Lang = "hrv"
	LangHun Lang = "hun"
	LangIbo Lang = "ibo"
	LangIlo Lang = "ilo"
	LangInd Lang = "ind"
	LangIta Lang = "ita"
	LangJav Lang = "jav"
	LangJpn Lang = "jpn"
	LangKan Lang = "kan"
	LangKat Lang = "kat"
	LangKhm Lang = "khm"
	LangKin Lang = "kin"
	LangKor Lang = "kor"
	LangKur Lang = "kur"
	LangLat Lang = "lat"
	LangLav Lang = "lav"
	LangLit Lang = "lit"
	LangMai Lang = "mai"
	LangMal Lang = "mal"
	LangMar Lang = "mar"
	LangMkd Lang = "mkd"
	LangMlg Lang = "mlg"
	LangMod Lang = "mod"
	LangMya Lang = "mya"
	LangNep Lang = "nep"
	LangNld Lang = "nld"
	LangNno Lang = "nno"
	LangNob Lang = "nob"
	LangNya Lang = "nya"
	LangOri Lang = "ori"
	LangOrm Lang = "orm"
	LangPan Lang = "pan"
	LangPes Lang = "pes"
	LangPol Lang = "pol"
	LangPor Lang = "por"
	LangRon Lang = "ron"
	LangRun Lang = "run"
	LangRus Lang = "rus"
	LangSin Lang = "sin"
	LangSkr Lang = "skr"
	LangSlk Lang = "slk"
	LangSlv Lang = "slv"
	LangSna Lang = "sna"
	LangSom Lang = "som"
	LangSpa Lang = "spa"
	LangSrp Lang = "srp"
	LangSwe Lang = "swe"
	LangTam Lang = "tam"
	LangTel Lang = "tel"
	LangTgl Lang = "tgl"
	LangTha Lang = "tha"
	LangTir Lang = "tir"
	LangTuk Lang = "tuk"
	LangTur Lang = "tur"
	LangUig Lang = "uig"
	LangUkr Lang = "ukr"
	LangUrd Lang = "urd"
	LangUzb Lang = "uzb"
	LangVie Lang = "vie"
	LangYdd Lang = "ydd"
	LangYor Lang = "yor"
	LangZul Lang = "zul"
)
|
||||||
|
|
@ -10,8 +10,8 @@ type Searchable interface {
|
||||||
|
|
||||||
// Query the database, return a list of object, represented as a string.
|
// Query the database, return a list of object, represented as a string.
|
||||||
// Sonic default limit is 10.
|
// Sonic default limit is 10.
|
||||||
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]?.
|
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]? [LANG(<locale>)]?.
|
||||||
Query(collection, bucket, terms string, limit, offset int) (results []string, err error)
|
Query(collection, bucket, terms string, limit, offset int, lang Lang) (results []string, err error)
|
||||||
|
|
||||||
// Suggest auto-completes word, return a list of words as a string.
|
// Suggest auto-completes word, return a list of words as a string.
|
||||||
// Command syntax SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?.
|
// Command syntax SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?.
|
||||||
|
|
@ -53,8 +53,8 @@ func NewSearch(host string, port int, password string) (Searchable, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s searchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) {
|
func (s searchChannel) Query(collection, bucket, term string, limit, offset int, lang Lang) (results []string, err error) {
|
||||||
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset))
|
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)"+langFormat(lang), query, collection, bucket, term, limit, offset, lang))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue