Merge pull request #18 from timurgarif/feature_lang_param
Add support for optional LANG param
This commit is contained in:
commit
a5da12c2ea
5 changed files with 145 additions and 19 deletions
|
|
@ -2,12 +2,15 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/expectedsh/go-sonic/sonic"
|
||||
)
|
||||
|
||||
const pswd = "SecretPassword"
|
||||
|
||||
func main() {
|
||||
|
||||
ingester, err := sonic.NewIngester("localhost", 1491, "SecretPassword")
|
||||
ingester, err := sonic.NewIngester("localhost", 1491, pswd)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
@ -19,14 +22,33 @@ func main() {
|
|||
{Object: "id:5hg67f8dg5", Text: "Spider man"},
|
||||
{Object: "id:1m2n3b4vf6", Text: "Batman"},
|
||||
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
|
||||
})
|
||||
}, sonic.LangAutoDetect)
|
||||
|
||||
search, err := sonic.NewSearch("localhost", 1491, "SecretPassword")
|
||||
search, err := sonic.NewSearch("localhost", 1491, pswd)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
results, _ := search.Query("movies", "general", "man", 10, 0)
|
||||
results, _ := search.Query("movies", "general", "man", 10, 0, sonic.LangAutoDetect)
|
||||
|
||||
fmt.Println(results)
|
||||
|
||||
// Search with LANG set to "none" and "eng"
|
||||
|
||||
_ = ingester.FlushCollection("movies")
|
||||
_ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
|
||||
{Object: "id:6ab56b4kk3", Text: "Star wars"},
|
||||
{Object: "id:5hg67f8dg5", Text: "Spider man"},
|
||||
{Object: "id:1m2n3b4vf6", Text: "Batman"},
|
||||
{Object: "id:68d96h5h9d0", Text: "This is another movie"},
|
||||
}, sonic.LangNone)
|
||||
|
||||
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangNone)
|
||||
fmt.Println(results)
|
||||
// [id:68d96h5h9d0]
|
||||
|
||||
// With LANG set to "eng", Sonic should now apply English stop words, so "this is" matches nothing
|
||||
results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangEng)
|
||||
fmt.Println(results)
|
||||
// []
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,14 +21,14 @@ type IngestBulkError struct {
|
|||
// Ingestable is used for altering the search index (push, pop and flush).
|
||||
type Ingestable interface {
|
||||
// Push search data in the index.
|
||||
// Command syntax PUSH <collection> <bucket> <object> "<text>"
|
||||
Push(collection, bucket, object, text string) (err error)
|
||||
// Command syntax PUSH <collection> <bucket> <object> "<text>" [LANG(<locale>)]?
|
||||
Push(collection, bucket, object, text string, lang Lang) (err error)
|
||||
|
||||
// BulkPush will execute N (parallelRoutines) goroutines at the same time to
|
||||
// dispatch the records at best.
|
||||
// If parallelRoutines <= 0; parallelRoutines will be equal to 1.
|
||||
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
|
||||
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) []IngestBulkError
|
||||
BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) []IngestBulkError
|
||||
|
||||
// Pop search data from the index.
|
||||
// Command syntax POP <collection> <bucket> <object> "<text>".
|
||||
|
|
@ -96,7 +96,7 @@ func NewIngester(host string, port int, password string) (Ingestable, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (i ingesterChannel) Push(collection, bucket, object, text string) (err error) {
|
||||
func (i ingesterChannel) Push(collection, bucket, object, text string, lang Lang) (err error) {
|
||||
//
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
|
|
@ -111,7 +111,8 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
|
|||
chunks := splitText(text, i.cmdMaxBytes/2)
|
||||
// split chunks with partial success will yield single error
|
||||
for _, chunk := range chunks {
|
||||
err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, chunk))
|
||||
ff := fmt.Sprintf("%s %s %s %s \"%s\""+langFormat(lang), push, collection, bucket, object, chunk, lang)
|
||||
err = i.write(ff)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -127,6 +128,13 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro
|
|||
return nil
|
||||
}
|
||||
|
||||
func langFormat(lang Lang) string {
|
||||
if lang != "" {
|
||||
return " LANG(%s)"
|
||||
}
|
||||
return "%s"
|
||||
}
|
||||
|
||||
// Ensure splitting on a valid leading byte
|
||||
// Slicing the string directly is more efficient than converting to []byte and back because
|
||||
// since a string is immutable and a []byte isn't,
|
||||
|
|
@ -148,7 +156,7 @@ func splitText(longString string, maxLen int) []string {
|
|||
return splits
|
||||
}
|
||||
|
||||
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError) {
|
||||
func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) (errs []IngestBulkError) {
|
||||
if parallelRoutines <= 0 {
|
||||
parallelRoutines = 1
|
||||
}
|
||||
|
|
@ -170,7 +178,7 @@ func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines in
|
|||
addBulkError(&errs, rec, ErrClosed)
|
||||
continue
|
||||
}
|
||||
err := newIngester.Push(collection, bucket, rec.Object, rec.Text)
|
||||
err := newIngester.Push(collection, bucket, rec.Object, rec.Text, lang)
|
||||
if err != nil {
|
||||
addBulkError(&errs, rec, err)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ func BenchmarkIngesterChannel_BulkPush2XMaxCPUs(b *testing.B) {
|
|||
b.Log(e)
|
||||
b.Fail()
|
||||
}
|
||||
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records)
|
||||
be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records, LangAutoDetect)
|
||||
if len(be) > 0 {
|
||||
b.Log(be, e)
|
||||
b.Fail()
|
||||
|
|
@ -44,7 +44,7 @@ func BenchmarkIngesterChannel_BulkPushMaxCPUs(b *testing.B) {
|
|||
b.Log(e)
|
||||
b.Fail()
|
||||
}
|
||||
be := ingester.BulkPush("test", "testMaxCpus", cpus, records)
|
||||
be := ingester.BulkPush("test", "testMaxCpus", cpus, records, LangAutoDetect)
|
||||
if len(be) > 0 {
|
||||
b.Log(be, e)
|
||||
b.Fail()
|
||||
|
|
@ -63,7 +63,7 @@ func BenchmarkIngesterChannel_BulkPush10(b *testing.B) {
|
|||
b.Log(e)
|
||||
b.Fail()
|
||||
}
|
||||
be := ingester.BulkPush("test", "test10", 10, records)
|
||||
be := ingester.BulkPush("test", "test10", 10, records, LangAutoDetect)
|
||||
if len(be) > 0 {
|
||||
b.Log(be, err)
|
||||
b.Fail()
|
||||
|
|
@ -102,7 +102,7 @@ func BenchmarkIngesterChannel_Push(b *testing.B) {
|
|||
b.Fail()
|
||||
}
|
||||
for _, v := range records {
|
||||
e := ingester.Push("test", "testBulk", v.Object, v.Text)
|
||||
e := ingester.Push("test", "testBulk", v.Object, v.Text, LangAutoDetect)
|
||||
if e != nil {
|
||||
b.Log(e)
|
||||
b.Fail()
|
||||
|
|
|
|||
96
sonic/lang.go
Normal file
96
sonic/lang.go
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
package sonic
|
||||
|
||||
// Lang is the locale code sent to Sonic in the optional LANG(<locale>)
// modifier of the PUSH and QUERY commands. The empty value means the
// modifier is left out of the command entirely.
type Lang string
|
||||
|
||||
// Predefined Lang values accepted by Push, BulkPush and Query.
// NOTE(review): the three-letter values look like ISO 639-3 codes — confirm
// against the list of languages supported by the Sonic server.
const (
|
||||
// LangAutoDetect is the empty value: no LANG(...) modifier is added to the command.
LangAutoDetect Lang = ""
|
||||
// LangNone sends LANG(none).
// NOTE(review): presumably disables language-specific handling such as stop words — confirm.
LangNone Lang = "none"
|
||||
LangAfr Lang = "afr"
|
||||
LangAka Lang = "aka"
|
||||
LangAmh Lang = "amh"
|
||||
LangAra Lang = "ara"
|
||||
LangAzj Lang = "azj"
|
||||
LangBel Lang = "bel"
|
||||
LangBen Lang = "ben"
|
||||
LangBho Lang = "bho"
|
||||
LangBul Lang = "bul"
|
||||
LangCat Lang = "cat"
|
||||
LangCeb Lang = "ceb"
|
||||
LangCes Lang = "ces"
|
||||
LangCmn Lang = "cmn"
|
||||
LangDan Lang = "dan"
|
||||
LangDeu Lang = "deu"
|
||||
LangEll Lang = "ell"
|
||||
LangEng Lang = "eng"
|
||||
LangEpo Lang = "epo"
|
||||
LangEst Lang = "est"
|
||||
LangFin Lang = "fin"
|
||||
LangFra Lang = "fra"
|
||||
LangGuj Lang = "guj"
|
||||
LangHat Lang = "hat"
|
||||
LangHau Lang = "hau"
|
||||
LangHeb Lang = "heb"
|
||||
LangHin Lang = "hin"
|
||||
LangHrv Lang = "hrv"
|
||||
LangHun Lang = "hun"
|
||||
LangIbo Lang = "ibo"
|
||||
LangIlo Lang = "ilo"
|
||||
LangInd Lang = "ind"
|
||||
LangIta Lang = "ita"
|
||||
LangJav Lang = "jav"
|
||||
LangJpn Lang = "jpn"
|
||||
LangKan Lang = "kan"
|
||||
LangKat Lang = "kat"
|
||||
LangKhm Lang = "khm"
|
||||
LangKin Lang = "kin"
|
||||
LangKor Lang = "kor"
|
||||
LangKur Lang = "kur"
|
||||
LangLat Lang = "lat"
|
||||
LangLav Lang = "lav"
|
||||
LangLit Lang = "lit"
|
||||
LangMai Lang = "mai"
|
||||
LangMal Lang = "mal"
|
||||
LangMar Lang = "mar"
|
||||
LangMkd Lang = "mkd"
|
||||
LangMlg Lang = "mlg"
|
||||
LangMod Lang = "mod"
|
||||
LangMya Lang = "mya"
|
||||
LangNep Lang = "nep"
|
||||
LangNld Lang = "nld"
|
||||
LangNno Lang = "nno"
|
||||
LangNob Lang = "nob"
|
||||
LangNya Lang = "nya"
|
||||
LangOri Lang = "ori"
|
||||
LangOrm Lang = "orm"
|
||||
LangPan Lang = "pan"
|
||||
LangPes Lang = "pes"
|
||||
LangPol Lang = "pol"
|
||||
LangPor Lang = "por"
|
||||
LangRon Lang = "ron"
|
||||
LangRun Lang = "run"
|
||||
LangRus Lang = "rus"
|
||||
LangSin Lang = "sin"
|
||||
LangSkr Lang = "skr"
|
||||
LangSlk Lang = "slk"
|
||||
LangSlv Lang = "slv"
|
||||
LangSna Lang = "sna"
|
||||
LangSom Lang = "som"
|
||||
LangSpa Lang = "spa"
|
||||
LangSrp Lang = "srp"
|
||||
LangSwe Lang = "swe"
|
||||
LangTam Lang = "tam"
|
||||
LangTel Lang = "tel"
|
||||
LangTgl Lang = "tgl"
|
||||
LangTha Lang = "tha"
|
||||
LangTir Lang = "tir"
|
||||
LangTuk Lang = "tuk"
|
||||
LangTur Lang = "tur"
|
||||
LangUig Lang = "uig"
|
||||
LangUkr Lang = "ukr"
|
||||
LangUrd Lang = "urd"
|
||||
LangUzb Lang = "uzb"
|
||||
LangVie Lang = "vie"
|
||||
LangYdd Lang = "ydd"
|
||||
LangYor Lang = "yor"
|
||||
LangZul Lang = "zul"
|
||||
)
|
||||
|
|
@ -10,8 +10,8 @@ type Searchable interface {
|
|||
|
||||
// Query the database and return a list of matching objects, each represented as a string.
|
||||
// Sonic default limit is 10.
|
||||
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]?.
|
||||
Query(collection, bucket, terms string, limit, offset int) (results []string, err error)
|
||||
// Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]? [LANG(<locale>)]?.
|
||||
Query(collection, bucket, terms string, limit, offset int, lang Lang) (results []string, err error)
|
||||
|
||||
// Suggest auto-completes word, return a list of words as a string.
|
||||
// Command syntax SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?.
|
||||
|
|
@ -53,8 +53,8 @@ func NewSearch(host string, port int, password string) (Searchable, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (s searchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) {
|
||||
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset))
|
||||
func (s searchChannel) Query(collection, bucket, term string, limit, offset int, lang Lang) (results []string, err error) {
|
||||
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)"+langFormat(lang), query, collection, bucket, term, limit, offset, lang))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue