diff --git a/cmd/example/main.go b/cmd/example/main.go index c1266a9..fcf9c2d 100644 --- a/cmd/example/main.go +++ b/cmd/example/main.go @@ -2,12 +2,15 @@ package main import ( "fmt" + "github.com/expectedsh/go-sonic/sonic" ) +const pswd = "SecretPassword" + func main() { - ingester, err := sonic.NewIngester("localhost", 1491, "SecretPassword") + ingester, err := sonic.NewIngester("localhost", 1491, pswd) if err != nil { panic(err) } @@ -19,14 +22,33 @@ func main() { {Object: "id:5hg67f8dg5", Text: "Spider man"}, {Object: "id:1m2n3b4vf6", Text: "Batman"}, {Object: "id:68d96h5h9d0", Text: "This is another movie"}, - }) + }, sonic.LangAutoDetect) - search, err := sonic.NewSearch("localhost", 1491, "SecretPassword") + search, err := sonic.NewSearch("localhost", 1491, pswd) if err != nil { panic(err) } - results, _ := search.Query("movies", "general", "man", 10, 0) + results, _ := search.Query("movies", "general", "man", 10, 0, sonic.LangAutoDetect) fmt.Println(results) + + // Search with LANG set to "none" and "eng" + + _ = ingester.FlushCollection("movies") + _ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{ + {Object: "id:6ab56b4kk3", Text: "Star wars"}, + {Object: "id:5hg67f8dg5", Text: "Spider man"}, + {Object: "id:1m2n3b4vf6", Text: "Batman"}, + {Object: "id:68d96h5h9d0", Text: "This is another movie"}, + }, sonic.LangNone) + + results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangNone) + fmt.Println(results) + // [id:68d96h5h9d0] + + // English stop words should be encountered by Sonic now + results, _ = search.Query("movies", "general", "this is", 10, 0, sonic.LangEng) + fmt.Println(results) + // [] } diff --git a/sonic/ingester.go b/sonic/ingester.go index 0d6f4ea..fab7869 100644 --- a/sonic/ingester.go +++ b/sonic/ingester.go @@ -21,14 +21,14 @@ type IngestBulkError struct { // Ingestable is used for altering the search index (push, pop and flush). type Ingestable interface { // Push search data in the index. 
- // Command syntax PUSH <collection> <bucket> <object> "<text>" - Push(collection, bucket, object, text string) (err error) + // Command syntax PUSH <collection> <bucket> <object> "<text>" [LANG(<locale>)]? + Push(collection, bucket, object, text string, lang Lang) (err error) // BulkPush will execute N (parallelRoutines) goroutines at the same time to // dispatch the records at best. // If parallelRoutines <= 0; parallelRoutines will be equal to 1. // If parallelRoutines > len(records); parallelRoutines will be equal to len(records). - BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) []IngestBulkError + BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) []IngestBulkError // Pop search data from the index. // Command syntax POP <collection> <bucket> <object> "<text>". @@ -96,7 +96,7 @@ func NewIngester(host string, port int, password string) (Ingestable, error) { }, nil } -func (i ingesterChannel) Push(collection, bucket, object, text string) (err error) { +func (i ingesterChannel) Push(collection, bucket, object, text string, lang Lang) (err error) { // patterns := []struct { Pattern string @@ -111,7 +111,8 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro chunks := splitText(text, i.cmdMaxBytes/2) // split chunks with partial success will yield single error for _, chunk := range chunks { - err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, chunk)) + ff := fmt.Sprintf("%s %s %s %s \"%s\""+langFormat(lang), push, collection, bucket, object, chunk, lang) + err = i.write(ff) if err != nil { return err @@ -127,6 +128,13 @@ func (i ingesterChannel) Push(collection, bucket, object, text string) (err erro return nil } +func langFormat(lang Lang) string { + if lang != "" { + return " LANG(%s)" + } + return "%s" +} + // Ensure splitting on a valid leading byte // Slicing the string directly is more efficient than converting to []byte and back because // since a string is immutable and a []byte isn't, @@ -148,7 +156,7 @@ func 
splitText(longString string, maxLen int) []string { return splits } -func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError) { +func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord, lang Lang) (errs []IngestBulkError) { if parallelRoutines <= 0 { parallelRoutines = 1 } @@ -170,7 +178,7 @@ func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines in addBulkError(&errs, rec, ErrClosed) continue } - err := newIngester.Push(collection, bucket, rec.Object, rec.Text) + err := newIngester.Push(collection, bucket, rec.Object, rec.Text, lang) if err != nil { addBulkError(&errs, rec, err) } diff --git a/sonic/ingester_test.go b/sonic/ingester_test.go index f7f7e51..15f1777 100644 --- a/sonic/ingester_test.go +++ b/sonic/ingester_test.go @@ -23,7 +23,7 @@ func BenchmarkIngesterChannel_BulkPush2XMaxCPUs(b *testing.B) { b.Log(e) b.Fail() } - be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records) + be := ingester.BulkPush("test", "test2XMaxCpus", cpus, records, LangAutoDetect) if len(be) > 0 { b.Log(be, e) b.Fail() @@ -44,7 +44,7 @@ func BenchmarkIngesterChannel_BulkPushMaxCPUs(b *testing.B) { b.Log(e) b.Fail() } - be := ingester.BulkPush("test", "testMaxCpus", cpus, records) + be := ingester.BulkPush("test", "testMaxCpus", cpus, records, LangAutoDetect) if len(be) > 0 { b.Log(be, e) b.Fail() @@ -63,7 +63,7 @@ func BenchmarkIngesterChannel_BulkPush10(b *testing.B) { b.Log(e) b.Fail() } - be := ingester.BulkPush("test", "test10", 10, records) + be := ingester.BulkPush("test", "test10", 10, records, LangAutoDetect) if len(be) > 0 { b.Log(be, err) b.Fail() @@ -102,7 +102,7 @@ func BenchmarkIngesterChannel_Push(b *testing.B) { b.Fail() } for _, v := range records { - e := ingester.Push("test", "testBulk", v.Object, v.Text) + e := ingester.Push("test", "testBulk", v.Object, v.Text, LangAutoDetect) if e != nil { b.Log(e) 
b.Fail() diff --git a/sonic/lang.go b/sonic/lang.go new file mode 100644 index 0000000..bf3430d --- /dev/null +++ b/sonic/lang.go @@ -0,0 +1,96 @@ +package sonic + +type Lang string + +const ( + LangAutoDetect Lang = "" + LangNone Lang = "none" + LangAfr Lang = "afr" + LangAka Lang = "aka" + LangAmh Lang = "amh" + LangAra Lang = "ara" + LangAzj Lang = "azj" + LangBel Lang = "bel" + LangBen Lang = "ben" + LangBho Lang = "bho" + LangBul Lang = "bul" + LangCat Lang = "cat" + LangCeb Lang = "ceb" + LangCes Lang = "ces" + LangCmn Lang = "cmn" + LangDan Lang = "dan" + LangDeu Lang = "deu" + LangEll Lang = "ell" + LangEng Lang = "eng" + LangEpo Lang = "epo" + LangEst Lang = "est" + LangFin Lang = "fin" + LangFra Lang = "fra" + LangGuj Lang = "guj" + LangHat Lang = "hat" + LangHau Lang = "hau" + LangHeb Lang = "heb" + LangHin Lang = "hin" + LangHrv Lang = "hrv" + LangHun Lang = "hun" + LangIbo Lang = "ibo" + LangIlo Lang = "ilo" + LangInd Lang = "ind" + LangIta Lang = "ita" + LangJav Lang = "jav" + LangJpn Lang = "jpn" + LangKan Lang = "kan" + LangKat Lang = "kat" + LangKhm Lang = "khm" + LangKin Lang = "kin" + LangKor Lang = "kor" + LangKur Lang = "kur" + LangLat Lang = "lat" + LangLav Lang = "lav" + LangLit Lang = "lit" + LangMai Lang = "mai" + LangMal Lang = "mal" + LangMar Lang = "mar" + LangMkd Lang = "mkd" + LangMlg Lang = "mlg" + LangMod Lang = "mod" + LangMya Lang = "mya" + LangNep Lang = "nep" + LangNld Lang = "nld" + LangNno Lang = "nno" + LangNob Lang = "nob" + LangNya Lang = "nya" + LangOri Lang = "ori" + LangOrm Lang = "orm" + LangPan Lang = "pan" + LangPes Lang = "pes" + LangPol Lang = "pol" + LangPor Lang = "por" + LangRon Lang = "ron" + LangRun Lang = "run" + LangRus Lang = "rus" + LangSin Lang = "sin" + LangSkr Lang = "skr" + LangSlk Lang = "slk" + LangSlv Lang = "slv" + LangSna Lang = "sna" + LangSom Lang = "som" + LangSpa Lang = "spa" + LangSrp Lang = "srp" + LangSwe Lang = "swe" + LangTam Lang = "tam" + LangTel Lang = "tel" + LangTgl Lang = "tgl" + 
LangTha Lang = "tha" + LangTir Lang = "tir" + LangTuk Lang = "tuk" + LangTur Lang = "tur" + LangUig Lang = "uig" + LangUkr Lang = "ukr" + LangUrd Lang = "urd" + LangUzb Lang = "uzb" + LangVie Lang = "vie" + LangYdd Lang = "ydd" + LangYor Lang = "yor" + LangZul Lang = "zul" +) diff --git a/sonic/search.go b/sonic/search.go index ff22c13..c067232 100644 --- a/sonic/search.go +++ b/sonic/search.go @@ -10,8 +10,8 @@ type Searchable interface { // Query the database, return a list of object, represented as a string. // Sonic default limit is 10. - // Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]?. - Query(collection, bucket, terms string, limit, offset int) (results []string, err error) + // Command syntax QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]? [LANG(<locale>)]?. + Query(collection, bucket, terms string, limit, offset int, lang Lang) (results []string, err error) // Suggest auto-completes word, return a list of words as a string. // Command syntax SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?. @@ -53,8 +53,8 @@ func NewSearch(host string, port int, password string) (Searchable, error) { }, nil } -func (s searchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) { - err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset)) +func (s searchChannel) Query(collection, bucket, term string, limit, offset int, lang Lang) (results []string, err error) { + err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)"+langFormat(lang), query, collection, bucket, term, limit, offset, lang)) if err != nil { return nil, err }