add bulk with go routines

This commit is contained in:
alexisvisco 2019-03-27 15:22:27 +01:00
parent 56a7e26248
commit 52d3ad065b
9 changed files with 324 additions and 127 deletions

View file

@ -14,7 +14,7 @@ func main() {
// I will ignore all errors for demonstration purposes // I will ignore all errors for demonstration purposes
_ = ingester.BulkPush("movies", "general", []sonic.IngestBulkRecord{ _, _ = ingester.BulkPush("movies", "general", 2, []sonic.IngestBulkRecord{
{"id:6ab56b4kk3", "Star wars"}, {"id:6ab56b4kk3", "Star wars"},
{"id:5hg67f8dg5", "Spider man"}, {"id:5hg67f8dg5", "Spider man"},
{"id:1m2n3b4vf6", "Batman"}, {"id:1m2n3b4vf6", "Batman"},

View file

@ -1,11 +1,15 @@
package sonic package sonic
// Action refers to the list of actions accepted by the TRIGGER command.
type Action string type Action string
const ( const (
// Consolidate action is not detailed in the sonic protocol.
Consolidate Action = "consolidate" Consolidate Action = "consolidate"
) )
// IsActionValid checks whether the action passed as parameter is valid,
// meaning that the TRIGGER command can handle it.
func IsActionValid(action Action) bool { func IsActionValid(action Action) bool {
return action == Consolidate return action == Consolidate
} }

View file

@ -1,13 +1,20 @@
package sonic package sonic
// Channel refers to the list of available channels.
type Channel string type Channel string
const ( const (
// Search is used for querying the search index.
Search Channel = "search" Search Channel = "search"
// Ingest is used for altering the search index (push, pop and flush).
Ingest Channel = "ingest" Ingest Channel = "ingest"
// Control is used for administration purposes.
Control Channel = "control" Control Channel = "control"
) )
// IsChannelValid checks whether the parameter is a valid channel.
func IsChannelValid(ch Channel) bool { func IsChannelValid(ch Channel) bool {
return ch == Search || ch == Ingest || ch == Control return ch == Search || ch == Ingest || ch == Control
} }

84
sonic/connection.go Normal file
View file

@ -0,0 +1,84 @@
package sonic
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"net"
"strings"
)
// connection groups a network connection with the buffered reader used to
// consume its line-based replies, plus a flag recording whether it was closed.
type connection struct {
// reader buffers reads from conn; close resets it to nil.
reader *bufio.Reader
// conn is the underlying network connection; close resets it to nil.
conn net.Conn
// closed is set to true by close; read and write return ErrClosed once set.
closed bool
}
// newConnection dials d.Host:d.Port over TCP, sends the START command for
// d.channel authenticated with d.Password, and consumes the two reply lines
// that follow (presumably the server greeting and the START acknowledgement;
// confirm against the sonic protocol).
//
// It returns the ready-to-use connection, or nil and the first error met.
// When a failure happens after the dial succeeded, the socket is closed
// before returning so that no file descriptor is leaked.
func newConnection(d *driver) (*connection, error) {
	conn, err := net.Dial("tcp", fmt.Sprintf("%s:%d", d.Host, d.Port))
	if err != nil {
		return nil, err
	}

	c := &connection{
		conn:   conn,
		reader: bufio.NewReader(conn),
	}

	if err = c.write(fmt.Sprintf("START %s %s", d.channel, d.Password)); err != nil {
		c.close()
		return nil, err
	}

	// Check every reply individually: a failure on the first line must not be
	// masked by the outcome of the second read.
	for n := 0; n < 2; n++ {
		if _, err = c.read(); err != nil {
			c.close()
			return nil, err
		}
	}
	return c, nil
}
// read returns the next full line sent by the server, reassembling it when
// the buffered reader delivers it in several fragments.
//
// It returns ErrClosed when the connection is already closed. A read error is
// returned as is, and the connection is closed when that error is io.EOF.
// A line starting with "ERR " is turned into an error carrying the remainder
// of the line.
func (c *connection) read() (string, error) {
	if c.closed {
		return "", ErrClosed
	}

	var assembled bytes.Buffer
	for more := true; more; {
		fragment, isPrefix, readErr := c.reader.ReadLine()
		assembled.Write(fragment)
		if readErr != nil {
			if readErr == io.EOF {
				c.close()
			}
			return "", readErr
		}
		// isPrefix signals that the line did not fit in the reader's buffer
		// and that the rest follows in the next call.
		more = isPrefix
	}

	reply := assembled.String()
	if strings.HasPrefix(reply, "ERR ") {
		return "", errors.New(reply[len("ERR "):])
	}
	return reply, nil
}
// write sends str to the server followed by the "\r\n" line terminator.
//
// It returns ErrClosed when the connection has been closed or was never
// opened (nil conn), and otherwise the error reported by the underlying
// net.Conn. The pointer receiver matches read and close and avoids copying
// the struct on every call.
func (c *connection) write(str string) error {
	if c.closed || c.conn == nil {
		return ErrClosed
	}
	_, err := c.conn.Write([]byte(str + "\r\n"))
	return err
}
// close releases the underlying network connection when there is one and
// marks the connection as closed. The error returned by Close is ignored on
// purpose. Calling close several times is harmless.
func (c *connection) close() {
	if active := c.conn; active != nil {
		_ = active.Close()
	}
	c.conn, c.reader, c.closed = nil, nil, true
}

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
) )
// ErrActionName is returned when the action is invalid.
var ErrActionName = errors.New("invalid action name") var ErrActionName = errors.New("invalid action name")
// Controllable is used for administration purposes. // Controllable is used for administration purposes.
@ -16,16 +17,19 @@ type Controllable interface {
// Quit refer to the Base interface // Quit refer to the Base interface
Quit() (err error) Quit() (err error)
// Quit refer to the Base interface // Ping refer to the Base interface
Ping() (err error) Ping() (err error)
} }
type ControlChannel struct { // controlChannel is used for administration purposes.
*Driver type controlChannel struct {
*driver
} }
// NewControl creates a new driver instance with a controlChannel instance.
// It is the only way to get a Controllable implementation.
func NewControl(host string, port int, password string) (Controllable, error) { func NewControl(host string, port int, password string) (Controllable, error) {
driver := &Driver{ driver := &driver{
Host: host, Host: host,
Port: port, Port: port,
Password: password, Password: password,
@ -35,12 +39,12 @@ func NewControl(host string, port int, password string) (Controllable, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
return ControlChannel{ return controlChannel{
Driver: driver, driver: driver,
}, nil }, nil
} }
func (c ControlChannel) Trigger(action Action) (err error) { func (c controlChannel) Trigger(action Action) (err error) {
if IsActionValid(action) { if IsActionValid(action) {
return ErrActionName return ErrActionName
} }

View file

@ -1,13 +1,7 @@
package sonic package sonic
import ( import (
"bufio"
"bytes"
"errors" "errors"
"fmt"
"io"
"net"
"strings"
) )
var ( var (
@ -28,47 +22,27 @@ type Base interface {
Ping() error Ping() error
} }
type Driver struct { type driver struct {
Host string Host string
Port int Port int
Password string Password string
channel Channel channel Channel
reader *bufio.Reader *connection
conn net.Conn
closed bool
} }
// Connect open a connection via TCP with the sonic server. // Connect open a connection via TCP with the sonic server.
func (c *Driver) Connect() error { func (c *driver) Connect() error {
if !IsChannelValid(c.channel) { if !IsChannelValid(c.channel) {
return ErrChanName return ErrChanName
} }
c.clean() var err error
conn, err := net.Dial("tcp", fmt.Sprintf("%s:%d", c.Host, c.Port)) c.connection, err = newConnection(c)
if err != nil {
return err return err
}
c.closed = false
c.conn = conn
c.reader = bufio.NewReader(c.conn)
err = c.write(fmt.Sprintf("START %s %s", c.channel, c.Password))
if err != nil {
return err
}
_, err = c.read()
_, err = c.read()
if err != nil {
return err
}
return nil
} }
func (c *Driver) Quit() error { func (c *driver) Quit() error {
err := c.write("QUIT") err := c.write("QUIT")
if err != nil { if err != nil {
return err return err
@ -76,11 +50,11 @@ func (c *Driver) Quit() error {
// should get ENDED // should get ENDED
_, err = c.read() _, err = c.read()
c.clean() c.close()
return err return err
} }
func (c Driver) Ping() error { func (c driver) Ping() error {
err := c.write("PING") err := c.write("PING")
if err != nil { if err != nil {
return err return err
@ -93,46 +67,3 @@ func (c Driver) Ping() error {
} }
return nil return nil
} }
func (c *Driver) read() (string, error) {
if c.closed {
return "", ErrClosed
}
buffer := bytes.Buffer{}
for {
line, isPrefix, err := c.reader.ReadLine()
buffer.Write(line)
if err != nil {
if err == io.EOF {
c.clean()
}
return "", err
}
if !isPrefix {
break
}
}
str := buffer.String()
if strings.HasPrefix(str, "ERR ") {
return "", errors.New(str[4:])
}
return str, nil
}
func (c Driver) write(str string) error {
if c.closed {
return ErrClosed
}
_, err := c.conn.Write([]byte(str + "\r\n"))
return err
}
func (c *Driver) clean() {
if c.conn != nil {
_ = c.conn.Close()
c.conn = nil
}
c.closed = true
c.reader = nil
}

View file

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"strconv" "strconv"
"strings" "strings"
"sync"
) )
type IngestBulkRecord struct { type IngestBulkRecord struct {
@ -25,7 +26,7 @@ type Ingestable interface {
// dispatch the records at best. // dispatch the records at best.
// If parallelRoutines <= 0; parallelRoutines will be equal to 1. // If parallelRoutines <= 0; parallelRoutines will be equal to 1.
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records). // If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
BulkPush(collection, bucket string, records []IngestBulkRecord) []IngestBulkError BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) ([]IngestBulkError, error)
// Pop search data from the index. // Pop search data from the index.
// Command syntax POP <collection> <bucket> <object> "<text>". // Command syntax POP <collection> <bucket> <object> "<text>".
@ -35,7 +36,7 @@ type Ingestable interface {
// dispatch the records at best. // dispatch the records at best.
// If parallelRoutines <= 0; parallelRoutines will be equal to 1. // If parallelRoutines <= 0; parallelRoutines will be equal to 1.
// If parallelRoutines > len(records); parallelRoutines will be equal to len(records). // If parallelRoutines > len(records); parallelRoutines will be equal to len(records).
BulkPop(collection, bucket string, records []IngestBulkRecord) []IngestBulkError BulkPop(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) ([]IngestBulkError, error)
// Count indexed search data. // Count indexed search data.
// bucket and object are optionals, empty string ignore it. // bucket and object are optionals, empty string ignore it.
@ -57,7 +58,7 @@ type Ingestable interface {
// Quit refer to the Base interface // Quit refer to the Base interface
Quit() (err error) Quit() (err error)
// Quit refer to the Base interface // Ping refer to the Base interface
Ping() (err error) Ping() (err error)
} }
@ -72,12 +73,14 @@ const (
flusho ingesterCommands = "FLUSHO" flusho ingesterCommands = "FLUSHO"
) )
type IngesterChannel struct { type ingesterChannel struct {
*Driver *driver
} }
// NewIngester creates a new driver instance with an ingesterChannel instance.
// It is the only way to get an Ingestable implementation.
func NewIngester(host string, port int, password string) (Ingestable, error) { func NewIngester(host string, port int, password string) (Ingestable, error) {
driver := &Driver{ driver := &driver{
Host: host, Host: host,
Port: port, Port: port,
Password: password, Password: password,
@ -87,12 +90,12 @@ func NewIngester(host string, port int, password string) (Ingestable, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
return IngesterChannel{ return ingesterChannel{
Driver: driver, driver: driver,
}, nil }, nil
} }
func (i IngesterChannel) Push(collection, bucket, object, text string) (err error) { func (i ingesterChannel) Push(collection, bucket, object, text string) (err error) {
err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, text)) err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, object, text))
if err != nil { if err != nil {
return err return err
@ -106,19 +109,55 @@ func (i IngesterChannel) Push(collection, bucket, object, text string) (err erro
return nil return nil
} }
func (i IngesterChannel) BulkPush(collection, bucket string, records []IngestBulkRecord) []IngestBulkError { func (i ingesterChannel) BulkPush(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError, err error) {
errs := make([]IngestBulkError, 0) if parallelRoutines <= 0 {
parallelRoutines = 1
for _, v := range records {
if err := i.Push(collection, bucket, v.Object, v.Text); err != nil {
errs = append(errs, IngestBulkError{v.Object, err})
}
} }
return errs err = nil
errs = make([]IngestBulkError, 0)
errMutex := sync.Mutex{}
// chunk array into N (parallelRoutines) parts
divided := i.divideIngestBulkRecords(records, parallelRoutines)
// dispatch each records array into N goroutines
group := sync.WaitGroup{}
group.Add(len(divided))
for _, r := range divided {
go func(recs []IngestBulkRecord) {
var conn *connection
errMutex.Lock()
conn, err = newConnection(i.driver)
errMutex.Unlock()
for _, rec := range recs {
err := conn.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, rec.Object, rec.Text))
if err != nil {
errMutex.Lock()
errs = append(errs, IngestBulkError{rec.Object, err})
errMutex.Unlock()
continue
}
// sonic should sent OK
_, err = conn.read()
if err != nil {
errMutex.Lock()
errs = append(errs, IngestBulkError{rec.Object, err})
errMutex.Unlock()
}
}
conn.close()
group.Done()
}(r)
}
group.Wait()
return errs, err
} }
func (i IngesterChannel) Pop(collection, bucket, object, text string) (err error) { func (i ingesterChannel) Pop(collection, bucket, object, text string) (err error) {
err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", pop, collection, bucket, object, text)) err = i.write(fmt.Sprintf("%s %s %s %s \"%s\"", pop, collection, bucket, object, text))
if err != nil { if err != nil {
return err return err
@ -132,19 +171,55 @@ func (i IngesterChannel) Pop(collection, bucket, object, text string) (err error
return nil return nil
} }
func (i IngesterChannel) BulkPop(collection, bucket string, records []IngestBulkRecord) []IngestBulkError { func (i ingesterChannel) BulkPop(collection, bucket string, parallelRoutines int, records []IngestBulkRecord) (errs []IngestBulkError, err error) {
errs := make([]IngestBulkError, 0) if parallelRoutines <= 0 {
parallelRoutines = 1
for _, v := range records {
if err := i.Push(collection, bucket, v.Object, v.Text); err != nil {
errs = append(errs, IngestBulkError{v.Object, err})
}
} }
return errs err = nil
errs = make([]IngestBulkError, 0)
errMutex := sync.Mutex{}
// chunk array into N (parallelRoutines) parts
divided := i.divideIngestBulkRecords(records, parallelRoutines)
// dispatch each records array into N goroutines
group := sync.WaitGroup{}
group.Add(len(divided))
for _, r := range divided {
go func(recs []IngestBulkRecord) {
var conn *connection
errMutex.Lock()
conn, err = newConnection(i.driver)
errMutex.Unlock()
for _, rec := range recs {
err := conn.write(fmt.Sprintf("%s %s %s %s \"%s\"", push, collection, bucket, rec.Object, rec.Text))
if err != nil {
errMutex.Lock()
errs = append(errs, IngestBulkError{rec.Object, err})
errMutex.Unlock()
continue
}
// sonic should sent OK
_, err = conn.read()
if err != nil {
errMutex.Lock()
errs = append(errs, IngestBulkError{rec.Object, err})
errMutex.Unlock()
}
}
conn.close()
group.Done()
}(r)
}
group.Wait()
return errs, err
} }
func (i IngesterChannel) Count(collection, bucket, object string) (cnt int, err error) { func (i ingesterChannel) Count(collection, bucket, object string) (cnt int, err error) {
err = i.write(fmt.Sprintf("%s %s %s", count, collection, buildCountQuery(bucket, object))) err = i.write(fmt.Sprintf("%s %s %s", count, collection, buildCountQuery(bucket, object)))
if err != nil { if err != nil {
return 0, err return 0, err
@ -169,7 +244,7 @@ func buildCountQuery(bucket, object string) string {
return builder.String() return builder.String()
} }
func (i IngesterChannel) FlushCollection(collection string) (err error) { func (i ingesterChannel) FlushCollection(collection string) (err error) {
err = i.write(fmt.Sprintf("%s %s", flushc, collection)) err = i.write(fmt.Sprintf("%s %s", flushc, collection))
if err != nil { if err != nil {
return err return err
@ -183,7 +258,7 @@ func (i IngesterChannel) FlushCollection(collection string) (err error) {
return nil return nil
} }
func (i IngesterChannel) FlushBucket(collection, bucket string) (err error) { func (i ingesterChannel) FlushBucket(collection, bucket string) (err error) {
err = i.write(fmt.Sprintf("%s %s %s", flushb, collection, bucket)) err = i.write(fmt.Sprintf("%s %s %s", flushb, collection, bucket))
if err != nil { if err != nil {
return err return err
@ -197,7 +272,7 @@ func (i IngesterChannel) FlushBucket(collection, bucket string) (err error) {
return nil return nil
} }
func (i IngesterChannel) FlushObject(collection, bucket, object string) (err error) { func (i ingesterChannel) FlushObject(collection, bucket, object string) (err error) {
err = i.write(fmt.Sprintf("%s %s %s %s", flusho, collection, bucket, object)) err = i.write(fmt.Sprintf("%s %s %s %s", flusho, collection, bucket, object))
if err != nil { if err != nil {
return err return err
@ -211,7 +286,7 @@ func (i IngesterChannel) FlushObject(collection, bucket, object string) (err err
return nil return nil
} }
func (i IngesterChannel) divideIngestBulkRecords(records []IngestBulkRecord, parallelRoutines int) [][]IngestBulkRecord { func (i ingesterChannel) divideIngestBulkRecords(records []IngestBulkRecord, parallelRoutines int) [][]IngestBulkRecord {
var divided [][]IngestBulkRecord var divided [][]IngestBulkRecord
chunkSize := (len(records) + parallelRoutines - 1) / parallelRoutines chunkSize := (len(records) + parallelRoutines - 1) / parallelRoutines
for i := 0; i < len(records); i += chunkSize { for i := 0; i < len(records); i += chunkSize {

90
sonic/ingester_test.go Normal file
View file

@ -0,0 +1,90 @@
package sonic
import (
"math/rand"
"runtime"
"testing"
"time"
)
// records is the fixture shared by the benchmarks of this file; the init
// function of this file appends the generated entries to it.
var records = make([]IngestBulkRecord, 0)
// ingester and err hold the result of NewIngester("localhost", 1491, ...),
// evaluated during package initialization. Every benchmark checks err before
// doing any work.
var ingester, err = NewIngester("localhost", 1491, "SecretPassword")
// BenchmarkIngesterChannel_BulkPushMaxCPUs measures BulkPush over the shared
// records fixture with parallelRoutines set to runtime.NumCPU(). The
// "testMaxCpus" bucket is flushed at the start of every iteration.
//
// The benchmark is reported as skipped, rather than silently succeeding, when
// the package-level connection attempt failed.
func BenchmarkIngesterChannel_BulkPushMaxCPUs(b *testing.B) {
	if err != nil {
		b.Skipf("sonic server unavailable: %v", err)
	}

	cpus := runtime.NumCPU()
	for n := 0; n < b.N; n++ {
		if e := ingester.FlushBucket("test", "testMaxCpus"); e != nil {
			b.Error(e)
		}

		be, e := ingester.BulkPush("test", "testMaxCpus", cpus, records)
		if len(be) > 0 || e != nil {
			b.Error(be, e)
		}
	}
}
// BenchmarkIngesterChannel_BulkPush10 measures BulkPush over the shared
// records fixture with parallelRoutines set to 10. The "test10" bucket is
// flushed at the start of every iteration.
//
// The benchmark is reported as skipped, rather than silently succeeding, when
// the package-level connection attempt failed.
func BenchmarkIngesterChannel_BulkPush10(b *testing.B) {
	if err != nil {
		b.Skipf("sonic server unavailable: %v", err)
	}

	for n := 0; n < b.N; n++ {
		if e := ingester.FlushBucket("test", "test10"); e != nil {
			b.Error(e)
		}

		// Inspect the error returned by this call, not the package-level err,
		// which is known to be nil at this point.
		be, e := ingester.BulkPush("test", "test10", 10, records)
		if len(be) > 0 || e != nil {
			b.Error(be, e)
		}
	}
}
// BenchmarkIngesterChannel_Push measures pushing the shared records fixture
// one record at a time with Push, as a baseline for the bulk benchmarks. The
// "testBulk" bucket is flushed at the start of every iteration.
//
// The benchmark is reported as skipped, rather than silently succeeding, when
// the package-level connection attempt failed.
func BenchmarkIngesterChannel_Push(b *testing.B) {
	if err != nil {
		b.Skipf("sonic server unavailable: %v", err)
	}

	for n := 0; n < b.N; n++ {
		if e := ingester.FlushBucket("test", "testBulk"); e != nil {
			b.Error(e)
		}

		for _, v := range records {
			if e := ingester.Push("test", "testBulk", v.Object, v.Text); e != nil {
				b.Error(e)
			}
		}
	}
}
// charset is the alphabet used to generate the benchmark fixtures: ASCII
// letters, digits and the underscore.
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"

// seededRand is the random source used by randStr, seeded with the current
// time in nanoseconds when the package is initialized.
var seededRand = rand.New(rand.NewSource(time.Now().UnixNano()))

// randStr returns a string of length bytes, each one drawn from charset using
// seededRand.
func randStr(length int, charset string) string {
	out := make([]byte, length)
	for pos := 0; pos < len(out); pos++ {
		pick := seededRand.Intn(len(charset))
		out[pos] = charset[pick]
	}
	return string(out)
}
func init() {
for n := 0; n < 3000; n++ {
records = append(records, IngestBulkRecord{randStr(10, charset), randStr(10, charset)})
}
}

View file

@ -20,7 +20,7 @@ type Searchable interface {
// Quit refer to the Base interface // Quit refer to the Base interface
Quit() (err error) Quit() (err error)
// Quit refer to the Base interface // Ping refer to the Base interface
Ping() (err error) Ping() (err error)
} }
@ -31,12 +31,14 @@ const (
suggest searchCommands = "SUGGEST" suggest searchCommands = "SUGGEST"
) )
type SearchChannel struct { type searchChannel struct {
*Driver *driver
} }
// NewSearch creates a new driver instance with a searchChannel instance.
// It is the only way to get a Searchable implementation.
func NewSearch(host string, port int, password string) (Searchable, error) { func NewSearch(host string, port int, password string) (Searchable, error) {
driver := &Driver{ driver := &driver{
Host: host, Host: host,
Port: port, Port: port,
Password: password, Password: password,
@ -46,12 +48,12 @@ func NewSearch(host string, port int, password string) (Searchable, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
return SearchChannel{ return searchChannel{
Driver: driver, driver: driver,
}, nil }, nil
} }
func (s SearchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) { func (s searchChannel) Query(collection, bucket, term string, limit, offset int) (results []string, err error) {
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset)) err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d) OFFSET(%d)", query, collection, bucket, term, limit, offset))
if err != nil { if err != nil {
return nil, err return nil, err
@ -71,7 +73,7 @@ func (s SearchChannel) Query(collection, bucket, term string, limit, offset int)
return getSearchResults(read, string(query)), nil return getSearchResults(read, string(query)), nil
} }
func (s SearchChannel) Suggest(collection, bucket, word string, limit int) (results []string, err error) { func (s searchChannel) Suggest(collection, bucket, word string, limit int) (results []string, err error) {
err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d)", suggest, collection, bucket, word, limit)) err = s.write(fmt.Sprintf("%s %s %s \"%s\" LIMIT(%d)", suggest, collection, bucket, word, limit))
if err != nil { if err != nil {
return nil, err return nil, err