diff --git a/go.mod b/go.mod index 2ea0e5c..f92e742 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.24.0 toolchain go1.24.1 require ( + github.com/blevesearch/bleve/v2 v2.5.7 github.com/dslipak/pdf v0.0.2 github.com/go-git/go-git/v5 v5.16.0 github.com/jackc/pgx/v5 v5.8.0 @@ -22,6 +23,25 @@ require ( dario.cat/mergo v1.0.2 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect + github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect + github.com/bits-and-blooms/bitset v1.22.0 // indirect + github.com/blevesearch/bleve_index_api v1.2.11 // indirect + github.com/blevesearch/geo v0.2.4 // indirect + github.com/blevesearch/go-faiss v1.0.26 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.3.13 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.1.0 // indirect + github.com/blevesearch/zapx/v11 v11.4.2 // indirect + github.com/blevesearch/zapx/v12 v12.4.2 // indirect + github.com/blevesearch/zapx/v13 v13.4.2 // indirect + github.com/blevesearch/zapx/v14 v14.4.2 // indirect + github.com/blevesearch/zapx/v15 v15.4.2 // indirect + github.com/blevesearch/zapx/v16 v16.2.8 // indirect github.com/cloudflare/circl v1.6.1 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect @@ -30,17 +50,21 @@ require ( github.com/go-logr/logr v1.4.2 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad 
// indirect + github.com/google/uuid v1.6.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect + github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/labstack/gommon v0.4.2 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect @@ -49,6 +73,7 @@ require ( github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect + go.etcd.io/bbolt v1.4.0 // indirect golang.org/x/crypto v0.43.0 // indirect golang.org/x/net v0.45.0 // indirect golang.org/x/sync v0.17.0 // indirect @@ -56,6 +81,7 @@ require ( golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.8.0 // indirect golang.org/x/tools v0.37.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 61a7d7d..5a643df 100644 --- a/go.sum +++ b/go.sum @@ -5,10 +5,51 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/RoaringBitmap/roaring/v2 v2.4.5 h1:uGrrMreGjvAtTBobc0g5IrW1D5ldxDQYe2JW2gggRdg= 
+github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= +github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/blevesearch/bleve/v2 v2.5.7 h1:2d9YrL5zrX5EBBW++GOaEKjE+NPWeZGaX77IM26m1Z8= +github.com/blevesearch/bleve/v2 v2.5.7/go.mod h1:yj0NlS7ocGC4VOSAedqDDMktdh2935v2CSWOCDMHdSA= +github.com/blevesearch/bleve_index_api v1.2.11 h1:bXQ54kVuwP8hdrXUSOnvTQfgK0KI1+f9A0ITJT8tX1s= +github.com/blevesearch/bleve_index_api v1.2.11/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0= +github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk= +github.com/blevesearch/geo v0.2.4/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8= +github.com/blevesearch/go-faiss v1.0.26 h1:4dRLolFgjPyjkaXwff4NfbZFdE/dfywbzDqporeQvXI= +github.com/blevesearch/go-faiss v1.0.26/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 
h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.3.13 h1:ZPjv/4VwWvHJZKeMSgScCapOy8+DdmsmRyLmSB88UoY= +github.com/blevesearch/scorch_segment_api/v2 v2.3.13/go.mod h1:ENk2LClTehOuMS8XzN3UxBEErYmtwkE7MAArFTXs9Vc= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w= +github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y= +github.com/blevesearch/zapx/v11 v11.4.2 h1:l46SV+b0gFN+Rw3wUI1YdMWdSAVhskYuvxlcgpQFljs= +github.com/blevesearch/zapx/v11 v11.4.2/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc= +github.com/blevesearch/zapx/v12 v12.4.2 h1:fzRbhllQmEMUuAQ7zBuMvKRlcPA5ESTgWlDEoB9uQNE= +github.com/blevesearch/zapx/v12 v12.4.2/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58= +github.com/blevesearch/zapx/v13 v13.4.2 h1:46PIZCO/ZuKZYgxI8Y7lOJqX3Irkc3N8W82QTK3MVks= +github.com/blevesearch/zapx/v13 v13.4.2/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk= +github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT7fWYz0= +github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= +github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= +github.com/blevesearch/zapx/v15 v15.4.2/go.mod 
h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= +github.com/blevesearch/zapx/v16 v16.2.8 h1:SlnzF0YGtSlrsOE3oE7EgEX6BIepGpeqxs1IjMbHLQI= +github.com/blevesearch/zapx/v16 v16.2.8/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14= github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= @@ -38,10 +79,14 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -52,6 +97,8 @@ github.com/jackc/puddle/v2 v2.2.2 
h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede h1:YrgBGwxMRK0Vq0WSCWFaZUnTsrA/PZE/xs1QZh+/edg= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -72,6 +119,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/mudler/xlog v0.0.5 h1:2unBuVC5rNGhCC86UaA94TElWFml80NL5XLK+kAmNuU= github.com/mudler/xlog v0.0.5/go.mod h1:39f5vcd05Qd6GWKM8IjyHNQ7AmOx3ZM0YfhfIGhC18U= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= @@ -114,6 +163,8 @@ github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQ github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod 
h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= +go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk= +go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= @@ -127,6 +178,7 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -145,6 +197,8 @@ golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= @@ -154,6 +208,7 @@ gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRN gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 h1:6YFJoB+0fUH6X3xU/G2tQqCYg+PkGtnZ5nMR5rpw72g= diff --git a/rag/engine/chromem.go b/rag/engine/chromem.go index a80311f..5e33688 100644 --- a/rag/engine/chromem.go +++ b/rag/engine/chromem.go @@ -1,11 +1,21 @@ package engine import ( + "bytes" "context" + "encoding/json" "fmt" + "io" + "net/http" + "os" + "path/filepath" "runtime" + "strconv" + "time" + "github.com/blevesearch/bleve/v2" "github.com/mudler/localrecall/rag/types" + "github.com/mudler/xlog" "github.com/philippgille/chromem-go" "github.com/sashabaranov/go-openai" ) @@ -17,6 +27,15 @@ type ChromemDB struct { client *openai.Client db *chromem.DB embeddingsModel string + bleveIndex bleve.Index + bleveIndexPath string + bm25Weight float64 + vectorWeight float64 + bleveAnalyzer string + rerankerModel string + localAIBaseURL string + localAIAPIKey string + httpClient *http.Client } func NewChromemDBCollection(collection, path string, openaiClient *openai.Client, embeddingsModel string) (*ChromemDB, error) { @@ -25,26 +44,95 @@ func NewChromemDBCollection(collection, path string, openaiClient *openai.Client return nil, err } - chromem := &ChromemDB{ + // Get hybrid search weights from environment + bm25Weight := 0.5 + 
vectorWeight := 0.5 + if w := os.Getenv("HYBRID_SEARCH_BM25_WEIGHT"); w != "" { + if parsed, err := strconv.ParseFloat(w, 64); err == nil { + bm25Weight = parsed + } + } + if w := os.Getenv("HYBRID_SEARCH_VECTOR_WEIGHT"); w != "" { + if parsed, err := strconv.ParseFloat(w, 64); err == nil { + vectorWeight = parsed + } + } + + bleveAnalyzer := "en" + if a := os.Getenv("BLEVE_ANALYZER"); a != "" { + bleveAnalyzer = a + } + + // Get reranker configuration + rerankerModel := os.Getenv("RERANKER_MODEL") + localAIBaseURL := os.Getenv("OPENAI_BASE_URL") + localAIAPIKey := os.Getenv("OPENAI_API_KEY") + + chromemDB := &ChromemDB{ collectionName: collection, index: 1, db: db, client: openaiClient, embeddingsModel: embeddingsModel, + bm25Weight: bm25Weight, + vectorWeight: vectorWeight, + bleveAnalyzer: bleveAnalyzer, + rerankerModel: rerankerModel, + localAIBaseURL: localAIBaseURL, + localAIAPIKey: localAIAPIKey, + httpClient: &http.Client{Timeout: 30 * time.Second}, } - c, err := db.GetOrCreateCollection(collection, nil, chromem.embedding()) + c, err := db.GetOrCreateCollection(collection, nil, chromemDB.embedding()) if err != nil { return nil, err } - chromem.collection = c + chromemDB.collection = c count := c.Count() if count > 0 { - chromem.index = count + 1 + chromemDB.index = count + 1 + } + + // Initialize bleve index + bleveIndexPath := filepath.Join(path, "bleve", collection) + chromemDB.bleveIndexPath = bleveIndexPath + + // Try to open existing index, or create new one + bleveIndex, err := bleve.Open(bleveIndexPath) + if err != nil { + // Index doesn't exist, create it + mapping := bleve.NewIndexMapping() + + // Configure English analyzer for text fields + englishTextFieldMapping := bleve.NewTextFieldMapping() + englishTextFieldMapping.Analyzer = chromemDB.bleveAnalyzer + + // Create document mapping + docMapping := bleve.NewDocumentMapping() + docMapping.AddFieldMappingsAt("content", englishTextFieldMapping) + docMapping.AddFieldMappingsAt("title", 
englishTextFieldMapping) + + // Add metadata as stored field + metadataMapping := bleve.NewDocumentDisabledMapping() + docMapping.AddSubDocumentMapping("metadata", metadataMapping) + + mapping.AddDocumentMapping("_default", docMapping) + mapping.DefaultAnalyzer = chromemDB.bleveAnalyzer + + bleveIndex, err = bleve.New(bleveIndexPath, mapping) + if err != nil { + xlog.Warn("Failed to create bleve index, continuing with chromem-only", "error", err) + // Continue without bleve - graceful degradation + chromemDB.bleveIndex = nil + } else { + chromemDB.bleveIndex = bleveIndex + } + } else { + chromemDB.bleveIndex = bleveIndex } - return chromem, nil + return chromemDB, nil } func (c *ChromemDB) Count() int { @@ -52,6 +140,7 @@ func (c *ChromemDB) Count() int { } func (c *ChromemDB) Reset() error { + // Reset chromem collection if err := c.db.DeleteCollection(c.collectionName); err != nil { return fmt.Errorf("error deleting collection: %v", err) } @@ -61,6 +150,39 @@ func (c *ChromemDB) Reset() error { } c.collection = collection + // Reset bleve index if available + if c.bleveIndex != nil { + // Close existing index + if err := c.bleveIndex.Close(); err != nil { + xlog.Warn("Failed to close bleve index", "error", err) + } + + // Remove index directory + if err := os.RemoveAll(c.bleveIndexPath); err != nil { + xlog.Warn("Failed to remove bleve index directory", "error", err) + } + + // Recreate index + mapping := bleve.NewIndexMapping() + englishTextFieldMapping := bleve.NewTextFieldMapping() + englishTextFieldMapping.Analyzer = c.bleveAnalyzer + docMapping := bleve.NewDocumentMapping() + docMapping.AddFieldMappingsAt("content", englishTextFieldMapping) + docMapping.AddFieldMappingsAt("title", englishTextFieldMapping) + metadataMapping := bleve.NewDocumentDisabledMapping() + docMapping.AddSubDocumentMapping("metadata", metadataMapping) + mapping.AddDocumentMapping("_default", docMapping) + mapping.DefaultAnalyzer = c.bleveAnalyzer + + bleveIndex, err := 
bleve.New(c.bleveIndexPath, mapping) + if err != nil { + xlog.Warn("Failed to recreate bleve index", "error", err) + c.bleveIndex = nil + } else { + c.bleveIndex = bleveIndex + } + } + return nil } @@ -110,18 +232,48 @@ func (c *ChromemDB) Store(s string, metadata map[string]string) (Result, error) return Result{}, fmt.Errorf("empty string") } + docID := fmt.Sprint(c.index) + + // Store in chromem if err := c.collection.AddDocuments(context.Background(), []chromem.Document{ { Metadata: metadata, Content: s, - ID: fmt.Sprint(c.index), + ID: docID, }, }, runtime.NumCPU()); err != nil { return Result{}, err } + // Index in bleve if available + if c.bleveIndex != nil { + title := metadata["title"] + if title == "" { + title = metadata["source"] + } + + bleveDoc := map[string]interface{}{ + "id": docID, + "content": s, + "title": title, + } + + // Store metadata as JSON string for filtering + if len(metadata) > 0 { + metadataJSON, err := json.Marshal(metadata) + if err == nil { + bleveDoc["metadata"] = string(metadataJSON) + } + } + + if err := c.bleveIndex.Index(docID, bleveDoc); err != nil { + xlog.Warn("Failed to index document in bleve", "id", docID, "error", err) + // Continue even if bleve indexing fails + } + } + return Result{ - ID: fmt.Sprint(c.index), + ID: docID, }, nil } @@ -137,25 +289,80 @@ func (c *ChromemDB) StoreDocuments(s []string, metadata map[string]string) ([]Re results := make([]Result, len(s)) documents := make([]chromem.Document, len(s)) for i, content := range s { + docID := fmt.Sprint(c.index + i) documents[i] = chromem.Document{ Metadata: metadata, Content: content, - ID: fmt.Sprint(c.index + i), + ID: docID, } results[i] = Result{ - ID: fmt.Sprint(c.index + i), + ID: docID, } } + // Store in chromem if err := c.collection.AddDocuments(context.Background(), documents, runtime.NumCPU()); err != nil { return nil, err } + // Index in bleve if available + if c.bleveIndex != nil { + title := metadata["title"] + if title == "" { + title = 
metadata["source"] + } + + metadataJSON, _ := json.Marshal(metadata) + + for i, content := range s { + docID := fmt.Sprint(c.index + i) + bleveDoc := map[string]interface{}{ + "id": docID, + "content": content, + "title": title, + } + + if len(metadata) > 0 { + bleveDoc["metadata"] = string(metadataJSON) + } + + if err := c.bleveIndex.Index(docID, bleveDoc); err != nil { + xlog.Warn("Failed to index document in bleve", "id", docID, "error", err) + // Continue even if bleve indexing fails + } + } + } + return results, nil } func (c *ChromemDB) Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error { - return c.collection.Delete(context.Background(), where, whereDocuments, ids...) + // Delete from chromem + if err := c.collection.Delete(context.Background(), where, whereDocuments, ids...); err != nil { + return err + } + + // Delete from bleve if available + if c.bleveIndex != nil { + // If deleting by IDs, delete those IDs from bleve + if len(ids) > 0 { + for _, id := range ids { + if err := c.bleveIndex.Delete(id); err != nil { + xlog.Warn("Failed to delete document from bleve", "id", id, "error", err) + // Continue even if bleve deletion fails + } + } + } else { + // If deleting by metadata filters, we need to search and delete + // This is more complex - for now, we'll log a warning + // In practice, chromem handles metadata filtering, so this should be rare + if len(where) > 0 || len(whereDocuments) > 0 { + xlog.Warn("Bleve delete by metadata filters not fully supported, may need manual cleanup") + } + } + } + + return nil } func (c *ChromemDB) GetByID(id string) (types.Result, error) { @@ -167,23 +374,412 @@ func (c *ChromemDB) GetByID(id string) (types.Result, error) { return types.Result{ID: res.ID, Metadata: res.Metadata, Content: res.Content}, nil } -func (c *ChromemDB) Search(s string, similarEntries int) ([]types.Result, error) { - res, err := c.collection.Query(context.Background(), s, similarEntries, nil, nil) +// 
Reranker API types matching JINA reranker schema +type rerankRequest struct { + Model string `json:"model"` + Query string `json:"query"` + Documents []string `json:"documents"` + TopN *int `json:"top_n,omitempty"` +} + +type rerankDocumentResult struct { + Index int `json:"index"` + Document textDoc `json:"document"` + RelevanceScore float64 `json:"relevance_score"` +} + +type textDoc struct { + Text string `json:"text"` +} + +type rerankResponse struct { + Model string `json:"model"` + Usage usageInfo `json:"usage"` + Results []rerankDocumentResult `json:"results"` +} + +type usageInfo struct { + TotalTokens int `json:"total_tokens"` + PromptTokens int `json:"prompt_tokens"` +} + +type rerankResult struct { + Index int + Document string + RelevanceScore float64 +} + +// rerankDocuments calls LocalAI's reranker API to rerank documents by relevance to a query +func (c *ChromemDB) rerankDocuments(ctx context.Context, query string, documents []string, topN int) ([]rerankResult, error) { + if c.localAIBaseURL == "" { + return nil, fmt.Errorf("LocalAI base URL not configured") + } + if c.rerankerModel == "" { + return nil, fmt.Errorf("reranker model not configured") + } + + url := fmt.Sprintf("%s/v1/rerank", c.localAIBaseURL) + + reqBody := rerankRequest{ + Model: c.rerankerModel, + Query: query, + Documents: documents, + TopN: &topN, + } + + jsonData, err := json.Marshal(reqBody) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to marshal rerank request: %w", err) } - var results []types.Result + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create rerank request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + if c.localAIAPIKey != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.localAIAPIKey)) + } - for _, r := range res { - results = append(results, types.Result{ - ID: r.ID, - Metadata: r.Metadata, - Content: 
r.Content, + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to execute rerank request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("rerank API returned status %d: %s", resp.StatusCode, string(body)) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read rerank response: %w", err) + } + + var rerankResp rerankResponse + if err := json.Unmarshal(body, &rerankResp); err != nil { + return nil, fmt.Errorf("failed to unmarshal rerank response: %w", err) + } + + results := make([]rerankResult, len(rerankResp.Results)) + for i, r := range rerankResp.Results { + results[i] = rerankResult{ + Index: r.Index, + Document: r.Document.Text, + RelevanceScore: r.RelevanceScore, + } + } + + return results, nil +} + +// searchWithReranker collects candidates from chromem and bleve, then uses reranker to order results +func (c *ChromemDB) searchWithReranker(ctx context.Context, query string, topN int, chromemResults []chromem.Result, bleveSearchResult *bleve.SearchResult) ([]types.Result, error) { + // Collect all candidate documents + candidateMap := make(map[string]types.Result) + + // Add chromem results + for _, r := range chromemResults { + candidateMap[r.ID] = types.Result{ + ID: r.ID, + Metadata: r.Metadata, + Content: r.Content, Similarity: r.Similarity, - }) + } + } + + // Add bleve results if available + if bleveSearchResult != nil { + for _, hit := range bleveSearchResult.Hits { + id := hit.ID + if _, exists := candidateMap[id]; !exists { + // Extract fields from search hit + var content, title string + var metadata map[string]string + + if contentVal, ok := hit.Fields["content"]; ok { + if str, ok := contentVal.(string); ok { + content = str + } else if arr, ok := contentVal.([]interface{}); ok && len(arr) > 0 { + if str, ok := arr[0].(string); ok { + content = str + } + } + } + if titleVal, ok 
:= hit.Fields["title"]; ok { + if str, ok := titleVal.(string); ok { + title = str + } else if arr, ok := titleVal.([]interface{}); ok && len(arr) > 0 { + if str, ok := arr[0].(string); ok { + title = str + } + } + } + if metadataVal, ok := hit.Fields["metadata"]; ok { + if str, ok := metadataVal.(string); ok { + if err := json.Unmarshal([]byte(str), &metadata); err != nil { + metadata = make(map[string]string) + } + } else if arr, ok := metadataVal.([]interface{}); ok && len(arr) > 0 { + if str, ok := arr[0].(string); ok { + if err := json.Unmarshal([]byte(str), &metadata); err != nil { + metadata = make(map[string]string) + } + } + } + } + + if metadata == nil { + metadata = make(map[string]string) + } + if title != "" { + metadata["title"] = title + } + + candidateMap[id] = types.Result{ + ID: id, + Content: content, + Metadata: metadata, + } + } + } + } + + // Convert to ordered list for reranker (preserve order for mapping back) + candidateList := make([]types.Result, 0, len(candidateMap)) + documents := make([]string, 0, len(candidateMap)) + + for _, candidate := range candidateMap { + candidateList = append(candidateList, candidate) + documents = append(documents, candidate.Content) + } + + if len(documents) == 0 { + return []types.Result{}, nil + } + + // Call reranker + rerankResults, err := c.rerankDocuments(ctx, query, documents, topN) + if err != nil { + // Fallback to combined score approach if reranker fails + xlog.Warn("Reranker API call failed, falling back to combined score", "error", err) + return c.fallbackToCombinedScore(chromemResults, bleveSearchResult, topN) + } + + // Map reranker results back to document IDs + results := make([]types.Result, 0, len(rerankResults)) + for _, rerankResult := range rerankResults { + if rerankResult.Index >= 0 && rerankResult.Index < len(candidateList) { + result := candidateList[rerankResult.Index] + result.Similarity = float32(rerankResult.RelevanceScore) + results = append(results, result) + } } return 
results, nil } +
+// firstStringField returns the string stored under key in a bleve hit's
+// Fields map. The stored value may be a plain string or a slice whose first
+// element is a string; any other shape (or a missing key) yields ("", false).
+func firstStringField(fields map[string]interface{}, key string) (string, bool) {
+	switch v := fields[key].(type) {
+	case string:
+		return v, true
+	case []interface{}:
+		if len(v) > 0 {
+			if s, ok := v[0].(string); ok {
+				return s, true
+			}
+		}
+	}
+	return "", false
+}
+
+// resultFromBleveFields builds a types.Result for a document that was only
+// found by bleve, using the stored "content", "title" and "metadata" fields.
+// The "metadata" field is expected to hold a JSON object of string values; if
+// it is missing or cannot be decoded an empty map is used. A non-empty title
+// is copied into the metadata under the "title" key.
+func resultFromBleveFields(id string, fields map[string]interface{}, similarity float32) types.Result {
+	content, _ := firstStringField(fields, "content")
+
+	metadata := make(map[string]string)
+	if raw, ok := firstStringField(fields, "metadata"); ok {
+		// Unmarshal may leave a partially filled (or nil, for "null") map
+		// behind, so start over with an empty one in those cases.
+		if err := json.Unmarshal([]byte(raw), &metadata); err != nil || metadata == nil {
+			metadata = make(map[string]string)
+		}
+	}
+	if title, _ := firstStringField(fields, "title"); title != "" {
+		metadata["title"] = title
+	}
+
+	return types.Result{
+		ID:         id,
+		Content:    content,
+		Metadata:   metadata,
+		Similarity: similarity,
+	}
+}
+
+// chromemOnlyResults converts chromem results to types.Result values, keeping
+// chromem's order and raw similarity, and returns at most limit entries.
+func chromemOnlyResults(chromemResults []chromem.Result, limit int) []types.Result {
+	if limit < 0 {
+		limit = 0
+	}
+	if len(chromemResults) > limit {
+		chromemResults = chromemResults[:limit]
+	}
+	results := make([]types.Result, 0, len(chromemResults))
+	for _, r := range chromemResults {
+		results = append(results, types.Result{
+			ID:         r.ID,
+			Metadata:   r.Metadata,
+			Content:    r.Content,
+			Similarity: r.Similarity,
+		})
+	}
+	return results
+}
+
+// fallbackToCombinedScore merges vector (chromem) and full-text (bleve)
+// results into a single ranking using a weighted sum of their scores.
+//
+// Scoring, per document ID:
+//   - found by both:       bleveScore*c.bm25Weight + similarity*c.vectorWeight
+//   - found by chromem only: similarity*c.vectorWeight
+//   - found by bleve only:   bleveScore*c.bm25Weight
+//
+// where bleveScore is the bleve hit score capped at 1.0.
+// NOTE(review): capping (rather than scaling) makes every hit scoring above
+// 1.0 tie on the bleve component — confirm this is the intended normalization.
+//
+// bleveSearchResult may be nil, in which case only chromem results are used.
+// The returned slice is sorted by Similarity in descending order and holds at
+// most similarEntries entries (none when similarEntries is <= 0). The error is
+// always nil; it is kept for signature compatibility.
+func (c *ChromemDB) fallbackToCombinedScore(chromemResults []chromem.Result, bleveSearchResult *bleve.SearchResult, similarEntries int) ([]types.Result, error) {
+	// Capped bleve scores keyed by document ID.
+	hitCount := 0
+	var bleveScores map[string]float64
+	if bleveSearchResult != nil {
+		hitCount = len(bleveSearchResult.Hits)
+		bleveScores = make(map[string]float64, hitCount)
+		for _, hit := range bleveSearchResult.Hits {
+			bleveScores[hit.ID] = min(hit.Score, 1.0)
+		}
+	}
+
+	// Collect candidates in input order (chromem first, then bleve-only hits)
+	// so that, combined with the stable sort below, ties are ordered
+	// deterministically instead of following Go's random map iteration order.
+	results := make([]types.Result, 0, len(chromemResults)+hitCount)
+	seen := make(map[string]struct{}, len(chromemResults)+hitCount)
+
+	for _, r := range chromemResults {
+		if _, dup := seen[r.ID]; dup {
+			continue
+		}
+		seen[r.ID] = struct{}{}
+
+		// Vector weight only, unless bleve also matched this document.
+		similarity := r.Similarity * float32(c.vectorWeight)
+		if bleveScore, ok := bleveScores[r.ID]; ok {
+			similarity = float32(bleveScore*c.bm25Weight + float64(r.Similarity)*c.vectorWeight)
+		}
+		results = append(results, types.Result{
+			ID:         r.ID,
+			Metadata:   r.Metadata,
+			Content:    r.Content,
+			Similarity: similarity,
+		})
+	}
+
+	if bleveSearchResult != nil {
+		for _, hit := range bleveSearchResult.Hits {
+			if _, dup := seen[hit.ID]; dup {
+				continue
+			}
+			seen[hit.ID] = struct{}{}
+			similarity := float32(bleveScores[hit.ID] * c.bm25Weight)
+			results = append(results, resultFromBleveFields(hit.ID, hit.Fields, similarity))
+		}
+	}
+
+	// Stable insertion sort, highest similarity first. The candidate list is
+	// small (bounded by the two search sizes), so this stays cheap.
+	for i := 1; i < len(results); i++ {
+		for j := i; j > 0 && results[j-1].Similarity < results[j].Similarity; j-- {
+			results[j-1], results[j] = results[j], results[j-1]
+		}
+	}
+
+	// Limit to the requested number; a negative limit would otherwise panic
+	// when slicing.
+	if similarEntries < 0 {
+		similarEntries = 0
+	}
+	if len(results) > similarEntries {
+		results = results[:similarEntries]
+	}
+
+	return results, nil
+}
+
+// Search returns up to similarEntries results for the query s.
+//
+// It gathers up to similarEntries*2 candidates from chromem (vector search)
+// and, when a bleve index is present, the same number from bleve (full-text
+// match query on s). The candidates are then ranked:
+//   - if both c.rerankerModel and c.localAIBaseURL are set, by
+//     searchWithReranker (bleve results are passed as nil when bleve is
+//     missing or its search failed);
+//   - otherwise, if bleve results are available, by fallbackToCombinedScore;
+//   - otherwise the chromem results are returned as-is, truncated.
+//
+// A bleve search failure is logged and treated as "bleve unavailable"; a
+// chromem query failure is returned as the error. A non-positive
+// similarEntries yields an empty result.
+func (c *ChromemDB) Search(s string, similarEntries int) ([]types.Result, error) {
+	if similarEntries <= 0 {
+		return []types.Result{}, nil
+	}
+
+	ctx := context.Background()
+	candidates := similarEntries * 2
+	useReranker := c.rerankerModel != "" && c.localAIBaseURL != ""
+
+	// Get vector similarity results from chromem. The request size is bounded
+	// by the number of stored documents so that small (or empty) collections
+	// do not fail the query.
+	// NOTE(review): chromem's Query is understood to return an error when the
+	// requested count exceeds the document count — confirm against the
+	// pinned chromem-go version.
+	var chromemResults []chromem.Result
+	if n := min(candidates, c.collection.Count()); n > 0 {
+		var err error
+		chromemResults, err = c.collection.Query(ctx, s, n, nil, nil)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// Perform full-text search with bleve when the index is available.
+	var bleveSearchResult *bleve.SearchResult
+	bleveOK := false
+	if c.bleveIndex != nil {
+		query := bleve.NewMatchQuery(s)
+		searchRequest := bleve.NewSearchRequest(query)
+		searchRequest.Size = candidates
+		searchRequest.Fields = []string{"content", "title", "metadata"}
+		searchRequest.IncludeLocations = false
+
+		res, err := c.bleveIndex.Search(searchRequest)
+		if err != nil {
+			// Fallback to chromem-only search if bleve fails
+			xlog.Warn("Bleve search failed, falling back to chromem-only", "error", err)
+		} else {
+			bleveSearchResult = res
+			bleveOK = true
+		}
+	}
+
+	// A configured reranker takes precedence over combined scores; without
+	// usable bleve results it reranks the chromem candidates alone.
+	if useReranker {
+		if !bleveOK {
+			return c.searchWithReranker(ctx, s, similarEntries, chromemResults, nil)
+		}
+		return c.searchWithReranker(ctx, s, similarEntries, chromemResults, bleveSearchResult)
+	}
+
+	// No reranker, no bleve - return chromem results only
+	if !bleveOK {
+		return chromemOnlyResults(chromemResults, similarEntries), nil
+	}
+
+	// No reranker - use combined score approach (existing behavior)
+	return c.fallbackToCombinedScore(chromemResults, bleveSearchResult, similarEntries)
+}
diff --git a/rag/engine/chromem_test.go b/rag/engine/chromem_test.go index b1bd55e..b68a711 100644 --- a/rag/engine/chromem_test.go +++ b/rag/engine/chromem_test.go @@ -233,4 +233,321 @@ var _ = Describe("ChromemDB", func() { Expect(dims).To(BeNumerically(">", 0)) }) }) + + Describe("Bleve Integration and Hybrid Search", func() { + var db *ChromemDB + + BeforeEach(func() { + var err error + db, err = NewChromemDBCollection(collectionName, tempDir, openaiClient, "granite-embedding-107m-multilingual") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should create bleve index during initialization", func() { + // Verify bleve index was created (may be nil if creation failed, but that's ok) + // The important thing is that initialization doesn't fail + Expect(db).ToNot(BeNil()) + }) + + It("should store documents in both chromem and bleve", func() { + result, err := db.Store("The quick brown fox jumps over the lazy dog", map[string]string{ + "title": "Fox Story", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(result.ID).ToNot(BeEmpty()) + + // Verify document can be retrieved (chromem) + doc, err := db.GetByID(result.ID) + Expect(err).ToNot(HaveOccurred()) + Expect(doc.Content).To(ContainSubstring("fox")) + + // If bleve is available, search should work with both + results, err := db.Search("fox", 1) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(BeNumerically(">=", 1)) + }) + + It("should perform hybrid search combining bleve and chromem", func() { + // Store multiple documents with different content + _, err := db.Store("Python is a programming language", map[string]string{ + "title": "Python Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + _, err = db.Store("Go is a programming language developed by Google", map[string]string{ + "title": "Go Guide", + }) +
Expect(err).ToNot(HaveOccurred()) + + _, err = db.Store("JavaScript is used for web development", map[string]string{ + "title": "JavaScript Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + // Search for "programming" - should find Python and Go + results, err := db.Search("programming", 3) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(BeNumerically(">=", 2)) + + // Results should have similarity scores + for _, result := range results { + Expect(result.Similarity).To(BeNumerically(">=", 0)) + } + }) + + It("should handle search with keyword matching (bleve) and semantic matching (chromem)", func() { + // Store documents + _, err := db.Store("The cat sat on the mat", map[string]string{ + "title": "Cat Story", + }) + Expect(err).ToNot(HaveOccurred()) + + _, err = db.Store("A feline creature rested on a rug", map[string]string{ + "title": "Feline Story", + }) + Expect(err).ToNot(HaveOccurred()) + + // Search for "cat" - should find both via hybrid search + // First document has exact keyword match (bleve) + // Second document has semantic match (chromem) + results, err := db.Search("cat", 2) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(BeNumerically(">=", 1)) + }) + + It("should reset both chromem and bleve indexes", func() { + // Store documents + _, err := db.Store("Document 1", map[string]string{}) + Expect(err).ToNot(HaveOccurred()) + _, err = db.Store("Document 2", map[string]string{}) + Expect(err).ToNot(HaveOccurred()) + + Expect(db.Count()).To(Equal(2)) + + // Reset + err = db.Reset() + Expect(err).ToNot(HaveOccurred()) + + // Verify both are cleared + Expect(db.Count()).To(Equal(0)) + + // Verify search returns no results + results, err := db.Search("Document", 10) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(Equal(0)) + }) + + It("should delete from both chromem and bleve", func() { + // Store a document + result, err := db.Store("Document to delete", map[string]string{ + "category": "test", + }) + 
Expect(err).ToNot(HaveOccurred()) + + // Verify it exists + doc, err := db.GetByID(result.ID) + Expect(err).ToNot(HaveOccurred()) + Expect(doc.Content).To(ContainSubstring("delete")) + + // Delete by ID + err = db.Delete(map[string]string{}, map[string]string{}, result.ID) + Expect(err).ToNot(HaveOccurred()) + + // Verify it's deleted from chromem + _, err = db.GetByID(result.ID) + Expect(err).To(HaveOccurred()) + + // Verify it's not in search results + results, err := db.Search("delete", 10) + Expect(err).ToNot(HaveOccurred()) + found := false + for _, r := range results { + if r.ID == result.ID { + found = true + break + } + } + Expect(found).To(BeFalse()) + }) + + It("should store multiple documents in both indexes", func() { + results, err := db.StoreDocuments( + []string{ + "First document about programming", + "Second document about coding", + "Third document about software", + }, + map[string]string{"category": "tech"}, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(3)) + + // Verify all are searchable + searchResults, err := db.Search("programming", 10) + Expect(err).ToNot(HaveOccurred()) + Expect(len(searchResults)).To(BeNumerically(">=", 1)) + }) + + It("should work with chromem-only when bleve is unavailable", func() { + // This test verifies backward compatibility + // Even if bleve fails to initialize, chromem should still work + _, err := db.Store("Test document", map[string]string{}) + Expect(err).ToNot(HaveOccurred()) + + results, err := db.Search("Test", 1) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(BeNumerically(">=", 1)) + }) + }) + + Describe("Reranker Integration", func() { + var db *ChromemDB + var originalRerankerModel string + var originalBaseURL string + var originalAPIKey string + + BeforeEach(func() { + // Save original environment variables + originalRerankerModel = os.Getenv("RERANKER_MODEL") + originalBaseURL = os.Getenv("OPENAI_BASE_URL") + originalAPIKey = os.Getenv("OPENAI_API_KEY") + + var 
err error + db, err = NewChromemDBCollection(collectionName, tempDir, openaiClient, "granite-embedding-107m-multilingual") + Expect(err).ToNot(HaveOccurred()) + }) + + AfterEach(func() { + // Restore original environment variables + if originalRerankerModel != "" { + os.Setenv("RERANKER_MODEL", originalRerankerModel) + } else { + os.Unsetenv("RERANKER_MODEL") + } + if originalBaseURL != "" { + os.Setenv("OPENAI_BASE_URL", originalBaseURL) + } else { + os.Unsetenv("OPENAI_BASE_URL") + } + if originalAPIKey != "" { + os.Setenv("OPENAI_API_KEY", originalAPIKey) + } else { + os.Unsetenv("OPENAI_API_KEY") + } + }) + + It("should use combined score when reranker is not configured", func() { + // Store documents + _, err := db.Store("Python programming language", map[string]string{ + "title": "Python Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + _, err = db.Store("Go programming language", map[string]string{ + "title": "Go Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + // Search should work with combined scores (no reranker) + results, err := db.Search("programming", 2) + Expect(err).ToNot(HaveOccurred()) + Expect(len(results)).To(BeNumerically(">=", 1)) + + // Results should have similarity scores + for _, result := range results { + Expect(result.Similarity).To(BeNumerically(">=", 0)) + } + }) + + It("should fallback to combined score when reranker fails", func() { + // Set invalid reranker configuration to trigger fallback + os.Setenv("RERANKER_MODEL", "invalid-model") + os.Setenv("OPENAI_BASE_URL", "http://invalid-url:9999") + + // Recreate DB with new config + var err error + db, err = NewChromemDBCollection(collectionName, tempDir, openaiClient, "granite-embedding-107m-multilingual") + Expect(err).ToNot(HaveOccurred()) + + // Store documents + _, err = db.Store("Test document about programming", map[string]string{}) + Expect(err).ToNot(HaveOccurred()) + + // Search should fallback to combined score approach + results, err := db.Search("programming", 1) 
+ Expect(err).ToNot(HaveOccurred()) + // Should still return results (fallback works) + Expect(results).ToNot(BeNil()) + }) + + It("should work with reranker when properly configured", func() { + // Get LocalAI endpoint from test setup + localAIEndpoint := os.Getenv("LOCALAI_ENDPOINT") + if localAIEndpoint == "" { + localAIEndpoint = "http://localhost:8081" + } + + // Set reranker configuration (if available) + os.Setenv("RERANKER_MODEL", "jina-reranker-v1-base-en") + os.Setenv("OPENAI_BASE_URL", localAIEndpoint) + os.Setenv("OPENAI_API_KEY", "sk-test") + + // Recreate DB with reranker config + var err error + db, err = NewChromemDBCollection(collectionName, tempDir, openaiClient, "granite-embedding-107m-multilingual") + Expect(err).ToNot(HaveOccurred()) + + // Store documents + _, err = db.Store("Python is a programming language", map[string]string{ + "title": "Python Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + _, err = db.Store("Go is a programming language developed by Google", map[string]string{ + "title": "Go Guide", + }) + Expect(err).ToNot(HaveOccurred()) + + // Search - will attempt to use reranker, but may fallback if reranker not available + results, err := db.Search("programming", 2) + Expect(err).ToNot(HaveOccurred()) + // Should return results (either from reranker or fallback) + Expect(len(results)).To(BeNumerically(">=", 1)) + + // Results should have similarity scores + for _, result := range results { + Expect(result.Similarity).To(BeNumerically(">=", 0)) + } + }) + + It("should respect top_n limit when using reranker", func() { + // Get LocalAI endpoint from test setup + localAIEndpoint := os.Getenv("LOCALAI_ENDPOINT") + if localAIEndpoint == "" { + localAIEndpoint = "http://localhost:8081" + } + + // Set reranker configuration + os.Setenv("RERANKER_MODEL", "jina-reranker-v1-base-en") + os.Setenv("OPENAI_BASE_URL", localAIEndpoint) + os.Setenv("OPENAI_API_KEY", "sk-test") + + // Recreate DB with reranker config + var err error + db, err = 
NewChromemDBCollection(collectionName, tempDir, openaiClient, "granite-embedding-107m-multilingual") + Expect(err).ToNot(HaveOccurred()) + + // Store multiple documents + for i := 0; i < 5; i++ { + _, err = db.Store(fmt.Sprintf("Document %d about programming", i), map[string]string{}) + Expect(err).ToNot(HaveOccurred()) + } + + // Search with limit + results, err := db.Search("programming", 3) + Expect(err).ToNot(HaveOccurred()) + // Should respect the limit (or return what's available) + Expect(len(results)).To(BeNumerically("<=", 3)) + }) + }) }) diff --git a/routes.go b/routes.go index a124fa1..acfc6c9 100644 --- a/routes.go +++ b/routes.go @@ -182,9 +182,9 @@ func deleteEntryFromCollection(collections collectionList) func(c echo.Context) remainingEntries := collection.ListDocuments() response := successResponse("Entry deleted successfully", map[string]interface{}{ - "deleted_entry": r.Entry, + "deleted_entry": r.Entry, "remaining_entries": remainingEntries, - "entry_count": len(remainingEntries), + "entry_count": len(remainingEntries), }) return c.JSON(http.StatusOK, response) } @@ -321,8 +321,8 @@ func uploadFile(collections collectionList, fileAssets string) func(c echo.Conte } response := successResponse("File uploaded successfully", map[string]interface{}{ - "filename": file.Filename, - "collection": name, + "filename": file.Filename, + "collection": name, "uploaded_at": time.Now().Format(time.RFC3339), }) return c.JSON(http.StatusOK, response)