# README
scanner
Semantic Scanner implements semantic chunking.
Quick Example
package main
import (
"bufio"
"fmt"
"os"
"strings"
"github.com/kshard/embeddings/bedrock"
"github.com/kshard/embeddings/scanner"
)
func main() {
// Semantic scanner requires text embedding model
embeddings, err := bedrock.New(
bedrock.WithModel(bedrock.TITAN_EMBED_TEXT_V2),
bedrock.WithDimension(256),
)
if err != nil {
panic(err)
}
fd, err := os.Open("path to your files")
if err != nil {
panic(err)
}
// create and config scanner instance
s := scanner.New(embeddings, scanner.NewSentences(fd))
s.Similarity(scanner.HighSimilarity)
s.Window(96)
// scan through text
for s.Scan() {
text := s.Text()
fmt.Printf("%s\n", strings.Join(text, " "))
}
if err := s.Err(); err != nil {
panic(err)
}
# Functions
Dissimilar is a cosine distance in the range (0.8, 1.0].
High Similarity is a cosine distance in the range [0, 0.2].
Medium Similarity is a cosine distance in the range (0.2, 0.5].
Creates a new instance of Scanner that reads from an io.Reader and uses the given embedding.
Creates an instance of [bufio.Scanner] configured for naïve sentence scanning.
Creates a new instance of Sorter that reads from a seq.Seq[T] and uses the given embedding.
ScanSentence is a split function for a [bufio.Scanner] that returns each sentence.
Weak Similarity is a cosine distance in the range (0.5, 0.8].
# Constants
No description provided by the author
# Interfaces
Reader is an interface similar to [bufio.Scanner].