// Layout constants for the slice-backed binary search tree.
const (
	// BSTDATAOFFSET is the number of slots reserved at the front of the
	// backing slice. Slot 0 is never written by the tree and therefore keeps
	// the zero value of Key, which doubles as the "empty" sentinel.
	BSTDATAOFFSET = 1
	// BSTROOTNODEID is the node index of the root.
	BSTROOTNODEID = 0
	// LEFTOFFSET is added to 2*index to obtain the left child index.
	LEFTOFFSET = 1
	// RIGHTOFFSET is added to 2*index to obtain the right child index.
	RIGHTOFFSET = 2
)

// Binary is a binary search tree stored in a flat slice. Node i lives in
// slot i+BSTDATAOFFSET; its children are nodes 2i+1 and 2i+2.
type Binary[Key cmp.Ordered] []Key

// BinaryTree wraps a Binary with the bookkeeping needed to tell a stored
// zero-valued key apart from an empty slot and to track the highest node
// index that has been written.
type BinaryTree[Key cmp.Ordered] struct {
	*Binary[Key]
	// zeroIndex is the node index holding a real key equal to the zero
	// value, or -1 when no such key is stored.
	zeroIndex                 int
	least, greatest, lastNode int
}

// NewBinaryTree returns an empty tree whose backing slice has room for at
// least capacity slots. Capacities smaller than the sentinel slot are raised
// so that construction never fails.
func NewBinaryTree[Key cmp.Ordered](capacity int) (bst *BinaryTree[Key]) {
	tree := make(Binary[Key], BSTDATAOFFSET, max(capacity, BSTDATAOFFSET))
	// zeroIndex starts at -1: index 0 is the root, so leaving the field at
	// its zero value would claim that the root stores a zero key.
	return &BinaryTree[Key]{Binary: &tree, zeroIndex: -1}
}

// Empty returns the sentinel value that marks an unused slot.
func (b *Binary[Key]) Empty() Key {
	return (*b)[0]
}

// IsInternal reports whether both children of index are non-empty.
func (b *Binary[Key]) IsInternal(index int) bool {
	childIdx := 2 * index
	return !b.IsEmpty(childIdx+LEFTOFFSET) && !b.IsEmpty(childIdx+RIGHTOFFSET)
}

// IsRight reports whether index is an even number, which is the case for
// every right child (and also for the root, index 0).
func (b *Binary[Key]) IsRight(index int) bool {
	return index%2 == 0
}

// Left returns the node index of the left child of index.
func (b *Binary[Key]) Left(index int) int {
	return (2 * index) + LEFTOFFSET
}

// Right returns the node index of the right child of index.
func (b *Binary[Key]) Right(index int) int {
	return (2 * index) + RIGHTOFFSET
}

// Get returns the value stored at node index. It panics if the node lies
// outside the backing slice.
func (b *Binary[Key]) Get(index int) Key {
	return (*b)[index+BSTDATAOFFSET]
}

// Set stores v at node index. It panics if the node lies outside the backing
// slice; call Grow first.
func (b *Binary[Key]) Set(index int, v Key) {
	(*b)[index+BSTDATAOFFSET] = v
}

// Set stores v at node index, remembers the index when v equals the empty
// sentinel, and advances the last-node marker.
func (b *BinaryTree[Key]) Set(index int, v Key) {
	b.Binary.Set(index, v)
	if v == b.Empty() {
		b.setZeroIndex(index)
	}
	b.setLastNode(index)
}

// SetEmpty writes the empty sentinel to node index.
func (b *Binary[Key]) SetEmpty(index int) {
	(*b)[index+BSTDATAOFFSET] = b.Empty()
}

// SetEmpty clears node index and forgets the zero-key marker when it pointed
// at that node.
func (b *BinaryTree[Key]) SetEmpty(index int) {
	b.Binary.SetEmpty(index)
	if index == b.zeroIndex {
		b.setZeroIndex(-1)
	}
	b.setLastNode(index)
}

// SetEmptyRange writes the empty sentinel to every node in [start, end].
func (b *Binary[Key]) SetEmptyRange(start int, end int) {
	empty := b.Empty()
	for i := start; i <= end; i++ {
		(*b)[i+BSTDATAOFFSET] = empty
	}
}

// SetEmptyRange clears every node in [start, end] and forgets the zero-key
// marker when it pointed into that range.
func (b *BinaryTree[Key]) SetEmptyRange(start int, end int) {
	b.Binary.SetEmptyRange(start, end)
	if b.zeroIndex >= start && b.zeroIndex <= end {
		b.setZeroIndex(-1)
	}
	b.setLastNode(start)
}

func (b *BinaryTree[Key]) setZeroIndex(index int) {
	b.zeroIndex = index
}

// setLastNode raises the last-node marker to index; it never lowers it.
func (b *BinaryTree[Key]) setLastNode(index int) {
	b.lastNode = max(b.lastNode, index)
}

// LastNode returns the highest node index recorded by Set, SetEmpty or
// SetEmptyRange.
func (b *BinaryTree[Key]) LastNode() int {
	return b.lastNode
}

// MaxDepth returns the depth of the node reported by LastNode.
func (b *BinaryTree[Key]) MaxDepth() int {
	return b.Depth(b.lastNode)
}

// Copy duplicates the value at node from into node to.
func (b *Binary[Key]) Copy(from int, to int) {
	(*b)[to+BSTDATAOFFSET] = (*b)[from+BSTDATAOFFSET]
}

// Copy duplicates the value at node from into node to and moves the zero-key
// marker along when it pointed at from.
func (b *BinaryTree[Key]) Copy(from, to int) {
	b.Binary.Copy(from, to)
	if b.zeroIndex == from {
		b.setZeroIndex(to)
	}
	slog.Info("Copy()", "from", from, "to", to, "value", b.Get(from), "zero", b.zeroIndex)
}

// LevelRange returns the first and last node index of the subtree rooted at
// treeIndex on the level that contains index.
//
// When index is on the same level as treeIndex, or one level below it, the
// range is returned only if index actually falls inside it; otherwise the
// result is (-1, -1). For deeper levels the range is returned without that
// check, so callers such as InSubTree must compare index against it.
func (b *Binary[Key]) LevelRange(index int, treeIndex int) (start int, end int) {
	treeDepth := b.Depth(treeIndex)
	depth := b.Depth(index)
	diff := depth - treeDepth
	switch diff {
	case 0:
		if index == treeIndex {
			return index, index
		}
	case 1:
		leftIdx := (2 * treeIndex) + 1
		if index >= leftIdx && index <= leftIdx+1 {
			return leftIdx, leftIdx + 1
		}
	default:
		exp := int(math.Exp2(float64(diff)))
		level := exp * treeIndex
		childOffset := exp - 1

		left := level + childOffset
		right := level + (childOffset * 2)
		return left, right
	}
	return -1, -1
}

// InSubTree reports whether node index belongs to the subtree rooted at
// treeIndex.
func (b *Binary[Key]) InSubTree(index, treeIndex int) bool {
	if treeIndex > index {
		return false
	}
	left, right := b.LevelRange(index, treeIndex)
	return index >= left && index <= right
}

// MoveSubTree relocates the descendants of from underneath to, one level at
// a time, clears the vacated source levels, and finally copies the value of
// from into to. The value at from itself is left in place.
//
// It panics when to is not a leaf, because the move would overwrite an
// existing subtree.
func (b *BinaryTree[Key]) MoveSubTree(from, to int) {
	if !b.IsLeaf(to) {
		panic("this move would overwrite the target subtree")
	}
	srccursor := b.Left(from)
	dstcursor := b.Left(to)
	for srccursor < b.lastNode && srccursor < len(*b.Binary) {
		srcstart, srcend := b.LevelRange(srccursor, from)
		dststart, dstend := b.LevelRange(dstcursor, to)
		src := (*b.Binary)[srcstart+BSTDATAOFFSET : srcend+BSTDATAOFFSET+1]
		dst := (*b.Binary)[dststart+BSTDATAOFFSET : dstend+BSTDATAOFFSET+1]
		slog.Info("MoveSubTree()", "srcstart", srcstart, "srcend", srcend, "dststart", dststart, "dstend", dstend, "source", src, "dest", dst)
		copy(dst, src)
		b.SetEmptyRange(srcstart, srcend)

		// Step both cursors down to the leftmost node of the next level.
		srccursor = b.Left(srccursor)
		dstcursor = b.Left(dstcursor)
		slog.Info("MoveSubTree() post-copy", "srcstart", srcstart, "srcend", srcend, "dststart", dststart, "dstend", dstend, "data", b.Binary)
	}
	b.Copy(from, to)
}

// IsLeaf reports whether both child slots of index hold the same value,
// which is the case when both are empty.
func (b *Binary[Key]) IsLeaf(index int) bool {
	childIdx := 2 * (index)
	return (*b)[childIdx+LEFTOFFSET+BSTDATAOFFSET] == (*b)[childIdx+RIGHTOFFSET+BSTDATAOFFSET]
}

// IsLeaf reports whether both child slots of index hold the same value and
// neither child is the node recorded as storing a real zero-valued key.
func (b *BinaryTree[Key]) IsLeaf(index int) bool {
	childIdx := 2 * (index)
	leftIdx := childIdx + LEFTOFFSET
	rightIdx := childIdx + RIGHTOFFSET
	return b.Get(leftIdx) == b.Get(rightIdx) && leftIdx != b.zeroIndex && rightIdx != b.zeroIndex
}

// ParentIndex returns the node index of the parent of index.
func (b *Binary[Key]) ParentIndex(index int) (parent int) {
	parent = index
	if b.IsRight(index) {
		parent -= RIGHTOFFSET
	} else {
		parent -= LEFTOFFSET
	}
	return parent / 2
}

// Depth returns the 1-based level of node index: 1 for the root, 2 for its
// children, and so on.
func (b *Binary[Key]) Depth(index int) int {
	return int(math.Ceil(math.Log2(float64(index + 2))))
}

// Width returns the number of node slots on the level that contains index.
func (b *Binary[Key]) Width(index int) int {
	return int(math.Exp2(float64(b.Depth(index) - 1)))
}

// IsFullLevel reports whether every slot on the level that contains index is
// non-empty. A level that extends past the end of the slice is not full.
func (b *Binary[Key]) IsFullLevel(index int) bool {
	width := b.Width(index)
	start := width - 1 // first node index on a level equals its width minus one
	end := start + width + BSTDATAOFFSET
	if end > len(*b) {
		return false
	}
	for _, v := range (*b)[start+BSTDATAOFFSET : end] {
		if v == b.Empty() {
			return false
		}
	}
	return true
}

// IsEmpty reports whether node index holds the empty sentinel. Nodes beyond
// the end of the backing slice are reported as empty. Because the sentinel
// is the zero value of Key, a stored key equal to the zero value is also
// reported as empty here.
func (b *Binary[Key]) IsEmpty(index int) bool {
	slot := index + BSTDATAOFFSET
	return slot >= len(*b) || (*b)[slot] == b.Empty()
}

// Ascend returns the child of idx on the side item belongs: the left child
// when item is smaller than the value at idx, the right child otherwise.
func (b *Binary[Key]) Ascend(idx int, item Key) int {
	if item < b.Get(idx) {
		return b.Left(idx)
	}
	return b.Right(idx)
}

// Grow extends the backing slice so that node index, its children and its
// grandchildren are all addressable. Insert inspects that whole
// neighbourhood (and rotations write into it), so anything smaller can index
// past the end of the slice. Newly exposed slots hold the empty sentinel.
func (b *Binary[Key]) Grow(index int) {
	size := len(*b) - BSTDATAOFFSET
	slog.Info("Grow()", "size", size, "index", index, "cap", cap(*b))
	// Slice length required to address the right-most grandchild of index.
	needed := b.Right(b.Right(index)) + BSTDATAOFFSET + 1
	if index >= size/2 || len(*b) < needed {
		targetSize := max(size+(2*index+1)*3+BSTDATAOFFSET, needed)
		if cap(*b) < targetSize {
			// Allocate explicitly rather than appending a filler slice:
			// reslicing after an append that stayed within the old capacity
			// would exceed that capacity and panic.
			grown := make(Binary[Key], targetSize, 2*targetSize)
			copy(grown, *b)
			*b = grown
			slog.Info("Grow()", "size", size, "index", index, "targetsize", targetSize, "cap", cap(*b))
		} else {
			*b = (*b)[:targetSize]
		}
	}
	slog.Info("Grow()", "size", len(*b))
}

// RotateRight moves the value at index into its right child, moves the value
// of the left child into index, clears the left child and returns the left
// child's index.
func (b *BinaryTree[Key]) RotateRight(index, right, left int) int {
	slog.Info("PreRotateRight", "index", index, "leftindex", left, "rightindex", right, "left", b.Get(left), "node", b.Get(index), "right", b.Get(right))
	b.Copy(index, right)
	b.Copy(left, index)
	b.SetEmpty(left)
	slog.Info("RotateRight", "index", index, "leftindex", left, "rightindex", right, "left", b.Get(left), "node", b.Get(index), "right", b.Get(right))
	return left
}

// RotateLeft moves the value at index into its left child, moves the value
// of the right child into index, clears the right child and returns the
// right child's index.
func (b *BinaryTree[Key]) RotateLeft(index, left, right int) int {
	slog.Info("PreRotateLeft", "index", index, "leftindex", left, "rightindex", right, "left", b.Get(left), "node", b.Get(index), "right", b.Get(right))
	b.Copy(index, left)
	b.Copy(right, index)
	b.SetEmpty(right)
	slog.Info("RotateLeft", "index", index, "leftindex", left, "rightindex", right, "left", b.Get(left), "node", b.Get(index), "right", b.Get(right))
	return right
}

// Insert places item in the tree and returns the node index it was stored
// at. Starting from the root it walks down by comparison; when it meets a
// node with exactly one child on the side item belongs to, and that child is
// a leaf, it rotates the three-node chain so the freed child slot can take
// item.
//
// It panics when item belongs on the left of a node whose only child is a
// left child that is not a leaf.
func (b *BinaryTree[Key]) Insert(item Key) (idx int) {
	idx = BSTROOTNODEID
	slog.Info("Insert()", "item", item)

	for {
		// Make the node, its children and its grandchildren addressable
		// before inspecting them.
		b.Grow(idx)

		if b.IsEmpty(idx) {
			b.Set(idx, item)
			return idx
		}

		rightIdx := b.Right(idx)
		leftIdx := b.Left(idx)
		if b.IsInternal(idx) || b.IsLeaf(idx) {
			idx = b.Ascend(idx, item)
			continue
		}

		// The node has exactly one occupied child.
		slog.Info("Insert()", "item", item, "index", idx)
		if item < b.Get(idx) { // left
			slog.Info("Insert()", "item", item, "index", idx)
			if !b.IsEmpty(rightIdx) {
				idx = leftIdx
				continue
			}
			if !b.IsLeaf(leftIdx) {
				panic("the left node is not a leaf")
			}
			idx = b.RotateRight(idx, rightIdx, leftIdx)
			continue
		}

		// right
		switch {
		case !b.IsEmpty(leftIdx):
			idx = rightIdx
		case !b.IsLeaf(rightIdx):
			idx = rightIdx
		default:
			idx = b.RotateLeft(idx, leftIdx, rightIdx)
		}
	}
}

// BinaryBFSItem is the callback invoked by BFS for every visited node.
type BinaryBFSItem[Key cmp.Ordered] func(index int, depth int, v Key)

// BFS visits the root and then, level by level, every node reachable through
// non-empty children, calling c for each. It returns the depth of the last
// node visited.
func (b *Binary[Key]) BFS(c BinaryBFSItem[Key]) (depth int) {
	queue := []int{BSTROOTNODEID}
	for len(queue) > 0 {
		x := queue[0]
		queue = queue[1:]
		depth = b.Depth(x)
		c(x, depth, b.Get(x))
		leftIdx := b.Left(x)
		rightIdx := b.Right(x)
		if !b.IsEmpty(leftIdx) {
			queue = append(queue, leftIdx)
		}
		if !b.IsEmpty(rightIdx) {
			queue = append(queue, rightIdx)
		}
	}
	return depth
}
// TestNewBinary inserts a fixed sequence of keys and checks, for each one,
// the node index returned by Insert and the value stored there. It then
// checks the recorded zero-key index and that BFS reports the same value as
// Get for every node it visits.
func TestNewBinary(t *testing.T) {
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)

	for _, v := range []struct { expectedindex , value int } {
		{ expectedindex: BSTROOTNODEID, value: 7 },
		{ expectedindex: 2*BSTROOTNODEID + LEFTOFFSET, value: 4 },
		{ expectedindex: 2*BSTROOTNODEID + RIGHTOFFSET, value: 9 },
		{ expectedindex: 4 + RIGHTOFFSET, value: 18 },
		{ expectedindex: 2 + RIGHTOFFSET, value: 5 },
		{ expectedindex: 4 + RIGHTOFFSET, value: 20 },
		{ expectedindex: 4 - LEFTOFFSET, value: 3 },
		{ expectedindex: 8 - LEFTOFFSET, value: 2 },
		{ expectedindex: 8 - LEFTOFFSET, value: 1 },
		{ expectedindex: 16 - LEFTOFFSET, value: 0 },
		{ expectedindex: 16 - LEFTOFFSET, value: -1 },
	} {
		idx := b.Insert(v.value)
		slog.Info("TestInsert()", "index", idx, "value", v, "b", b, "data", b.Binary)
		assert.Equal(t, v.expectedindex, idx)
		assert.Equal(t, v.value, b.Get(v.expectedindex))
	}
	assert.Equal(t, 7, b.zeroIndex)
	b.BFS(func(index int, depth int, v int) {
		slog.Info("BFS()", "index", index, "depth", depth, "value", v, "b", b, "data", b.Binary)
		assert.Equal(t, v, b.Get(index))
	})
}

// TestBinaryDepth checks Depth for a handful of node indexes.
func TestBinaryDepth(t *testing.T) {
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)
	for _, v := range []struct{ expecteddepth, index int }{
		{ expecteddepth: 1, index: 0 },
		{ expecteddepth: 2, index: 1 },
		{ expecteddepth: 2, index: 2 },
		{ expecteddepth: 3, index: 5 },
	} {
		assert.Equal(t, v.expecteddepth, b.Depth(v.index))
	}
}

// TestBinaryWidth checks Width (slots per level) for a handful of node
// indexes.
func TestBinaryWidth(t *testing.T) {
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)
	for _, v := range []struct{ expectedwidth, index int }{
		{ expectedwidth: 1, index: 0 },
		{ expectedwidth: 2, index: 1 },
		{ expectedwidth: 2, index: 2 },
		{ expectedwidth: 4, index: 5 },
		{ expectedwidth: 8, index: 8 },
		{ expectedwidth: 16, index: 17 },
	} {
		assert.Equal(t, v.expectedwidth, b.Width(v.index))
	}
}

// TestBinaryInSubTree checks LevelRange and InSubTree for nodes inside and
// outside a given subtree.
func TestBinaryInSubTree(t *testing.T) {
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)
	for _, v := range []struct{ expected bool; tree, index, start, end int }{
		{ expected: true, tree: 0, index: 0, start: 0, end: 0 },
		{ expected: true, tree: 4, index: 19, start: 19, end: 22 },
		{ expected: false, tree: 4, index: 13, start: -1, end: -1 },
		{ expected: false, tree: 5, index: 6, start: -1, end: -1 },
	} {
		start, end := b.LevelRange(v.index, v.tree)
		assert.Equal(t, v.start, start)
		assert.Equal(t, v.end, end)
		assert.Equal(t, v.expected, b.InSubTree(v.index, v.tree))
	}
}

// TestBinaryMoveSubTree builds a small tree, moves the subtree at node 2
// under node 6, rotates the root, inserts one more key and compares the
// first 16 slots of the backing slice with the expected layout.
func TestBinaryMoveSubTree(t *testing.T) {
	expected := []int{ 0, 4, 0, 7, 0, 0, 5, 18, 0, 0, 0, 0, 0, 0, 9, 20 }
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)

	b.Insert(7)
	b.Insert(4)
	b.Insert(9)
	b.Insert(18)
	b.Insert(20)
	b.MoveSubTree(2, 6)
	b.RotateRight(0, 2, 1)
	b.Insert(5)
	assert.EqualValues(t, expected, (*b.Binary)[:16])
}

// TestBinaryMaxDepth inserts a sequence of keys and logs MaxDepth, LastNode
// and the backing slice. It asserts nothing beyond successful construction.
func TestBinaryMaxDepth(t *testing.T) {
	//expected := []int{ 0, 4, 0, 7, 0, 0, 5, 18, 0, 0, 0, 0, 0, 0, 9, 20 }
	b := NewBinaryTree[int](9)
	assert.NotNil(t, b)

	b.Insert(7)
	b.Insert(4)
	b.Insert(9)
	b.Insert(18)
	b.Insert(20)
	b.Insert(5)
	b.Insert(11)
	b.Insert(-1)
	b.Insert(33)
	b.Insert(15)
	b.Insert(13)
	slog.Info("TestBinaryMaxDepth", "maxdepth", b.MaxDepth(), "last", b.LastNode(), "data", (*b.Binary))
}
var (
	// AlphabetSize is the number of child slots allocated for every node.
	AlphabetSize uint = 26
	// FirstAlphabetChar is the character mapped to child slot 0.
	FirstAlphabetChar = uint8('a')
)

// String is a string that can add itself to a trie.
type String string

// TrieNode is a node of a prefix tree over the alphabet that starts at
// FirstAlphabetChar and spans AlphabetSize characters.
type TrieNode struct {
	// Children holds one slot per alphabet character; nil means no child.
	Children []*TrieNode
	// Words counts how many inserted keys end at this node.
	Words int
}

// NewTrieNode returns a node with AlphabetSize empty child slots.
func NewTrieNode() *TrieNode {
	return &TrieNode{Children: make([]*TrieNode, AlphabetSize), Words: 0}
}

// slot returns the position of c in r.Children. ok is false when c lies
// outside the alphabet or the node has no slot for it. The comparison is
// done on the full rune so that code points above 255 are rejected rather
// than truncated onto a valid slot.
func (r *TrieNode) slot(c rune) (i int, ok bool) {
	i = int(c) - int(FirstAlphabetChar)
	return i, i >= 0 && i < len(r.Children)
}

// childCount returns the number of non-nil children of r.
func (r *TrieNode) childCount() int {
	count := 0
	for _, child := range r.Children {
		if child != nil {
			count++
		}
	}
	return count
}

// Insert adds one occurrence of key to the trie rooted at r. A key that
// contains a character outside the alphabet is rejected as a whole and
// leaves the trie unchanged.
func (r *TrieNode) Insert(key string) {
	// Validate first so a bad character cannot leave a partial path behind.
	for _, c := range key {
		if i := int(c) - int(FirstAlphabetChar); i < 0 || i >= int(AlphabetSize) {
			return
		}
	}
	current := r
	for _, c := range key {
		if len(current.Children) < int(AlphabetSize) {
			// Covers a zero-value TrieNode that was not built by NewTrieNode.
			grown := make([]*TrieNode, AlphabetSize)
			copy(grown, current.Children)
			current.Children = grown
		}
		charIndex := int(c) - int(FirstAlphabetChar)
		if current.Children[charIndex] == nil {
			current.Children[charIndex] = NewTrieNode()
		}
		current = current.Children[charIndex]
	}
	current.Words++
}

// Index inserts s into the trie rooted at root.
func (s String) Index(root *TrieNode) {
	root.Insert(string(s))
}

// Search reports whether key has been inserted into the trie rooted at r.
// Keys containing characters outside the alphabet are never found.
func (r *TrieNode) Search(key string) bool {
	current := r
	for _, c := range key {
		charIndex, ok := current.slot(c)
		if !ok || current.Children[charIndex] == nil {
			return false
		}
		current = current.Children[charIndex]
	}
	return current.Words > 0
}

// Delete removes one occurrence of word from the trie rooted at r and
// reports whether it was present. When the last occurrence is removed, the
// branch that served only this word is pruned; nodes that end another word
// or lead to other words are kept.
func (r *TrieNode) Delete(word string) bool {
	current := r
	// keep is the deepest node on the path that must survive the deletion;
	// keepSlot is the child slot below it that leads towards word.
	keep, keepSlot := r, -1
	for _, c := range word {
		charIndex, ok := current.slot(c)
		if !ok || current.Children[charIndex] == nil {
			return false
		}
		if current == r || current.Words > 0 || current.childCount() > 1 {
			keep, keepSlot = current, charIndex
		}
		current = current.Children[charIndex]
	}

	if current.Words <= 0 { // the path exists only as a prefix of other words
		return false
	}
	current.Words--

	// keepSlot is -1 only for the empty word, which ends at the root.
	if current.Words == 0 && current.childCount() == 0 && keepSlot >= 0 {
		keep.Children[keepSlot] = nil
	}
	return true
}
// TestNewTrie checks that NewTrieNode returns a non-nil node.
func TestNewTrie(t *testing.T) {
	n := NewTrieNode()
	assert.NotNil(t, n)
}