概述
Gin的路由算法是采用压缩字典树实现的,基数树(Radix Tree)又称为PAT位树(Patricia Trie or crit bit tree),是一种更节省空间的前缀树(Trie Tree)。对于基数树的每个节点,如果该节点是唯一的子树的话,就和父节点合并。下图为一个基数树示例:
https://www.cnblogs.com/randysun/p/15841366.html
算法模拟
结构体组成:
type TrieNode struct {
indices string
value string
children []*TrieNode
}
func NewTrieTree() *TrieNode {
return &TrieNode{}
}
新增节点,主要考虑的难点是存在部分共同前缀时需要进行分裂:
func (n *TrieNode) InsertNode(str string) (inserted bool) {
lenn := len(n.value)
if lenn < len(str) && n.value == str[0:lenn] || n.value == "" {
c := str[lenn]
if strings.IndexByte(n.indices, c) == -1 {
n.indices += string(c)
newNode := TrieNode{
indices: "",
value: str[lenn:],
children: nil,
}
if n.children == nil {
n.children = []*TrieNode{
&newNode,
}
} else {
n.children = append(n.children, &newNode)
}
return true
} else if n.children != nil {
for _, child := range n.children {
strSub := str[lenn:]
if strSub[0] != child.value[0] {
continue
}
inserted = child.InsertNode(strSub)
if inserted {
return
}
}
}
}
index := FindPrefix(n.value, str)
if index != -1 {
i := index + 1
p := n.value[0:i]
s := n.value[i:]
s2 := str[i:]
n.value = p
newNode := TrieNode{
indices: n.indices,
value: s,
children: n.children,
}
newNode2 := TrieNode{
indices: "",
value: s2,
children: nil,
}
n.indices = s[0:1]
s2Idx := s2[0:1]
if n.indices != s2Idx {
n.indices += s2Idx
}
n.children = []*TrieNode{
&newNode, &newNode2,
}
return true
}
return false
}
节点查找:递归版本
func (n *TrieNode) FindNode(str string) (found bool) {
if str == "" {
return false
}
if str == n.value {
return true
}
lenn := len(n.value)
if len(str) > lenn && str[0:lenn] == n.value && n.children != nil {
str = str[lenn:]
idx := str[0]
if strings.IndexByte(n.indices, idx) != -1 {
for _, child := range n.children {
if child.value[0] == idx {
if child.value == str {
return true
}
found = child.FindNode(str)
if found {
return
}
} else {
continue
}
}
}
}
return
}
格式化打印树:
// FormatTree 格式化树结构
// --
// |__ a
// |__ bd
// |__ d
func (n *TrieNode) FormatTree() string {
var buf bytes.Buffer
if n.value == "" {
buf.WriteString("\n-- \n")
} else {
buf.WriteString("|__ " + n.value + "\n")
}
if n.children != nil {
for _, child := range n.children {
childStr := child.FormatTree()
// 增加前缀
// 分割行
sps := strings.Split(childStr, "\n")
for _, sp := range sps {
if sp != "" {
buf.WriteString(" ")
buf.WriteString(sp)
buf.WriteString("\n")
}
}
}
}
return buf.String()
}
非递归版本查找算法:
func (n *TrieNode) FindNode2(str string) (found bool) {
nowNode := n
walk:
if str == "" {
return false
}
if str == nowNode.value {
return true
}
lenn := len(nowNode.value)
if len(str) > lenn && str[0:lenn] == nowNode.value && nowNode.children != nil {
str = str[lenn:]
idx := str[0]
if strings.IndexByte(nowNode.indices, idx) != -1 {
for _, child := range nowNode.children {
if child.value[0] == idx {
nowNode = child
goto walk
} else {
continue
}
}
}
}
return
}
算法性能测试和对比
新增节点的耗时和内存消耗对比,因为gin的节点包含更多信息,所以gin的内存占用是最多的,占用内存最少的是List存储,其次是hashmap,然后是压缩字典树,为什么压缩字典树内存消耗这么多呢,很大程度是因为索引。索引使得每个节点需要存储的信息大大增加。
var size = 1000_0000
func Test_InsertNodeMem_TrieTree(t *testing.T) {
mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)
startMem := mem.TotalAlloc/MiB
t.Logf("startMem usage:%d MB", startMem)
tree := NewTrieTree()
for i := 0; i < size; i++ {
str := strconv.Itoa(rand.Int())
tree.InsertNode(str)
}
runtime.ReadMemStats(&mem)
curMem := mem.TotalAlloc/MiB - startMem
t.Logf("memory usage:%d MB", curMem)
}
func Test_InsertNodeMem_HashMap(t *testing.T) {
mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)
startMem := mem.TotalAlloc/MiB
t.Logf("startMem usage:%d MB", startMem)
m := make(map[string]bool, size)
for i := 0; i < size; i++ {
str := strconv.Itoa(rand.Int())
m[str] = true
}
runtime.ReadMemStats(&mem)
curMem := mem.TotalAlloc/MiB - startMem
t.Logf("memory usage:%d MB", curMem)
}
func Test_InsertNodeMem_List(t *testing.T) {
mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)
startMem := mem.TotalAlloc/MiB
t.Logf("startMem usage:%d MB", startMem)
l := make([]string, size)
for i := 0; i < size; i++ {
str := strconv.Itoa(rand.Int())
l[i] = str
}
runtime.ReadMemStats(&mem)
curMem := mem.TotalAlloc/MiB - startMem
t.Logf("memory usage:%d MB", curMem)
}
func Test_InsertNode_GinTree(t *testing.T) {
mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)
startMem := mem.TotalAlloc/MiB
t.Logf("startMem usage:%d MB", startMem)
tree := &node{}
for i := 0; i < size; i++ {
str := strconv.Itoa(rand.Int())
tree.addRoute(str, []HandlerFunc{func(*gin.Context) {}})
}
runtime.ReadMemStats(&mem)
curMem := mem.TotalAlloc/MiB - startMem
t.Logf("memory usage:%d MB", curMem)
}
对比查找性能:性能不如hashmap也情有可原,毕竟字典树的优点最主要是支持通配符路由,gin的性能不错(因为实现了优先级,查找的平均用时会更短)。但是距离hashmap还有五倍的落后。
每个树级别上的子节点都按Priority(优先级)排序,其中优先级(最左列)就是在子节点(子节点、子子节点等等)中注册的句柄的数量。这样做有两个好处:
首先优先匹配被大多数路由路径包含的节点。这样可以让尽可能多的路由快速被定位。
类似于成本补偿。最长的路径可以被优先匹配,补偿体现在最长的路径需要花费更长的时间来定位,如果最长路径的节点能被优先匹配(即每次拿子节点都命中),那么路由匹配所花的时间不一定比短路径的路由长。下面展示了节点(每个-可以看做一个节点)匹配的路径:从左到右,从上到下。
func Benchmark_FindNode(b *testing.B) {
b.StopTimer()
tree := NewTrieTree()
ginTree := &node{}
size := 1000_0000
m := make(map[string]bool, size)
l := make([]string, size)
for i := 0; i < size; i++ {
str := strconv.Itoa(rand.Int())
tree.InsertNode(str)
ginTree.addRoute(str, []HandlerFunc{func(*gin.Context) {}})
l[i] = str
m[str] = true
}
b.StartTimer()
b.Run("TrieTree", func(b *testing.B) {
for i := 0; i < b.N; i++ {
str := strconv.Itoa(rand.Int())
tree.FindNode(str)
}
})
b.Run("TrieTree2", func(b *testing.B) {
for i := 0; i < b.N; i++ {
str := strconv.Itoa(rand.Int())
tree.FindNode2(str)
}
})
b.Run("HashMap", func(b *testing.B) {
for i := 0; i < b.N; i++ {
str := strconv.Itoa(rand.Int())
_ = m[str] == true
}
})
b.Run("List", func(b *testing.B) {
for i := 0; i < b.N; i++ {
str := strconv.Itoa(rand.Int())
for j, length := 0, len(l); j < length; j++ {
if l[j] == str {
break
}
}
}
})
b.Run("Gin", func(b *testing.B) {
for i := 0; i < b.N; i++ {
str := strconv.Itoa(rand.Int())
ginTree.getValue(str, nil, &[]skippedNode{}, false)
}
})
}
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)