-
Notifications
You must be signed in to change notification settings - Fork 680
Description
Bug Report
Noticed during one of my audits and security vulnerability hunts that this code
Lines 34 to 39 in f4d73cd
func NewPeerSet() *PeerSet { | |
return &PeerSet{ | |
lookup: make(map[ID]*peerSetItem), | |
list: make([]Peer, 0, 256), | |
} | |
} |
Line 57 in f4d73cd
ps.list = append(ps.list, peer) |
However, take a look at .Remove
Lines 123 to 140 in f4d73cd
// Create a new copy of the list but with one less item. | |
// (we must copy because we'll be mutating the list). | |
newList := make([]Peer, len(ps.list)-1) | |
copy(newList, ps.list) | |
// If it's the last peer, that's an easy special case. | |
if index == len(ps.list)-1 { | |
ps.list = newList | |
delete(ps.lookup, peer.ID()) | |
return true | |
} | |
// Replace the popped item with the last item in the old list. | |
lastPeer := ps.list[len(ps.list)-1] | |
lastPeerKey := lastPeer.ID() | |
lastPeerItem := ps.lookup[lastPeerKey] | |
newList[index] = lastPeer | |
lastPeerItem.index = index | |
ps.list = newList |
This code has been like this since 8 years ago, essentially the birth of Tendermint per first commit abc3a2c
Remedy
Given that that code hasn't been touched since Tendermint's first commit, we can deduce that perhaps it was that we didn't understand Go more, because re-slicing is a common operation in Go per https://go.dev/blog/slices-intro
PeerSet.Remove should simply use re-slicing like this
diff --git a/p2p/peer_set.go b/p2p/peer_set.go
index ba43f6417..4f1501d31 100644
--- a/p2p/peer_set.go
+++ b/p2p/peer_set.go
@@ -119,25 +119,25 @@ func (ps *PeerSet) Remove(peer Peer) bool {
return false
}
+ if len(ps.list) == 0 {
+ return false
+ }
+
index := item.index
- // Create a new copy of the list but with one less item.
- // (we must copy because we'll be mutating the list).
- newList := make([]Peer, len(ps.list)-1)
- copy(newList, ps.list)
- // If it's the last peer, that's an easy special case.
+ lastPeer := ps.list[len(ps.list)-1]
+ ps.list[len(ps.list)-1] = nil // nil the array's entry to ensure it isn't reachable in the array, hence can be GC'd.
if index == len(ps.list)-1 {
- ps.list = newList
+ ps.list = ps.list[:len(ps.list)-1]
delete(ps.lookup, peer.ID())
return true
}
// Replace the popped item with the last item in the old list.
- lastPeer := ps.list[len(ps.list)-1]
lastPeerKey := lastPeer.ID()
lastPeerItem := ps.lookup[lastPeerKey]
- newList[index] = lastPeer
+ ps.list[index] = lastPeer
+ ps.list = ps.list[:len(ps.list)-1]
lastPeerItem.index = index
- ps.list = newList
delete(ps.lookup, peer.ID())
return true
}
which produces these performance improvements
we get back these improved performance
$ benchstat before.txt after.txt
name old time/op new time/op delta
PeerSetRemoveOne-8 100µs ± 9% 96µs ±13% ~ (p=0.218 n=10+10)
PeerSetRemoveMany-8 1.56ms ± 2% 1.50ms ± 1% -3.76% (p=0.000 n=9+8)
name old alloc/op new alloc/op delta
PeerSetRemoveOne-8 8.48kB ± 0% 7.92kB ± 0% -6.60% (p=0.000 n=9+10)
PeerSetRemoveMany-8 149kB ± 0% 65kB ± 0% -56.44% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
PeerSetRemoveOne-8 85.0 ± 0% 73.0 ± 0% -14.12% (p=0.000 n=10+10)
PeerSetRemoveMany-8 1.32k ± 0% 1.22k ± 0% -7.51% (p=0.000 n=10+10)
that are reproducible from down below:
Exhibit
If we run such a benchmark
package p2p
import (
"testing"
)
var sink any = nil
func BenchmarkPeerSetRemoveOne(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
testPeerSetAddRemoveOne(b)
sink = i
}
if sink == nil {
b.Fatal("Benchmark did not run!")
}
// Reset the sink.
sink = nil
}
func BenchmarkPeerSetRemoveMany(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
testPeerSetAddRemoveMany(b)
sink = i
}
if sink == nil {
b.Fatal("Benchmark did not run!")
}
// Reset the sink.
sink = nil
}
func testPeerSetAddRemoveOne(tb testing.TB) {
peerSet := NewPeerSet()
var peerList []Peer
for i := 0; i < 5; i++ {
p := newMockPeer(net.IP{127, 0, 0, byte(i)})
if err := peerSet.Add(p); err != nil {
tb.Error(err)
}
peerList = append(peerList, p)
}
n := len(peerList)
// 1. Test removing from the front
for i, peerAtFront := range peerList {
removed := peerSet.Remove(peerAtFront)
assert.True(tb, removed)
wantSize := n - i - 1
for j := 0; j < 2; j++ {
assert.Equal(tb, false, peerSet.Has(peerAtFront.ID()), "#%d Run #%d: failed to remove peer", i, j)
assert.Equal(tb, wantSize, peerSet.Size(), "#%d Run #%d: failed to remove peer and decrement size", i, j)
// Test the route of removing the now non-existent element
removed := peerSet.Remove(peerAtFront)
assert.False(tb, removed)
}
}
// 2. Next we are testing removing the peer at the end
// a) Replenish the peerSet
for _, peer := range peerList {
if err := peerSet.Add(peer); err != nil {
tb.Error(err)
}
}
// b) In reverse, remove each element
for i := n - 1; i >= 0; i-- {
peerAtEnd := peerList[i]
removed := peerSet.Remove(peerAtEnd)
assert.True(tb, removed)
assert.Equal(tb, false, peerSet.Has(peerAtEnd.ID()), "#%d: failed to remove item at end", i)
assert.Equal(tb, i, peerSet.Size(), "#%d: differing sizes after peerSet.Remove(atEndPeer)", i)
}
}
func testPeerSetAddRemoveMany(tb testing.TB) {
peerSet := NewPeerSet()
peers := []Peer{}
N := 100
for i := 0; i < N; i++ {
peer := newMockPeer(net.IP{127, 0, 0, byte(i)})
if err := peerSet.Add(peer); err != nil {
tb.Errorf("failed to add new peer")
}
if peerSet.Size() != i+1 {
tb.Errorf("failed to add new peer and increment size")
}
peers = append(peers, peer)
}
for i, peer := range peers {
removed := peerSet.Remove(peer)
assert.True(tb, removed)
if peerSet.Has(peer.ID()) {
tb.Errorf("failed to remove peer")
}
if peerSet.Size() != len(peers)-i-1 {
tb.Errorf("failed to remove peer and decrement size")
}
}
}
FYI & security considerations
There is a possible security issue/vector due to this behavior but that requires a whole new discussion and a different mitigation that would be slightly mitigated by the fix I suggested above, but to completely solve it, it would require a design addition which I I privately communicated.
Kindly cc-ing @elias-orijtech @ValarDragon