Skip to content

crypto/merkle: innerHash is unnecessarily inefficient by creating an entire fresh byteslice, copying just to invoke tmhash.Sum once yet it cloud invoke sha256.Hash.Write multiple times #1881

@odeke-em

Description

@odeke-em

Bug Report

Noticed while in a security audit, if we look at this code

func innerHash(left []byte, right []byte) []byte {
data := make([]byte, len(innerPrefix)+len(left)+len(right))
n := copy(data, innerPrefix)
n += copy(data[n:], left)
copy(data[n:], right)
return tmhash.Sum(data)
}

  • creates a fresh byteslice with make(...)
  • copies 3 byte slices in
  • invokes tmhash.Sum(data)

meanwhile that code could simply be self contained and much simpler and more efficient just by using crypto/sha256.New then .Write on each byte slice like this

Suggested change

// returns tmhash(0x01 || left || right).
func innerHash(left []byte, right []byte) []byte {
        h := sha256.New()
        h.Write(innerPrefix)
        h.Write(left)
        h.Write(right)
        return h.Sum(nil)
}

or as a diff

diff --git a/crypto/merkle/hash.go b/crypto/merkle/hash.go
index 2acbb1ef3..523064842 100644
--- a/crypto/merkle/hash.go
+++ b/crypto/merkle/hash.go
@@ -1,6 +1,7 @@
 package merkle
 
 import (
+	"crypto/sha256"
 	"hash"
 
 	"github.com/cometbft/cometbft/crypto/tmhash"
@@ -32,11 +33,11 @@ func leafHashOpt(s hash.Hash, leaf []byte) []byte {
 
 // returns tmhash(0x01 || left || right).
 func innerHash(left []byte, right []byte) []byte {
-	data := make([]byte, len(innerPrefix)+len(left)+len(right))
-	n := copy(data, innerPrefix)
-	n += copy(data[n:], left)
-	copy(data[n:], right)
-	return tmhash.Sum(data)
+	h := sha256.New()
+	h.Write(innerPrefix)
+	h.Write(left)
+	h.Write(right)
+	return h.Sum(nil)
 }
 
 func innerHashOpt(s hash.Hash, left []byte, right []byte) []byte {

Exhibit

Benchmarking code

package merkle

import (
	"crypto/sha256"
	"strings"
	"testing"
)

var sink any = nil

type innerHashTest struct {
	left, right string
}

var innerHashTests = []*innerHashTest{
	{"aaaaaaaaaaaaaaa", "                    "},
	{"", ""},
	{"                        ", "a    ff     b    f1    a"},
	{"ffff122fff", "ffff122fff"},
	{"😎💡✅alalalalalalalalalallalallaallalaallalalalalalalalaallalalalalalala", "😎💡✅alalalalalalalalalallalallaallalaallalalalalalalalaallalalalalalalaffff122fff"},
	{strings.Repeat("ff", 1<<10), strings.Repeat("00af", 4<<10)},
	{strings.Repeat("f", sha256.Size), strings.Repeat("00af", 10<<10)},
	{"aaaaaaaaaaaaaaaaaaaaaaaaaaaffff122fffaaaaaaaaa", "aaaaaaaaaffff1aaaaaaaaaaaaaaaaaa22fffaaaaaaaaa"},
}

func BenchmarkInnerHash(b *testing.B) {
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		for _, tt := range innerHashTests {
			got := innerHash([]byte(tt.left), []byte(tt.right))
			if g, w := len(got), sha256.Size; g != w {
				b.Fatalf("size discrepancy: got %d, want %d", g, w)
			}
			sink = got
		}
	}

	if sink == nil {
		b.Fatal("Benchmark did not run!")
	}
}

Benchmark results

$ benchstat before.txt after.txt 
name         old time/op    new time/op    delta
InnerHash-8     357µs ± 3%     353µs ± 2%     ~     (p=0.129 n=9+10)

name         old alloc/op   new alloc/op   delta
InnerHash-8    69.1kB ± 0%    60.1kB ± 0%  -12.98%  (p=0.000 n=9+9)

name         old allocs/op  new allocs/op  delta
InnerHash-8      24.0 ± 0%      23.0 ± 0%   -4.17%  (p=0.000 n=10+10)

Have you tried the latest version: yes

/cc @elias-orijtech @thanethomson @lasarojc

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't workingneeds-triageThis issue/PR has not yet been triaged by the team.

    Type

    No type

    Projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions