Skip to content

Regression: Adding a lot of files to MFS will slow ipfs down significantly #8694

@RubenKelevra

Description

@RubenKelevra

Checklist

Installation method

built from source

Version

go-ipfs version: 0.13.0-dev-2a871ef01
Repo version: 12
System version: amd64/linux
Golang version: go1.17.6

Config

{
  "API": {
    "HTTPHeaders": {}
  },
  "Addresses": {
    "API": "/ip4/127.0.0.1/tcp/5001",
    "Announce": [],
    "AppendAnnounce": null,
    "Gateway": "/ip4/127.0.0.1/tcp/80",
    "NoAnnounce": [
      "/ip4/10.0.0.0/ipcidr/8",
      "/ip4/100.64.0.0/ipcidr/10",
      "/ip4/169.254.0.0/ipcidr/16",
      "/ip4/172.16.0.0/ipcidr/12",
      "/ip4/192.0.0.0/ipcidr/24",
      "/ip4/192.0.0.0/ipcidr/29",
      "/ip4/192.0.0.8/ipcidr/32",
      "/ip4/192.0.0.170/ipcidr/32",
      "/ip4/192.0.0.171/ipcidr/32",
      "/ip4/192.0.2.0/ipcidr/24",
      "/ip4/192.168.0.0/ipcidr/16",
      "/ip4/198.18.0.0/ipcidr/15",
      "/ip4/198.51.100.0/ipcidr/24",
      "/ip4/203.0.113.0/ipcidr/24",
      "/ip4/240.0.0.0/ipcidr/4",
      "/ip6/100::/ipcidr/64",
      "/ip6/2001:2::/ipcidr/48",
      "/ip6/2001:db8::/ipcidr/32",
      "/ip6/fc00::/ipcidr/7",
      "/ip6/fe80::/ipcidr/10"
    ],
    "Swarm": [
      "/ip4/0.0.0.0/tcp/443",
      "/ip6/::/tcp/443",
      "/ip4/0.0.0.0/udp/443/quic",
      "/ip6/::/udp/443/quic"
    ]
  },
  "AutoNAT": {},
  "Bootstrap": [
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt",
    "/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ",
    "/ip4/104.131.131.82/udp/4001/quic/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ"
  ],
  "DNS": {
    "Resolvers": null
  },
  "Datastore": {
    "BloomFilterSize": 0,
    "GCPeriod": "1h",
    "HashOnRead": false,
    "Spec": {
      "mounts": [
        {
          "child": {
            "path": "blocks",
            "shardFunc": "/repo/flatfs/shard/v1/next-to-last/2",
            "sync": false,
            "type": "flatfs"
          },
          "mountpoint": "/blocks",
          "prefix": "flatfs.datastore",
          "type": "measure"
        },
        {
          "child": {
            "compression": "none",
            "path": "datastore",
            "type": "levelds"
          },
          "mountpoint": "/",
          "prefix": "leveldb.datastore",
          "type": "measure"
        }
      ],
      "type": "mount"
    },
    "StorageGCWatermark": 90,
    "StorageMax": "500GB"
  },
  "Discovery": {
    "MDNS": {
      "Enabled": false,
      "Interval": 10
    }
  },
  "Experimental": {
    "AcceleratedDHTClient": false,
    "FilestoreEnabled": false,
    "GraphsyncEnabled": false,
    "Libp2pStreamMounting": false,
    "P2pHttpProxy": false,
    "StrategicProviding": false,
    "UrlstoreEnabled": false
  },
  "Gateway": {
    "APICommands": [],
    "HTTPHeaders": {
      "Access-Control-Allow-Headers": [
        "X-Requested-With",
        "Range",
        "User-Agent"
      ],
      "Access-Control-Allow-Methods": [
        "GET"
      ],
      "Access-Control-Allow-Origin": [
        "*"
      ]
    },
    "NoDNSLink": false,
    "NoFetch": false,
    "PathPrefixes": [],
    "PublicGateways": null,
    "RootRedirect": "",
    "Writable": false
  },
  "Identity": {
    "PeerID": "xxx"
  },
  "Internal": {},
  "Ipns": {
    "RecordLifetime": "96h",
    "RepublishPeriod": "",
    "ResolveCacheSize": 2048
  },
  "Migration": {
    "DownloadSources": null,
    "Keep": ""
  },
  "Mounts": {
    "FuseAllowOther": false,
    "IPFS": "/ipfs",
    "IPNS": "/ipns"
  },
  "Peering": {
    "Peers": null
  },
  "Pinning": {},
  "Plugins": {
    "Plugins": null
  },
  "Provider": {
    "Strategy": ""
  },
  "Pubsub": {
    "DisableSigning": false,
    "Router": "gossipsub"
  },
  "Reprovider": {
    "Interval": "12h",
    "Strategy": "all"
  },
  "Routing": {
    "Type": "dhtserver"
  },
  "Swarm": {
    "AddrFilters": [
      "/ip4/10.0.0.0/ipcidr/8",
      "/ip4/100.64.0.0/ipcidr/10",
      "/ip4/169.254.0.0/ipcidr/16",
      "/ip4/172.16.0.0/ipcidr/12",
      "/ip4/192.0.0.0/ipcidr/24",
      "/ip4/192.0.0.0/ipcidr/29",
      "/ip4/192.0.0.8/ipcidr/32",
      "/ip4/192.0.0.170/ipcidr/32",
      "/ip4/192.0.0.171/ipcidr/32",
      "/ip4/192.0.2.0/ipcidr/24",
      "/ip4/192.168.0.0/ipcidr/16",
      "/ip4/198.18.0.0/ipcidr/15",
      "/ip4/198.51.100.0/ipcidr/24",
      "/ip4/203.0.113.0/ipcidr/24",
      "/ip4/240.0.0.0/ipcidr/4",
      "/ip6/100::/ipcidr/64",
      "/ip6/2001:2::/ipcidr/48",
      "/ip6/2001:db8::/ipcidr/32",
      "/ip6/fc00::/ipcidr/7",
      "/ip6/fe80::/ipcidr/10"
    ],
    "ConnMgr": {
      "GracePeriod": "3m",
      "HighWater": 700,
      "LowWater": 500,
      "Type": "basic"
    },
    "DisableBandwidthMetrics": false,
    "DisableNatPortMap": true,
    "RelayClient": {},
    "RelayService": {},
    "Transports": {
      "Multiplexers": {},
      "Network": {
        "QUIC": false
      },
      "Security": {}
    }
  }
}

Description

I'm running 2a871ef compiled by go 1.17.6 on Arch Linux for some days on one of my servers.

I had trouble with my MFS datastore after updating (I couldn't delete a file). So I reset my datastore and started importing the data again.

I'm using a shell script that adds the files and folders individually. Because of #7532, I can't use ipfs files write but instead use ipfs add, followed by an ipfs files cp /ipfs/$cid /path/to/file and an ipfs pin rm $cid.

For the ipfs add is set size-65536 as the chunker, blake2b-256 as the hashing algorithm, and use raw-leaves.


After the 3 days, there was basically no IO on the machine and ipfs was using around 1.6 cores pretty consistently without any progress real progress. At that time only this one script was running against the API with no concurrency. The automatic garbage collector of ipfs is off.

There are no experimental settings activated and I'm using flatfs.

I did some debugging, all operations were still working, just extremely slow:

$ time /usr/sbin/ipfs --api=/ip4/127.0.0.1/tcp/5001 files stat --hash --offline /x86-64.archlinux.pkg.pacman.store/community
bafybeianfwoujqfauris6eci6nclgng72jttdp5xtyeygmkivzyss4xhum

real	0m59.164s
user	0m0.299s
sys	0m0.042s

and

$ time /usr/sbin/ipfs --api=/ip4/127.0.0.1/tcp/5001 files stat --hash --offline --with-local /x86-64.archlinux.pkg.pacman.store/community
bafybeie5kkzcg6ftmppbuauy3tgtx2f4gyp7nhfdfsveca7loopufbijxu
Local: 20 GB of 20 GB (100.00%)

real	4m55.298s
user	0m0.378s
sys	0m0.031s

This is while my script was still running on the API and waiting minutes on each response.

Here's my memory dump etc. while the issue occurred: /ipfs/QmPJ1ec2CywWLFeaHFaTeo6g56S5Bqi3g3MEF1a3JrL8zk

Here's a dump after I stopped the import of files and the CPU usage dropped down to like 0.3 cores: /ipfs/QmbotJhgzc2SBxuvGA9dsCFLbxd836QBNFYkLhdqTCZwrP

Here's what the memory looked like as the issue occurred (according to atop 1):

MEM
tot    31.4G
free    6.6G
cache   1.1G
dirty   0.1M
buff   48.9M
slab    7.1G
slrec   3.7G
shmem   2.0M
shrss   0.0M
vmbal   0.0M
zfarc  15.6G
hptot   0.0M
hpuse   0.0M

The machine got 10 dedicated cores from a AMD EPYC 7702 and 1 TB SSD storage via NAS.

Metadata

Metadata

Assignees

No one assigned

    Labels

    P1High: Likely tackled by core team if no one steps upkind/bugA bug in existing code (including security flaws)need/analysisNeeds further analysis before proceedingtopic/MFSTopic MFStopic/shardingTopic about Sharding (HAMT etc)

    Type

    No type

    Projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions