-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Description
Description
If the containerd process ends abruptly and is restarted afterwards, any previously running Containers / Pods can no longer be stopped / removed. After attempting to do so, on the next containerd restart, crictl fails with the following message:
time="2021-06-18T15:51:17Z" level=fatal msg="listing pod sandboxes: rpc error: code = Unknown desc = server is not initialized yet"
ctr can still be used, and it can be used to kill the remaining tasks. After doing so, crictl becomes usable again on the next containerd restart.
Steps to reproduce the issue:
On Windows:
# create pod / container.
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd pull docker.io/claudiubelu/pause:3.4.1
$POD_ID=(crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd runp .\pod.json)
$CONTAINER_ID=(crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd create $POD_ID .\container.json .\pod.json)
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd start $CONTAINER_ID
# before restart.
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
tskill containerd
# after restart.
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
# try to stopp and delete the container.
bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stopp $POD_ID"
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stop $CONTAINER_ID"
bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd rmp --force $POD_ID"
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
# restart containerd again.
tskill containerd
# crictl cannot be used now.
crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd pods
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task kill $POD_ID
ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
# final restart.
tskill containerd
crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd pods
crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stopp $POD_ID
crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd rmp $POD_ID
Files:
pod.json:
{
"metadata": {
"name": "ready-sandbox",
"namespace": "default",
"attempt": 0,
"uid": "52fdfc072182654f163f5f0f9a621d729566c74d10037c4d7bbb0407d1e2c649"
},
"logDirectory": "/tmp"
}
container.json:
{
"metadata": {
"name": "busybox"
},
"image":{
"image": "docker.io/claudiubelu/pause:3.4.1"
},
"command": [],
"args": []
}
Describe the results you received:
Output on Windows:
PS C:\tmp> # create pod / container.
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd pull docker.io/claudiubelu/pause:3.4.1
Image is up to date for sha256:a233cb888fa372899fd8cd02bad651820809c11303d8537525e53c159d1a7e1e
PS C:\tmp> $POD_ID=(crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd runp .\pod.json)
PS C:\tmp> $CONTAINER_ID=(crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd create $POD_ID .\container.json .\pod.json)
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd start $CONTAINER_ID
PS C:\tmp> # before restart.
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
ce1f2fa22beed3f635dc464c9ddf449bbb261f3aaf1730e3c30bce03a864f405 6108 RUNNING
81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87 7136 RUNNING
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
ce1f2fa22beed docker.io/claudiubelu/pause:3.4.1 14 seconds ago Running busybox 0 81fba9b46124f
PS C:\tmp> # try to stopp and delete the container.
PS C:\tmp> bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stopp $POD_ID"
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
ce1f2fa22beed docker.io/claudiubelu/pause:3.4.1 25 seconds ago Running busybox 0 81fba9b46124f
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
TASK PID STATUS
81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87 7136 RUNNING
ce1f2fa22beed3f635dc464c9ddf449bbb261f3aaf1730e3c30bce03a864f405 6108 STOPPED
PS C:\tmp> bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stop $CONTAINER_ID"
PS C:\tmp> bash -c "timeout 10s crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd rmp --force $POD_ID"
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
ce1f2fa22beed docker.io/claudiubelu/pause:3.4.1 45 seconds ago Running busybox 0 81fba9b46124f
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
TASK PID STATUS
81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87 7136 RUNNING
ce1f2fa22beed3f635dc464c9ddf449bbb261f3aaf1730e3c30bce03a864f405 6108 STOPPED
PS C:\tmp> # restart containerd again.
PS C:\tmp> tskill containerd
PS C:\tmp> # crictl cannot be used now.
PS C:\tmp> crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd pods
time="2021-06-18T15:51:17Z" level=fatal msg="listing pod sandboxes: rpc error: code = Unknown desc = server is not initialized yet"
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
time="2021-06-18T15:51:17Z" level=fatal msg="listing containers: rpc error: code = Unknown desc = server is not initialized yet"
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
TASK PID STATUS
81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87 7136 RUNNING
ce1f2fa22beed3f635dc464c9ddf449bbb261f3aaf1730e3c30bce03a864f405 6108 STOPPED
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task kill $POD_ID
PS C:\tmp> ctr.exe --address //./pipe//run/containerd-test/containerd --namespace k8s.io task list
TASK PID STATUS
81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87 7136 STOPPED
ce1f2fa22beed3f635dc464c9ddf449bbb261f3aaf1730e3c30bce03a864f405 6108 STOPPED
PS C:\tmp> crictl --runtime-endpoint=npipe://./pipe/run/containerd-test/containerd ps -a
time="2021-06-18T15:51:17Z" level=fatal msg="listing containers: rpc error: code = Unknown desc = server is not initialized yet"
PS C:\tmp> # final restart.
PS C:\tmp> tskill containerd
PS C:\tmp> crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd pods
POD ID CREATED STATE NAME NAMESPACE ATTEMPT RUNTIME
81fba9b46124f About a minute ago NotReady ready-sandbox default 0 (default)
PS C:\tmp> crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd stopp $POD_ID
Stopped sandbox 81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87
PS C:\tmp> crictl.exe --runtime-endpoint npipe://./pipe//run/containerd-test/containerd rmp $POD_ID
Removed sandbox 81fba9b46124f721665de02ae9535953b9a055aefb6181f1595131696c1fbe87
Describe the results you expected:
On Linux, this isn't an issue; the test TestContainerdRestart
ensures that it works properly.
What version of containerd are you using:
$ containerd --version
containerd github.com/containerd/containerd v1.5.0-153-g0a3a77bc4 0a3a77bc445379851a5c737955411fc9e630c3ce
Any other relevant information (runC version, CRI configuration, OS/Kernel version, etc.):
runc --version
$ runc --version
crictl info
$ crictl info{ "status": { "conditions": [ { "type": "RuntimeReady", "status": true, "reason": "", "message": "" }, { "type": "NetworkReady", "status": true, "reason": "", "message": "" } ] }, "cniconfig": { "PluginDirs": [ "c:/k/azurecni/bin" ], "PluginConfDir": "c:/k/azurecni/netconf", "PluginMaxConfNum": 1, "Prefix": "eth", "Networks": [ { "Config": { "Name": "azure", "CNIVersion": "0.3.0", "Plugins": [ { "Network": { "type": "azure-vnet", "capabilities": { "dns": true, "portMappings": true }, "ipam": { "type": "azure-vnet-ipam" }, "dns": { "nameservers": [ "10.0.0.10", "168.63.129.16" ], "search": [ "svc.cluster.local" ] } }, "Source": "{\"AdditionalArgs\":[{\"Name\":\"EndpointPolicy\",\"Value\":{\"ExceptionList\":[\"10.0.0.0 /8\",\"10.240.0.0/12\"],\"Type\":\"OutBoundNAT\"}},{\"Name\":\"EndpointPolicy\",\"Value\":{\"DestinationPrefix\":\" 10.0.0.0/16\",\"NeedEncap\":true,\"Type\":\"ROUTE\"}},{\"Name\":\"EndpointPolicy\",\"Value\":{\"Action\":\"Block\", \"Direction\":\"Out\",\"Priority\":200,\"Protocols\":\"6\",\"RemoteAddresses\":\"168.63.129.16/32\",\"RemotePorts\" :\"80\",\"RuleType\":\"Switch\",\"Type\":\"ACL\"}},{\"Name\":\"EndpointPolicy\",\"Value\":{\"Action\":\"Allow\",\"D irection\":\"In\",\"Priority\":65500,\"Type\":\"ACL\"}},{\"Name\":\"EndpointPolicy\",\"Value\":{\"Action\":\"Allow\ ",\"Direction\":\"Out\",\"Priority\":65500,\"Type\":\"ACL\"}}],\"bridge\":\"azure0\",\"capabilities\":{\"dns\":true ,\"portMappings\":true},\"dns\":{\"Nameservers\":[\"10.0.0.10\",\"168.63.129.16\"],\"Search\":[\"svc.cluster.local\ "]},\"ipam\":{\"type\":\"azure-vnet-ipam\"},\"mode\":\"bridge\",\"type\":\"azure-vnet\"}" } ], "Source": "{\r\n \"cniVersion\": \"0.3.0\",\r\n \"name\": \"azure\",\r\n \"adapterName\": \"\ ",\r\n \"plugins\": [\r\n {\r\n \"type\": \"azure-vnet\",\r\n \"mode\": \"bridge\",\r\n \"bridge\": \"azure0\",\r\n \"capabilities\": {\r\n \"portMappings\": true,\r\n \"dns\": true\r\n },\r\n \"ipam\": {\r\n \"type\": \"azure-vnet-ipam\"\r\n },\r\n \"dns\": 
{\r\n \"Nameservers\": [\r\n \"10.0.0.10\",\r\n \"168.63.129.16\"\r\n ],\r\n \"Search\": [\r\n \"svc.cluster.local\"\r\n ]\r\n },\r\n \"Additi onalArgs\": [\r\n {\r\n \"Name\": \"EndpointPolicy\",\r\n \"Value\": {\r\n \"Type\": \"OutBoundNAT\",\r\n \"ExceptionList\": [\r\n \"10.0.0.0/8\",\r\n \"10.240.0.0/12\"\r\n ]\r\n }\r\n },\r\n {\r\n \"Name\": \"EndpointPolicy\",\r\n \"Value\": {\r\n \"Type\": \"ROUTE\",\r\n \"DestinationPrefix\": \"10.0.0.0/16\",\r\n \"NeedEncap\": true\r\n }\r\n },\r\n {\r\n \"Name\": \"EndpointPolicy\",\r\n \"Value\": {\r\n \"Type\": \" ACL\",\r\n \"Protocols\": \"6\",\r\n \"Action\": \"Block\",\r\n \"Direction\": \"Out\",\r\n \"RemoteAddresses\": \"168.63.129.16/32\",\r\n \"RemotePorts\": \"80\",\r\n \"Priority\": 200 ,\r\n \"RuleType\": \"Switch\"\r\n }\r\n },\r\n {\r\n \"Name\": \"EndpointPol icy\",\r\n \"Value\": {\r\n \"Type\": \"ACL\",\r\n \"Action\": \"Allow\",\r\n \"Direction\": \"In\" ,\r\n \"Priority\": 65500\r\n }\r\n },\r\n {\r\n \"Name\": \"EndpointPolicy\" ,\r\n \"Value\": {\r\n \"Type\": \"ACL\",\r\n \"Ac tion\": \"Allow\",\r\n \"Direction\": \"Out\",\r\ n \"Priority\": 65500\r\n }\r\n }\r\n ]\r\n }\r\n ]\r\n}\r\n" }, "IFName": "eth0" } ] }, "config": { "containerd": { "snapshotter": "windows", "defaultRuntimeName": "default", "defaultRuntime": { "runtimeType": "io.containerd.runhcs.v1", "runtimeEngine": "", "PodAnnotations": null, "ContainerAnnotations": null, "runtimeRoot": "", "options": { "Debug": true, "DebugType": 2, "SandboxImage": "mcr.microsoft.com/oss/kubernetes/pause:3.4.1-windows-1809-amd64", "SandboxIsolation": 0, "SandboxPlatform": "windows/amd64" }, "privileged_without_host_devices": false, "baseRuntimeSpec": "" }, "untrustedWorkloadRuntime": { "runtimeType": "", "runtimeEngine": "", "PodAnnotations": null, "ContainerAnnotations": null, "runtimeRoot": "", "options": null, "privileged_without_host_devices": false, "baseRuntimeSpec": "" }, "runtimes": { "default": { "runtimeType": "io.containerd.runhcs.v1", 
"runtimeEngine": "", "PodAnnotations": null, "ContainerAnnotations": null, "runtimeRoot": "", "options": { "Debug": true, "DebugType": 2, "SandboxImage": "mcr.microsoft.com/oss/kubernetes/pause:3.4.1-windows-1809-amd64", "SandboxIsolation": 0, "SandboxPlatform": "windows/amd64" }, "privileged_without_host_devices": false, "baseRuntimeSpec": "" }, "runhcs-wcow-process": { "runtimeType": "io.containerd.runhcs.v1", "runtimeEngine": "", "PodAnnotations": null, "ContainerAnnotations": null, "runtimeRoot": "", "options": { "Debug": true, "DebugType": 2, "SandboxImage": "mcr.microsoft.com/oss/kubernetes/pause:3.4.1-windows-1809-amd64", "SandboxPlatform": "windows/amd64" }, "privileged_without_host_devices": false, "baseRuntimeSpec": "" } }, "noPivot": false, "disableSnapshotAnnotations": false, "discardUnpackedLayers": true }, "cni": { "binDir": "c:/k/azurecni/bin", "confDir": "c:/k/azurecni/netconf", "maxConfNum": 1, "confTemplate": "" }, "registry": { "configPath": "", "mirrors": { "docker.io": { "endpoint": [ "https://registry-1.docker.io" ] } }, "configs": null, "auths": null, "headers": null }, "imageDecryption": { "keyModel": "node" }, "disableTCPService": true, "streamServerAddress": "127.0.0.1", "streamServerPort": "0", "streamIdleTimeout": "4h0m0s", "enableSelinux": false, "selinuxCategoryRange": 0, "sandboxImage": "mcr.microsoft.com/oss/kubernetes/pause:3.4.1-windows-1809-amd64", "statsCollectPeriod": 10, "systemdCgroup": false, "enableTLSStreaming": false, "x509KeyPairStreaming": { "tlsCertFile": "", "tlsKeyFile": "" }, "maxContainerLogSize": 16384, "disableCgroup": false, "disableApparmor": false, "restrictOOMScoreAdj": false, "maxConcurrentDownloads": 3, "disableProcMount": false, "unsetSeccompProfile": "", "tolerateMissingHugetlbController": false, "disableHugetlbController": false, "ignoreImageDefinedVolumes": false, "netnsMountsUnderStateDir": false, "containerdRootDir": "C:\\Program Files\\Git\\var\\lib\\containerd-test", "containerdEndpoint": 
"//./pipe//run/containerd-test/containerd", "rootDir": "C:\\Program Files\\Git\\var\\lib\\containerd-test\\io.containerd.grpc.v1.cri", "stateDir": "C:\\Program Files\\Git\\run\\containerd-test\\io.containerd.grpc.v1.cri" }, "golang": "go1.16.4", "lastCNILoadStatus": "OK" }
uname -a
$ uname -a