虚拟机 rootfs
root@25a725e7599e:/# ls
bin   dev  home  lost+found  mnt  proc  run   srv  tmp  var
boot  etc  lib   media       opt  root  sbin  sys  usr
root@25a725e7599e:/# ls lib/
aarch64-linux-gnu  ld-linux-aarch64.so.1  modprobe.d  terminfo
init               lsb                    systemd     udev
root@25a725e7599e:/# ls
bin   dev  home  lost+found  mnt  proc  run   srv  tmp  var
boot  etc  lib   media       opt  root  sbin  sys  usr
root@25a725e7599e:/# ls bin/ps
bin/ps
root@25a725e7599e:/# ps -elf | grep nginx
4 S root 71 50 0 80 0 - 2069 arm64_ Oct31 ? 00:00:00 nginx: master process nginx -g daemon off;
5 S systemd+ 99 71 0 80 0 - 2164 ep_pol Oct31 ? 00:00:00 nginx: worker process
0 S root 201 57 0 80 0 - 676 pipe_w 04:54 hvc0 00:00:00 grep --color=auto nginx
root@25a725e7599e:/# ls run/
kata-containers  kata1.txt  libcontainer  lock  mount  sandbox-ns  systemd
root@25a725e7599e:/#
容器 rootfs
root@fa55c7478feb:/# ls bin/ps
ls: cannot access 'bin/ps': No such file or directory
root@fa55c7478feb:/# ls
bin   docker-entrypoint.d   home   mnt   root  srv  usr
boot  docker-entrypoint.sh  lib    opt   run   sys  var
dev   etc                   media  proc  sbin  tmp
root@fa55c7478feb:/# ps -elf | grep nginx
bash: ps: command not found
root@fa55c7478feb:/# ls run/
lock  nginx.pid  utmp
root@fa55c7478feb:/#
func (a *agentGRPC) CreateSandbox(ctx context.Context, req *pb.CreateSandboxRequest) (*gpb.Empty, error) { if a.sandbox.running { return emptyResp, grpcStatus.Error(codes.AlreadyExists, "Sandbox already started, impossible to start again") } a.sandbox.hostname = req.Hostname a.sandbox.containers = make(map[string]*container) a.sandbox.network.ifaces = make(map[string]*types.Interface) a.sandbox.network.dns = req.Dns a.sandbox.running = true a.sandbox.sandboxPidNs = req.SandboxPidns a.sandbox.storages = make(map[string]*sandboxStorage) a.sandbox.guestHooks = &specs.Hooks{} a.sandbox.guestHooksPresent = false for _, m := range req.KernelModules { if err := loadKernelModule(m); err != nil { return emptyResp, err } } if req.GuestHookPath != "" { a.sandbox.scanGuestHooks(req.GuestHookPath) } if req.SandboxId != "" { a.sandbox.id = req.SandboxId agentLog = agentLog.WithField("sandbox", a.sandbox.id) } // Set up shared UTS and IPC namespaces if err := a.sandbox.setupSharedNamespaces(ctx); err != nil { return emptyResp, err } if req.SandboxPidns { if err := a.sandbox.setupSharedPidNs(); err != nil { return emptyResp, err } } mountList, err := addStorages(ctx, req.Storages, a.sandbox) if err != nil { return emptyResp, err } a.sandbox.mounts = mountList if err := setupDNS(a.sandbox.network.dns); err != nil { return emptyResp, err } return emptyResp, nil }
func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (resp *gpb.Empty, err error) { if err := a.createContainerChecks(req); err != nil { return emptyResp, err } // re-scan PCI bus // looking for hidden devices if err = rescanPciBus(); err != nil { agentLog.WithError(err).Warn("Could not rescan PCI bus") } // Some devices need some extra processing (the ones invoked with // --device for instance), and that's what this call is doing. It // updates the devices listed in the OCI spec, so that they actually // match real devices inside the VM. This step is necessary since we // cannot predict everything from the caller. if err = addDevices(ctx, req.Devices, req.OCI, a.sandbox); err != nil { return emptyResp, err } // Both rootfs and volumes (invoked with --volume for instance) will // be processed the same way. The idea is to always mount any provided // storage to the specified MountPoint, so that it will match what's // inside oci.Mounts. // After all those storages have been processed, no matter the order // here, the agent will rely on libcontainer (using the oci.Mounts // list) to bind mount all of them inside the container. mountList, err := addStorages(ctx, req.Storages, a.sandbox) if err != nil { return emptyResp, err } ctr := &container{ id: req.ContainerId, processes: make(map[string]*process), mounts: mountList, useSandboxPidNs: req.SandboxPidns, agentPidNs: req.AgentPidns, ctx: ctx, } // In case the container creation failed, make sure we cleanup // properly by rolling back the actions previously performed. defer func() { if err != nil { a.rollbackFailingContainerCreation(ctr) } }() // Add the nvdimm root partition to the device cgroup to prevent access updateDeviceCgroupForGuestRootfs(req.OCI) // Convert the spec to an actual OCI specification structure. 
ociSpec, err := pb.GRPCtoOCI(req.OCI) if err != nil { return emptyResp, err } if err := a.handleCPUSet(ociSpec); err != nil { return emptyResp, err } if err := a.applyNetworkSysctls(ociSpec); err != nil { return emptyResp, err } if a.sandbox.guestHooksPresent { // Add any custom OCI hooks to the spec a.sandbox.addGuestHooks(ociSpec) // write the OCI spec to a file so that hooks can read it err = writeSpecToFile(ociSpec, req.ContainerId) if err != nil { return emptyResp, err } // Change cwd because libcontainer assumes the bundle path is the cwd: // https://github.com/opencontainers/runc/blob/v1.0.0-rc5/libcontainer/specconv/spec_linux.go#L157 oldcwd, err := changeToBundlePath(ociSpec, req.ContainerId) if err != nil { return emptyResp, err } defer os.Chdir(oldcwd) } // Convert the OCI specification into a libcontainer configuration. config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ CgroupName: req.ContainerId, NoNewKeyring: true, Spec: ociSpec, NoPivotRoot: a.sandbox.noPivotRoot, }) if err != nil { return emptyResp, err } // apply rlimits config.Rlimits = posixRlimitsToRlimits(ociSpec.Process.Rlimits) // Update libcontainer configuration for specific cases not handled // by the specconv converter. if err = a.updateContainerConfig(ociSpec, config, ctr); err != nil { return emptyResp, err } return a.finishCreateContainer(ctr, req, config) }