# docker集群搭建和应用示例

docker+docker-compose+docker-swarm+nfs搭建docker集群。

# 一.演示硬件+软件环境

  • 演示硬件:三台CentOS 7.6机器,ip分别为192.168.100.10(dev10),192.168.100.11(dev11),192.168.100.12(dev12),空闲空间最大的目录在/data下。

  • 相关软件:docker,docker-compose,nfs,registry

# 二. 目录设置

  • 为了方便管理,一般把docker软件及相关的脚本数据等等都放在一个目录下,而且这些都很占用空间,所以有必要在空闲空间最大的目录下创建一个docker的根目录。相关脚本命令:

    mkdir /data/docker
    
  • docker的默认数据目录是/var/lib/docker,镜像、容器和卷默认都存放在该目录下,containerd的数据默认存放在/var/lib/containerd下,往往需要占用很大的空间,所以建议把这两个数据目录放到空闲空间比较大的目录,再采用软链接方式指向即可。相关脚本命令:

    mkdir -p /data/docker/lib/docker /data/docker/lib/containerd
    ln -s /data/docker/lib/docker /var/lib/docker
    ln -s /data/docker/lib/containerd /var/lib/containerd
    
  • 一般情况下,还会设置普通的docker应用目录base,docker-compose应用目录docker-compose,docker-stack应用目录docker-stack,共享目录share。相关脚本命令:

    mkdir -p /data/docker/base /data/docker/docker-compose /data/docker/docker-stack /data/docker/share
    
  • 可选:安装docker私有库registry所需的目录

    mkdir -p /data/docker/lib/registry
    ln -s /data/docker/lib/registry /var/lib/registry
    
  • 目录树图示

    └── docker
        ├── base
        ├── docker-compose
        ├── docker-stack
        ├── lib
        │   ├── containerd
        │   ├── docker
        │   └── registry
        └── share
    

# 三. 共享软件NFS

  • 192.168.100.10上安装共享的nfs

    yum install -y nfs-utils
    systemctl start rpcbind nfs-server
    systemctl enable rpcbind nfs-server
    
  • 设置共享目录,docker volume要用到。

    ## Export the shared directory to the cluster subnet: <path> <clients>(<options>).
    ## The entry is quoted because an unquoted "(" is a shell syntax error.
    echo '/data/docker/share 192.168.100.0/24(rw,sync,insecure,all_squash)' > /etc/exports
    # Reload the export table
    exportfs -rv
    
  • 配置防火墙(nfs:2049,mountd:20048,rpc-bind(portmapper):111)

    firewall-cmd --permanent --add-service=nfs --add-service=mountd --add-service=rpc-bind
    firewall-cmd --reload
    # The NFS server only runs on 192.168.100.10, so all three ports are probed on that host.
    nc 192.168.100.10 111
    nc 192.168.100.10 2049
    nc 192.168.100.10 20048
    
  • 检查nfs是否正确安装

    #检查nfs是否启动成功
    rpcinfo -p
    #检查目录是否配置正确(在10/11/12三台服务器上)
    showmount -e 192.168.100.10
    

# 四. 安装docker

  • 直接采用一键安装get-docker.sh

    curl -sSL https://get.docker.com/ | sh
    
    docker run hello-world
    
  • 修改docker的配置daemon.json:Ⅰ.把镜像源改为国内源(若服务器在墙外请忽略),Ⅱ.允许以http方式访问192.168.100.10:5000上的私有库(registry打算安装在10机器上)。

    cat > /etc/docker/daemon.json <<EOF
    {
      "registry-mirrors": [
        "https://a0ina6fy.mirror.aliyuncs.com"
      ],
      "insecure-registries": [
        "192.168.100.10:5000"
      ]
    }
    EOF
    
    systemctl restart docker.service
    

# 五. 配置docker的防火墙(根据需要配置)

  • 把docker的docker0网卡添加到防火墙的信任(trusted)域

    firewall-cmd --permanent --zone=trusted --add-interface=docker0
    firewall-cmd --reload
    
  • 添加内网ip网段(192.168.100.0/24)到防火墙的公共(public)域

    firewall-cmd --permanent --zone=public --add-rich-rule='rule family="ipv4" source address="192.168.100.0/24" accept'
    firewall-cmd --reload
    
  • 添加容器的ip段(172.16.0.0/12,docker默认使用的私有网段)到防火墙的公共(public)域

    firewall-cmd --permanent --zone=public --add-rich-rule='rule family="ipv4" source address="172.16.0.0/12" accept'
    firewall-cmd --reload
    

# 六. 验证docker防火墙是否配置正确(用ncat来检测)

  • 在每台服务器上运行checkport-server镜像,映射到9999

    docker run -id --rm --name chkport-srv -p 9999:9999 soul0328/checkport-server
    
  • 在每台服务器的宿主机和容器上访问9999端口,看是否返回连接成功。

    # -s:指定使用哪个ip访问,在多网卡的情况下
    nc -s 172.17.0.1 192.168.100.10 9999
    nc -s 172.17.0.1 192.168.100.11 9999
    nc -s 172.17.0.1 192.168.100.12 9999
    
  • 若容器没有nc命令,则需要安装nmap-ncat软件包

    # alpine
    apk add nmap-ncat
    # or Debian/Ubuntu (the package is named ncat there, not nmap-ncat)
    apt install -y ncat
    # or CentOS
    yum install -y nmap-ncat
    
  • 注意nmap-ncat的版本,一般镜像里的版本比较低,用作服务器时,监听端口要用-p,高版本没有这个选项,直接写端口即可。

    #ncat可以作为服务器
    nc -lk -p 9999 -e echo "连接成功!"
    

# 七.安装registry

  • 安装脚本

    docker run -d -p 5000:5000 --restart always --name=registry -v /var/lib/registry:/var/lib/registry registry
    

# 八. 安装docker compose

  • 在github上查看docker compose的版本,下载最新的compose。

    # Compose v2 release assets use a lower-case OS name (e.g. docker-compose-linux-x86_64),
    # so the output of `uname -s` is lower-cased. -f makes curl fail on an HTTP error
    # instead of saving the error page as the binary.
    curl -fL "https://github.com/docker/compose/releases/download/v2.4.1/docker-compose-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m)" -o /usr/local/bin/docker-compose
    # The downloaded file is not executable until its mode is changed.
    chmod +x /usr/local/bin/docker-compose
    
  • 配置docker compose。

    ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
    
  • 验证docker compose是否安装正确

    docker-compose --version
    

# 九. 构建集群docker swarm

  • 192.168.100.10作为管理机(manager)

    docker swarm init --advertise-addr 192.168.100.10
    docker swarm join-token worker
    
  • 把192.168.100.11/12作为工作机(worker)

    #复制上个步骤docker swarm join-token worker的命令结果
    docker swarm join --token SWMTKN-1-4gxu55n2m5rjp1g73gsyhbpg6hwkwi59zvt74sq1dqotxouzvw-0ec8309h4gopill3w5y6rc8gi 192.168.100.10:2377
    
  • 当然也可以直接添加为管理机

    # Run on the manager: print the join command for additional managers
    docker swarm join-token manager
    # Run on the node that should join as a manager (paste the command printed above;
    # the address is the manager initialised with --advertise-addr 192.168.100.10)
    docker swarm join --token SWMTKN-1-4gxu55n2m5rjp1g73gsyhbpg6hwkwi59zvt74sq1dqotxouzvw-dfnff0f0tcax5ui9x60qzg0cl 192.168.100.10:2377
    
  • worker与manager转变

    #worker升级为manager,在manager上执行
    docker node ls
    docker node promote <node>
    #manager要降成worker,在manager上执行
    docker node demote <node>
    
  • 若初始化swarm时,提示端口不可访问,则把2377添加到防火墙。

    firewall-cmd --permanent --zone=public --add-port=2377/tcp
    firewall-cmd --reload
    

# 十. 示例脚本

  • 192.168.100.10上执行的脚本

    #!/usr/bin/env bash
    #
    # Bootstrap script for the manager host (192.168.100.10). Run as root.
    # Steps: directory layout -> NFS export -> docker -> private registry ->
    # docker-compose -> swarm init.

    setup_dir="/data/docker"
    docker_compose_ver="v2.4.1"

    echo "===目录准备==="
    mkdir -p "${setup_dir}/lib/docker" \
      "${setup_dir}/lib/containerd" \
      "${setup_dir}/lib/registry" \
      "${setup_dir}/base" \
      "${setup_dir}/share" \
      "${setup_dir}/docker-compose" \
      "${setup_dir}/docker-stack" \
      "${setup_dir}/base/script" \
      "${setup_dir}/base/data" \
      "${setup_dir}/base/logs" \
      "${setup_dir}/base/conf" \
      "${setup_dir}/base/dockerfile" \
      "${setup_dir}/base/tools"

    # Point the default data locations at the large partition.
    # -f/-n replace an existing symlink so the script can be re-run.
    ln -sfn "${setup_dir}/lib/docker" /var/lib/docker
    ln -sfn "${setup_dir}/lib/containerd" /var/lib/containerd
    ln -sfn "${setup_dir}/lib/registry" /var/lib/registry

    # <======= shared storage: nfs ==========>
    echo "===安装共享软件nfs==="
    yum install -y nfs-utils
    echo "===启动nfs服务并设置开机启动==="
    systemctl start rpcbind nfs-server
    systemctl enable rpcbind nfs-server
    echo "===设置NFS共享目录 注册ip地址,权限==="
    # Export the share directory created above. The entry is quoted because an
    # unquoted "(" is a shell syntax error.
    echo "${setup_dir}/share 192.168.100.0/24(rw,sync,insecure,no_root_squash)" > /etc/exports
    echo "===重新加载配置==="
    exportfs -rv
    echo "===检查nfs是否启动成功==="
    rpcinfo -p
    echo "===设置防火墙(nfs:2049,mountd:20048,rpc-bind(portmapper):111)==="
    firewall-cmd --permanent --add-service=nfs --add-service=mountd --add-service=rpc-bind
    firewall-cmd --reload
    echo "===检查目录是否配置正确==="
    showmount -e 192.168.100.10

    echo "===安装docker==="
    curl -sSL https://get.docker.com/ | sh
    echo "===修改docker的配置文件<daemon.json>==="
    # Registry mirror plus plain-http access to the private registry on this host.
    cat > /etc/docker/daemon.json <<EOF
    {
      "registry-mirrors": [
        "https://a0ina6fy.mirror.aliyuncs.com"
      ],
      "insecure-registries": [
        "192.168.100.10:5000",
        "127.0.0.1:5000"
      ]
    }
    EOF
    echo "===重启docker==="
    systemctl restart docker
    echo "===测试docker<hello world>==="
    docker run --rm hello-world
    echo "===配置docker防火墙(根据实际调整,add<->remove)==="
    firewall-cmd --permanent --zone=trusted --add-interface=docker0
    firewall-cmd --permanent --zone=public --add-rich-rule='rule family="ipv4" source address="192.168.100.0/24" accept'
    firewall-cmd --permanent --zone=public --add-port=5000/tcp
    firewall-cmd --reload

    echo "===(根据实际选装)安装registry==="
    docker run -d -p 5000:5000 --restart always --name=registry -v /var/lib/registry:/var/lib/registry registry

    echo "===docker-compose安装==="
    # Compose v2 release assets use a lower-case OS name, so `uname -s` is lower-cased.
    # -f makes curl fail on an HTTP error instead of saving the error page as the binary.
    curl -fL "https://github.com/docker/compose/releases/download/${docker_compose_ver}/docker-compose-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m)" -o /usr/local/bin/docker-compose
    chmod +x /usr/local/bin/docker-compose
    ln -sfn /usr/local/bin/docker-compose /usr/bin/docker-compose
    docker-compose --version

    echo "===docker-swarm初始化==="
    docker swarm init --advertise-addr 192.168.100.10
    # To print the join command later: docker swarm join-token worker|manager
    # Open the swarm management port in the firewall
    firewall-cmd --permanent --zone=public --add-port=2377/tcp
    firewall-cmd --reload
    
  • 192.168.100.11/12上执行的脚本

    #!/usr/bin/env bash
    #
    # Bootstrap script for the worker hosts (192.168.100.11 / 192.168.100.12).
    # Run as root. Steps: directory layout -> docker -> firewall -> join the swarm.

    setup_dir="/data/docker"
    # Replace with the token printed by `docker swarm join-token worker` on the manager.
    join_token="SWMTKN-1-4gxu55n2m5rjp1g73gsyhbpg6hwkwi59zvt74sq1dqotxouzvw-0ec8309h4gopill3w5y6rc8gi"

    echo "===目录准备==="
    mkdir -p "${setup_dir}"
    # Stop here if the directory is unusable; otherwise the relative mkdir calls
    # below would create directories in whatever the current directory happens to be.
    cd "${setup_dir}" || exit 1
    mkdir -p lib/docker
    # -f/-n replace an existing symlink so the script can be re-run.
    ln -sfn "${setup_dir}/lib/docker" /var/lib/docker
    mkdir -p base docker-compose docker-stack

    echo "===安装docker==="
    curl -sSL https://get.docker.com/ | sh
    echo "===修改docker的配置文件<daemon.json>==="
    # Registry mirror plus plain-http access to the private registry on 192.168.100.10.
    cat > /etc/docker/daemon.json <<EOF
    {
      "registry-mirrors": [
        "https://a0ina6fy.mirror.aliyuncs.com"
      ],
      "insecure-registries": [
        "192.168.100.10:5000"
      ]
    }
    EOF
    echo "===重启docker==="
    systemctl restart docker
    echo "===测试docker<hello world>==="
    docker run --rm hello-world
    echo "===配置docker防火墙(根据实际调整,add<->remove)==="
    firewall-cmd --permanent --zone=trusted --add-interface=docker0
    firewall-cmd --permanent --zone=public --add-rich-rule='rule family="ipv4" source address="192.168.100.0/24" accept'
    firewall-cmd --reload

    echo "===加入docker-swarm==="
    docker swarm join --token "${join_token}" 192.168.100.10:2377
    
    
  • 192.168.100.10/11/12检测是否安装正确(创建监听服务)

    echo "===检查防火墙是否配置正确==="
    echo "===服务监听==="
    docker run -id --rm --name chkport-srv -p 9999:9999 busybox nc -kl -p 9999 -e echo "连接成功"
    
  • 192.168.100.10/11/12检测是否安装正确(检测端口)

    echo "===测试连通状态==="
    nc 192.168.100.10 9999
    nc 192.168.100.10 111
    nc 192.168.100.10 2377
    nc 192.168.100.10 2049
    nc 192.168.100.10 20048
    
  • 192.168.100.10/11/12检测是否安装正确(删除监听服务)

    docker stop chkport-srv
    

# 十一. docker-stack.yml示例

  • docker-stack.yml模板

    # Stack template: nginx + redis + rabbitmq + mongo on one network,
    # with all persistent paths mounted from NFS-backed named volumes.
    version: "3.8"
    
    services:
      nginx: 
        image: nginx:latest
        networks:
          - backend-network
        # NOTE(review): the Compose v3 reference lists `depends_on` as ignored by
        # `docker stack deploy` — confirm before relying on start order.
        depends_on:
          - redis
          - mongo
          - rabbitmq
        deploy:
          restart_policy:
            condition: on-failure
            delay: 10s
            max_attempts: 10
            window: 120s
          # Pin this service to the node whose hostname is dev10.
          placement:
            constraints:
              - "node.hostname==dev10"
        # NOTE(review): `restart` is likewise listed as ignored in swarm mode
        # (deploy.restart_policy above is the swarm equivalent) — confirm.
        restart: on-failure:3
        volumes:
          - nginx-html-nfs:/usr/share/nginx/html
          - nginx-conf-nfs:/usr/local/etc/nginx
          - nginx-logs-nfs:/var/log/nginx 
        ports:
          - "8280:80"
        # Start nginx in the foreground with the config file from the mounted conf volume.
        command: ['nginx','-g','daemon off;','-c','/usr/local/etc/nginx/nginx.conf']
      redis:
        image: redis:6.0.5
        networks:
          - backend-network
        restart: on-failure
        volumes:
          - redis-data-nfs:/data
          - redis-conf-nfs:/usr/local/etc/redis
        # Start redis with the config file from the mounted conf volume.
        command:
          redis-server /usr/local/etc/redis/redis.conf
      rabbitmq:
        image: rabbitmq:3-management
        networks:
          - backend-network
        restart: on-failure
        hostname: rabbitmq-base-server
        volumes:
          - rabbitmq-data-nfs:/var/lib/rabbitmq
      mongo:
        image: mongo:4.2.8
        networks:
          - backend-network
        restart: on-failure
        volumes:
          - mongo-data-nfs:/data/db
    networks:
      backend-network:
    # Named volumes of type nfs served by 192.168.100.10 (`addr=`);
    # `device` is the path on that server.
    volumes:
      nginx-conf-nfs:
        driver: local
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/conf/nginx"
      nginx-html-nfs:
        driver: local
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/data/nginx"
      nginx-logs-nfs:
        driver: local
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/logs/nginx"
    
      # The volumes below have no `driver` key.
      # NOTE(review): presumably they fall back to the default `local` driver — confirm.
      redis-conf-nfs:
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/conf/redis"
      redis-data-nfs:
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/data/redis"
      mongo-data-nfs:
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/data/mongo"
      rabbitmq-data-nfs:
        driver_opts:
          type: "nfs"
          o: "addr=192.168.100.10,rw"
          device: ":/data/docker/share/my-project/data/rabbitmq"
    
    
  • 192.168.100.10上执行的命令

    mkdir -p /data/docker/share/my-project/conf/nginx \
    	  /data/docker/share/my-project/data/nginx \
    	  /data/docker/share/my-project/logs/nginx \
    	  /data/docker/share/my-project/conf/redis \
    	  /data/docker/share/my-project/data/redis \
    	  /data/docker/share/my-project/data/mongo \
    	  /data/docker/share/my-project/data/rabbitmq
    
    docker stack deploy -c /data/docker/docker-stack/my-project/docker-stack.yml --with-registry-auth my-project
    
    #docker stack rm my-project
    

# 十二. 反安装(uninstall)

  • 192.168.100.10上执行反安装。

    # Uninstall script for the manager host (192.168.100.10). Run as root.
    setup_dir="/data/docker"
    # Set to 1 to also delete docker images, containers and volumes under ${setup_dir}/lib.
    del_dir=0

    echo "===停止docker服务,并禁用开机启动==="
    systemctl stop docker
    systemctl disable docker
    echo "===删除软链接==="
    # Without -r, rm only removes the symlinks themselves; /var/lib/containerd is
    # included because the install steps create that link as well.
    rm -f /var/lib/docker /var/lib/containerd /var/lib/registry /usr/bin/docker-compose
    echo "===删除docker-compose执行文件=="
    rm -f /usr/local/bin/docker-compose
    echo "===删除docker==="
    yum remove -y docker-ce docker-ce-cli containerd.io

    echo "===删除docker的配置文件==="
    rm -rf /etc/docker/

    # `]` must be a separate word; "1]" makes the test fail with "missing `]'".
    if [ "${del_dir}" -eq 1 ]; then
      echo "===删除docker镜像,容器,卷==="
      # ${setup_dir:?} aborts instead of expanding to /lib/* when the variable is empty.
      rm -rf "${setup_dir:?}"/lib/*
    fi

    echo "===停止nfs服务,并禁用开机启动==="
    systemctl stop rpcbind nfs-server
    systemctl disable rpcbind nfs-server
    echo "===删除nfs==="
    yum remove -y nfs-utils
    echo "===删除nfs的配置文件==="
    rm -f /etc/exports
    
  • 192.168.100.11/12 上执行反安装

    # Uninstall script for the worker hosts (192.168.100.11 / 192.168.100.12). Run as root.
    setup_dir="/data/docker"
    # Set to 1 to also delete docker images, containers and volumes under ${setup_dir}/lib.
    del_dir=0

    echo "===停止docker服务,并禁用开机启动==="
    systemctl stop docker
    systemctl disable docker
    echo "===删除软链接==="
    # Without -r, rm only removes the symlinks themselves; -f ignores missing ones.
    rm -f /var/lib/docker /var/lib/registry /usr/bin/docker-compose
    echo "===删除docker-compose执行文件=="
    rm -f /usr/local/bin/docker-compose
    echo "===删除docker==="
    yum remove -y docker-ce docker-ce-cli containerd.io

    echo "===删除docker的配置文件==="
    rm -rf /etc/docker/

    # `]` must be a separate word; "1]" makes the test fail with "missing `]'".
    if [ "${del_dir}" -eq 1 ]; then
      echo "===删除docker镜像,容器,卷==="
      # ${setup_dir:?} aborts instead of expanding to /lib/* when the variable is empty.
      rm -rf "${setup_dir:?}"/lib/*
    fi
    fi
    

# 十三. 相关的命令

  • docker相关命令

    # Stack commands (the stack name matches the one used with `docker stack deploy`)
    docker stack ps -f desired-state=running my-project
    docker node ls
    docker swarm join-token worker

    docker stack deploy
    docker stack ls
    docker stack services my-project
    docker stack rm my-project

    # Node commands (`docker update` acts on containers; nodes use `docker node update`)
    docker node update --availability active dev10
    docker node promote dev11
    docker node demote dev11
    docker node inspect dev10
    docker node ps dev10
    docker node rm dev11

    docker swarm init
    docker swarm join-token worker
    docker swarm join-token manager

    docker network ls
    docker volume ls
    
    
  • linux相关命令

    ls
    mkdir
    
    cat > myfile.txt <<EOF
    ...
    EOF
    
    nc
    echo
    yum install/remove
    firewall-cmd
    systemctl
    ln