sless-primer/VM/functions/install-docker/handler.py

159 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 2026-03-29 — handler.py: установка Docker CE на ВМ по SSH.
# sless_job runtime: python3.11, entrypoint: handler.install
#
# Метод установки: официальный Docker apt-репозиторий (best practices).
# НЕ используется curl | sh — небезопасно для продакшена.
#
# event_json:
# compose: true/false — ставить ли docker-compose-plugin (default: true)
#
# env_vars:
# VM_IP: внешний IP ВМ
# SSH_USER: логин (ubuntu)
# SSH_KEY: содержимое приватного SSH-ключа (PEM)
import os, io, time
import paramiko
def _load_key(content):
    """Parse a private SSH key, trying each supported paramiko key class.

    Args:
        content: Text of the private key (PEM), e.g. the value of ``SSH_KEY``.

    Returns:
        The key object produced by the first of ``Ed25519Key``, ``RSAKey``
        or ``ECDSAKey`` whose ``from_private_key`` accepts ``content``.

    Raises:
        ValueError: If no key class can parse ``content``. The error raised
            by the last attempted class is chained as ``__cause__`` so the
            real reason is not lost.
    """
    last_err = None
    for cls in (paramiko.Ed25519Key, paramiko.RSAKey, paramiko.ECDSAKey):
        try:
            # Every attempt gets its own StringIO so it starts reading
            # from the beginning of the key text.
            return cls.from_private_key(io.StringIO(content))
        except Exception as e:
            # Best effort: remember why this class rejected the key and
            # move on to the next candidate.
            last_err = e
    raise ValueError("Неподдерживаемый тип SSH-ключа") from last_err
def _ssh_connect(retries=5, delay=10):
    """Open an SSH connection to the VM, retrying on failure.

    Connection parameters come from the environment: ``VM_IP`` (host),
    ``SSH_USER`` (login) and ``SSH_KEY`` (private key text).

    Args:
        retries: Maximum number of connection attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        A connected ``paramiko.SSHClient``; the caller must close it.

    Raises:
        KeyError: If a required environment variable is missing. This is
            raised immediately, without retrying.
        ValueError: If ``SSH_KEY`` cannot be parsed (from ``_load_key``).
        RuntimeError: If every connection attempt fails; the last error is
            chained as ``__cause__``.
    """
    # Read configuration up front: a missing variable is a setup error and
    # must not be retried (and slept on) as if it were a network failure.
    key = _load_key(os.environ["SSH_KEY"])
    host = os.environ["VM_IP"]
    user = os.environ["SSH_USER"]

    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any unknown host key, so the
    # server identity is not verified — confirm this is acceptable here.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    last_err = None
    for attempt in range(retries):
        try:
            client.connect(
                hostname=host,
                username=user,
                pkey=key,
                timeout=15,
            )
        except Exception as e:
            last_err = e
            # No point sleeping after the final attempt.
            if attempt < retries - 1:
                time.sleep(delay)
        else:
            return client

    # All attempts failed: release the client before reporting the error.
    client.close()
    raise RuntimeError(
        f"SSH не удалось после {retries} попыток: {last_err}"
    ) from last_err
def _run(client, cmd, timeout=120, check=True):
    """Run a shell command over SSH and collect its result.

    Args:
        client: Connected SSH client exposing ``exec_command``.
        cmd: Shell command line to execute on the remote host.
        timeout: Passed to ``exec_command`` as the channel timeout.
            NOTE(review): in paramiko this bounds each blocking read of
            the output (silence between chunks), not total runtime —
            confirm against the paramiko version in use.
        check: When true, a non-zero exit status raises ``RuntimeError``.

    Returns:
        Tuple ``(exit_code, stdout_text, stderr_text)``; both texts are
        decoded with undecodable bytes replaced, and stripped.

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero.
    """
    _, stdout, stderr = client.exec_command(cmd, timeout=timeout)
    # Drain the output BEFORE waiting for the exit status: paramiko
    # documents that recv_exit_status() can hang forever when the remote
    # side produces more output than the channel window and nobody reads it.
    out = stdout.read().decode(errors="replace").strip()
    err = stderr.read().decode(errors="replace").strip()
    code = stdout.channel.recv_exit_status()
    if check and code != 0:
        raise RuntimeError(f"Ошибка (exit {code}):\n{cmd}\nstderr: {err}")
    return code, out, err
def _wait_apt_lock(client, attempts=20, delay=10):
    """Wait for cloud-init to finish and shut down automatic apt updates.

    Intended for Ubuntu 22.04+. All preparatory commands are best effort
    (``check=False`` and ``; true``), so a failing step does not abort.

    Args:
        client: Connected SSH client, passed through to ``_run``.
        attempts: How many times to probe the lock files before giving up.
        delay: Seconds to sleep between probes.

    Raises:
        RuntimeError: If the lock files are still busy after ``attempts``
            probes.
    """
    # Step 1: wait for cloud-init — it holds apt during the VM's first boot.
    _run(client, "timeout 300 sudo cloud-init status --wait 2>/dev/null; true", check=False, timeout=310)
    # Step 2: mask (not merely disable) so systemd cannot restart the units.
    _run(client, "sudo systemctl mask unattended-upgrades apt-daily.service apt-daily-upgrade.service apt-daily.timer apt-daily-upgrade.timer 2>/dev/null; true", check=False)
    _run(client, "sudo systemctl stop unattended-upgrades apt-daily.service apt-daily-upgrade.service 2>/dev/null; true", check=False)
    # Step 3: kill any remaining apt/dpkg processes.
    # pkill accepts exactly ONE pattern, so the names are combined into a
    # single alternation; with several operands pkill only prints a usage
    # error and kills nothing. With -x the pattern is matched against the
    # process name, which Linux truncates to 15 characters — hence
    # "unattended-upgr".
    _run(client, "sudo pkill -9 -x 'unattended-upgr|apt-get|apt|dpkg' 2>/dev/null; true", check=False)
    _run(client, "sudo kill -9 $(sudo lsof -t /var/lib/dpkg/lock-frontend 2>/dev/null) 2>/dev/null; true", check=False)
    # Step 4: remove stale lock files and repair dpkg state.
    _run(client, "sudo rm -f /var/lib/dpkg/lock-frontend /var/lib/dpkg/lock /var/cache/apt/archives/lock /var/lib/apt/lists/lock 2>/dev/null; true", check=False)
    _run(client, "sudo dpkg --configure -a 2>/dev/null; true", check=False)
    time.sleep(3)

    locks = ["/var/lib/dpkg/lock-frontend", "/var/lib/dpkg/lock", "/var/lib/apt/lists/lock"]
    for i in range(attempts):
        # A lock counts as free when a non-blocking flock on it succeeds.
        # NOTE(review): apt/dpkg are believed to lock these files with
        # fcntl(), which flock(1) may not observe on Linux — verify that
        # this probe really detects a busy lock.
        all_free = all(
            _run(client, f"sudo flock -n {lock} true 2>/dev/null", check=False)[0] == 0
            for lock in locks
        )
        if all_free:
            return
        # Still busy: kill the holders again and retry after a pause.
        _run(client, "sudo pkill -9 -x 'apt-get|apt|dpkg' 2>/dev/null; true", check=False)
        _run(client, "sudo kill -9 $(sudo lsof -t /var/lib/dpkg/lock-frontend 2>/dev/null) 2>/dev/null; true", check=False)
        if i < attempts - 1:
            time.sleep(delay)
    raise RuntimeError("apt lock занят слишком долго — проверьте процессы на ВМ")
# Команды установки Docker CE через официальный apt-репозиторий.
# Источник: https://docs.docker.com/engine/install/ubuntu/
_DOCKER_INSTALL_CMDS = [
# Зависимости для добавления внешнего репозитория
"sudo DEBIAN_FRONTEND=noninteractive apt-get -o DPkg::Lock::Timeout=600 install -y -qq ca-certificates curl gnupg",
# Директория для ключей
"sudo install -m 0755 -d /etc/apt/keyrings",
# GPG-ключ Docker
"curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/docker.gpg",
"sudo chmod a+r /etc/apt/keyrings/docker.gpg",
# Docker apt-репозиторий
(
'echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] '
'https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" '
"| sudo tee /etc/apt/sources.list.d/docker.list > /dev/null"
),
# Обновить индекс с новым репо
"sudo DEBIAN_FRONTEND=noninteractive apt-get -o DPkg::Lock::Timeout=600 update -qq",
# Установить Docker CE
"sudo DEBIAN_FRONTEND=noninteractive apt-get -o DPkg::Lock::Timeout=600 install -y -qq docker-ce docker-ce-cli containerd.io",
]
def install(event):
    """Install Docker CE on the VM; if already present, report its version.

    Args:
        event: Mapping with an optional ``"compose"`` flag (default
            ``True``) selecting whether ``docker-compose-plugin`` is also
            installed. ``None`` is treated as an empty mapping.

    Returns:
        A dict with ``"status"`` (``"already_installed"`` or ``"ok"``),
        ``"docker_version"`` and ``"compose_version"`` (``None`` when the
        compose version output is not recognised). A fresh install also
        includes a ``"note"`` about the docker group.

    Raises:
        RuntimeError: If a checked remote command fails (from ``_run``),
            or if SSH / apt-lock preparation fails.
    """
    import shlex  # local import: only used to quote the user name below

    # Tolerate a missing event payload instead of failing on None.get().
    install_compose = (event or {}).get("compose", True)
    client = _ssh_connect()
    try:
        # Already installed? Then just report the versions.
        code, ver_out, _ = _run(client, "docker --version 2>&1", check=False)
        if code == 0 and "Docker version" in ver_out:
            _, compose_out, _ = _run(client, "docker compose version 2>&1", check=False)
            return {
                "status": "already_installed",
                "docker_version": ver_out,
                "compose_version": compose_out if "Docker Compose" in compose_out else None,
            }

        _wait_apt_lock(client)
        for cmd in _DOCKER_INSTALL_CMDS:
            _run(client, cmd, timeout=180)

        if install_compose:
            # Same lock-timeout option as every other apt call, so a
            # briefly held dpkg lock does not fail this step immediately.
            _run(
                client,
                "sudo DEBIAN_FRONTEND=noninteractive apt-get -o DPkg::Lock::Timeout=600 install -y -qq docker-compose-plugin",
                timeout=120,
            )

        # Add the user to the docker group (to run docker without sudo).
        # The name is shell-quoted because it is spliced into a command line.
        ssh_user = os.environ["SSH_USER"]
        _run(client, f"sudo usermod -aG docker {shlex.quote(ssh_user)}", check=False)

        # Smoke test: run hello-world.
        # sudo is used because the group change does not apply until reconnect.
        _run(client, "sudo docker run --rm hello-world", timeout=120)

        _, ver_out, _ = _run(client, "docker --version", check=False)
        _, compose_out, _ = _run(client, "docker compose version 2>&1", check=False)
        return {
            "status": "ok",
            "docker_version": ver_out,
            "compose_version": compose_out if "Docker Compose" in compose_out else None,
            "note": f"user '{ssh_user}' added to docker group (reconnect to use without sudo)",
        }
    finally:
        client.close()