Как запустить блокнот Jupyter в док-контейнере, на котором запущен веб-сервер airflow, работник и т. Д.? - PullRequest
1 голос
/ 15 мая 2019

Я хочу запустить ноутбук JuPyter из док-контейнера, который построен из образа следующим образом:


FROM    debian:stretch

# Never prompts the user for choices on installation/configuration of packages
ENV     DEBIAN_FRONTEND noninteractive
ENV     TERM linux

# Airflow
ARG     AIRFLOW_VERSION=1.10.1
ENV     AIRFLOW_HOME=/usr/local/airflow
ENV     EMBEDDED_DAGS_LOCATION=./dags
ENV     EMBEDDED_PLUGINS_LOCATION=./plugins
ENV     SLUGIFY_USES_TEXT_UNIDECODE=yes
ENV     PYTHONPATH=${PYTHONPATH}:${AIRFLOW_HOME}/athena-py

# Define en_US.
ENV     LANGUAGE en_US.UTF-8
ENV     LANG en_US.UTF-8
ENV     LC_ALL en_US.UTF-8
ENV     LC_CTYPE en_US.UTF-8
ENV     LC_MESSAGES en_US.UTF-8
ENV     LC_ALL en_US.UTF-8

WORKDIR /requirements
# Only copy needed files
COPY    ./requirements/airflow.txt /requirements/airflow.txt

RUN         set -ex \
    &&  buildDeps=' \
    build-essential \
    libblas-dev \
    libffi-dev \
    libkrb5-dev \
    liblapack-dev \
    libpq-dev \
    libxml2-dev \
    libxslt1-dev \
    python3-pip \
    zlib1g-dev \
    libcurl4-gnutls-dev \
    libssh2-1-dev \
    libldap2-dev \
    ' \
    &&  apt-get update -yqq \
    &&  apt-get install -yqq --no-install-recommends \
    $buildDeps \
    apt-utils \
    curl \
    git \
    locales \
    netcat \
    gcc \
    python3-dev \
    libssl-dev \
    libsasl2-dev \
    openssh-server \
    libsasl2-modules \
    \
    &&  sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
    &&  locale-gen \
    &&  update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
    &&  useradd -ms /bin/bash -d ${AIRFLOW_HOME} -u 1002 airflow \
    &&  pip3 install --upgrade pip==9.0.3 'setuptools!=36.0.0' \
    &&  if [ ! -e /usr/bin/pip ]; then ln -s /usr/bin/pip3 /usr/bin/pip ; fi \
    &&  if [ ! -e /usr/bin/python ]; then ln -sf /usr/bin/python3 /usr/bin/python; fi \
    &&  pip3 install -r /requirements/airflow.txt \
    &&  apt-get remove --purge -yqq $buildDeps libpq-dev \
    &&  apt-get clean \
    &&  rm -rf \
    /var/lib/apt/lists/* \
    /tmp/* \
    /var/tmp/* \
    /usr/share/man \
    /usr/share/doc \
    /usr/share/doc-base

# Install env key
RUN     curl -s https://raw.githubusercontent.com/envkey/envkey-source/master/install.sh | bash

# install athena HEAD
WORKDIR ${AIRFLOW_HOME}
COPY    ./some-dir/requirements.txt some-dir/requirements.txt
RUN     pip3 install -r some-dir/requirements.txt
COPY    ./some-dir/ some-dir
COPY    ./tests/ tests

COPY    script/entrypoint.sh entrypoint.sh
COPY    script/setup_connections.py setup_connections.py
COPY    config/airflow.cfg airflow.cfg
COPY    ${EMBEDDED_PLUGINS_LOCATION} plugins
COPY    ${EMBEDDED_DAGS_LOCATION} dags

# Python3 Kernel for JuPyter notebooks
RUN     python3 -m pip install ipykernel
RUN     python3 -m ipykernel install --user
RUN     mkdir -p /usr/local/airflow/.ipython/profile_default/startup/
RUN     echo "import pandas as pd" > /usr/local/airflow/.ipython/profile_default/startup/athena.py


RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install jupyter
RUN         chown -R airflow ${AIRFLOW_HOME} \
    &&  chmod +x entrypoint.sh

EXPOSE  8080 5555 8793 8280 8888

USER        airflow
ENTRYPOINT  ["./entrypoint.sh"]

Обратите внимание на # start jupyter notebook "jupyter-notebook", "--ip=0.0.0.0", "--allow-root" в моем entrypoint.sh.

entrypoint.sh:

#!/usr/bin/env bash

echo "Setting up env vars..."
eval $(envkey-source)
echo "----------------------------------------------------------------------"

CMD="airflow"
TRY_LOOP="${TRY_LOOP:-10}"
POSTGRES_HOST="${POSTGRES_HOST:-postgres}"
POSTGRES_PORT=5432
POSTGRES_CREDS="${POSTGRES_CREDS:-airflow:airflow}"
RABBITMQ_HOST="${RABBITMQ_HOST:-rabbitmq}"
RABBITMQ_CREDS="${RABBITMQ_CREDS:-airflow:airflow}"
RABBITMQ_MANAGEMENT_PORT=15672
FLOWER_URL_PREFIX="${FLOWER_URL_PREFIX:-/flower}"
AIRFLOW_URL_PREFIX="${AIRFLOW_URL_PREFIX:-/airflow}"
LOAD_DAGS_EXAMPLES="${LOAD_DAGS_EXAMPLES:-false}"
REST_API_KEY="${REST_API_KEY:-airflow_api_key}"

if [ -z $FERNET_KEY ]; then
    FERNET_KEY=$(python3 -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")
fi

echo "Postgres host: $POSTGRES_HOST"
echo "RabbitMQ host: $RABBITMQ_HOST"
echo "Load DAG examples: $LOAD_DAGS_EXAMPLES"
echo $1

# Generate Fernet key
sed -i "s/{{ REST_API_KEY }}/${REST_API_KEY}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ FERNET_KEY }}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ POSTGRES_HOST }}/${POSTGRES_HOST}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ POSTGRES_CREDS }}/${POSTGRES_CREDS}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ RABBITMQ_HOST }}/${RABBITMQ_HOST}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ RABBITMQ_CREDS }}/${RABBITMQ_CREDS}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ LOAD_DAGS_EXAMPLES }}/${LOAD_DAGS_EXAMPLES}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s#{{ FLOWER_URL_PREFIX }}#${FLOWER_URL_PREFIX}#" $AIRFLOW_HOME/airflow.cfg
sed -i "s#{{ AIRFLOW_URL_PREFIX }}#${AIRFLOW_URL_PREFIX}#" $AIRFLOW_HOME/airflow.cfg

# wait for rabbitmq
if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then
  j=0
  while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:$RABBITMQ_MANAGEMENT_PORT/api/whoami |grep '200 OK'; do
    j=`expr $j + 1`
    if [ $j -ge $TRY_LOOP ]; then
      echo "$(date) - $RABBITMQ_HOST still not reachable, giving up"
      exit 1
    fi
    echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP"
    sleep 5
  done
fi

# wait for postgres
if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "test" ] ; then
  i=0
  while ! nc -z $POSTGRES_HOST $POSTGRES_PORT; do
    i=`expr $i + 1`
    if [ $i -ge $TRY_LOOP ]; then
      echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up"
      exit 1
    fi
    echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP"
    sleep 5
  done
  # TODO: move to a Helm hook
  #   https://github.com/kubernetes/helm/blob/master/docs/charts_hooks.md
  if [ "$1" = "webserver" ]; then
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "Initialize database..."
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    $CMD initdb
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "setting up connections..."
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    python setup_connections.py
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
  fi
  if [ "$1" = "test" ]; then
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "Initialize database..."
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    $CMD initdb
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "setting up connections..."
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    python setup_connections.py
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "Running tests..."
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    python -m unittest discover
    echo "----------------------------------------------------------------------"
    echo "----------------------------------------------------------------------"
    echo "exiting.."
    exit 1
  fi
fi

# start jupyter notebook
"jupyter-notebook", "--ip=0.0.0.0", "--allow-root"

$CMD "$@"

Я попытался прочитать некоторые статьи и ответы, они указали способы запуска jupyter notebook с портом и ip, некоторые из них написали docker-compose, все заявленные методы оказались напрасными.

Я ожидаю использовать ноутбук Jupyter в качестве localhost:8888 точно так же, как я получаю доступ к веб-серверу воздушного потока через localhost:8080. Однако прямо сейчас я сталкиваюсь с ошибкой: Отказано в соединении

...