Skip to content

Commit

Permalink
Merge pull request #13 from aisingapore/0.4.1-updates
Browse files Browse the repository at this point in the history
0.4.1 updates
  • Loading branch information
Syakyr authored Sep 3, 2024
2 parents f0937c3 + 974c96d commit 57d6af8
Show file tree
Hide file tree
Showing 27 changed files with 190 additions and 160 deletions.
24 changes: 14 additions & 10 deletions coder-templates/gke/khull-gke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ terraform {

locals {
namespace = "runai-proj"
common_pvc_name = "proj-pvc"
codeserver_image_repo = "asia-southeast1-docker.pkg.dev/machine-learning-ops/pub-images/code-server:v4.89.1-2"
common_pvc_path = "/proj-pvc"
common_pvc_name = "pvc-data"
codeserver_image_repo = "asia-southeast1-docker.pkg.dev/machine-learning-ops/pub-images/code-server:stable"
common_pvc_path = "/pvc-data"
# Uncomment the node_selector block in main.spec.template.spec if it is to be used
# node_selector_key = ""
# node_selector_value = ""
#node_selector_key = ""
#node_selector_value = ""
}

provider "coder" {
Expand Down Expand Up @@ -91,9 +91,9 @@ data "coder_workspace" "me" {}
data "coder_workspace_owner" "user" {}

resource "coder_agent" "main" {
os = "linux"
arch = "amd64"
startup_script =<<-EOT
os = "linux"
arch = "amd64"
startup_script =<<-EOT
#!/bin/bash
set -e
Expand Down Expand Up @@ -133,6 +133,11 @@ resource "coder_agent" "main" {
/miniconda3/bin/conda config --set env_prompt '({name})'
fi
if [[ ! -f /home/coder/.gitconfig ]]; then
echo "Unable to find git configuration in home directory, initialising gitconfig file..."
git config --global init.defaultBranch main
fi
/usr/bin/code-server --disable-telemetry --auth none --port 13337 >/tmp/code-server.log 2>&1 &
EOT
Expand Down Expand Up @@ -304,7 +309,7 @@ resource "kubernetes_deployment" "main" {
#}
init_container {
name = "runai-init"
image = "busybox:1.27"
image = "busybox:1.36"
command = ["/bin/sh", "-c", "cp /secrets/runai-sso.yaml /etc/runai/runai-sso.yaml && chmod 0766 /etc/runai/runai-sso.yaml"]
volume_mount {
mount_path = "/secrets"
Expand Down Expand Up @@ -419,4 +424,3 @@ resource "kubernetes_deployment" "main" {
}
}
}

20 changes: 12 additions & 8 deletions coder-templates/on-premise/khull-onprem.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ terraform {

locals {
namespace = "runai-proj"
common_pvc_name = "proj-pvc"
codeserver_image_repo = "registry.aisingapore.net/mlops-pub/code-server:v4.89.1-2"
common_pvc_path = "/proj-pvc"
common_pvc_name = "pvc-data"
codeserver_image_repo = "registry.aisingapore.net/mlops-pub/code-server:stable"
common_pvc_path = "/pvc-data"
# Uncomment the node_selector block in main.spec.template.spec if it is to be used
node_selector_key = ""
node_selector_value = ""
#node_selector_key = ""
#node_selector_value = ""
}

provider "coder" {
Expand Down Expand Up @@ -95,7 +95,7 @@ resource "coder_agent" "main" {
arch = "amd64"
startup_script =<<-EOT
#!/bin/bash
set -e
set -e
if [[ ! -f /home/coder/.bashrc ]]; then
echo "Unable to find user profile in home directory, initialising home directory..."
Expand Down Expand Up @@ -133,6 +133,11 @@ resource "coder_agent" "main" {
/miniconda3/bin/conda config --set env_prompt '({name})'
fi
if [[ ! -f /home/coder/.gitconfig ]]; then
echo "Unable to find git configuration in home directory, initialising gitconfig file..."
git config --global init.defaultBranch main
fi
if [[ ! -f /home/coder/config.json ]]; then
echo "Unable to find image repository credentials in home directory, writing credential file (read-only).."
echo -n $HARBOR_CREDENTIALS >> /home/coder/config.json
Expand Down Expand Up @@ -310,7 +315,7 @@ resource "kubernetes_deployment" "main" {
#}
init_container {
name = "runai-init"
image = "busybox:1.27"
image = "busybox:1.36"
command = ["/bin/sh", "-c", "cp /secrets/runai-sso.yaml /etc/runai/runai-sso.yaml && chmod 0766 /etc/runai/runai-sso.yaml"]
volume_mount {
mount_path = "/secrets"
Expand Down Expand Up @@ -445,4 +450,3 @@ resource "kubernetes_deployment" "main" {
}
}
}

5 changes: 2 additions & 3 deletions docker-builds/code-server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ ARG HOME_DIR="/home/${NON_ROOT_USER}"
ARG CONDA_HOME="/miniconda3"

# version arguments
ARG CODE_VER="4.89.1"
ARG CONDA_VER="py312_24.4.0-0"
ARG CODE_VER="4.92.2"
ARG CONDA_VER="py312_24.7.1-0"

RUN apt-get -qqq update \
&& apt-get install -y -qqq --no-install-recommends \
Expand Down Expand Up @@ -91,4 +91,3 @@ ENV PATH ${CONDA_HOME}/bin:${HOME_DIR}/.local/bin:${PATH}
EXPOSE 8080
ENTRYPOINT ["/usr/bin/code-server"]
CMD ["--disable-telemetry", "--bind-addr=0.0.0.0:8080", "."]

13 changes: 6 additions & 7 deletions docker-builds/mlflow/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM python:3.12-slim-bookworm
FROM python:3.12.5-slim-bookworm

LABEL version='2.13.0-python-3.12-gcs-2.16.0-boto-1.34.112'
LABEL version='2.15.1-python-3.12.5-gcs-2.18.2-boto-1.35.7'
LABEL decription='MLflow Server with basic auth and aws/gcp artefact storage'
LABEL author='Deon Chia'
LABEL author='AISG'

ARG USER=nonroot
ARG UID=1005
Expand All @@ -29,9 +29,9 @@ RUN groupadd -g ${GID} ${GROUP} \
USER ${USER}
WORKDIR /home/${USER}

ARG MLFLOW_VER="2.13.0"
ARG GCS_VER="2.16.0"
ARG BOTO_VER="1.34.112"
ARG MLFLOW_VER="2.15.1"
ARG GCS_VER="2.18.2"
ARG BOTO_VER="1.35.7"
RUN pip3 -qq install \
mlflow==${MLFLOW_VER} \
google-cloud-storage==${GCS_VER} \
Expand All @@ -44,4 +44,3 @@ COPY --chown=${USER}:${USER} --chmod=0744 entrypoint.sh scripts/
ENTRYPOINT [ "./scripts/entrypoint.sh" ]
CMD [ "--port 5005" ]
EXPOSE 5005

63 changes: 33 additions & 30 deletions docker-builds/mlflow/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# MLflow Server

This custom image of the MLflow Server builds on top of the
`python:3.10-slim-bullseye` image MLflow by adding in
`python:3.12.5-slim-bookworm` image by adding in
`google-cloud-storage`, `mlflow` and `boto3` libraries and supports GCS
and ECS artifact storage. Another reason for this image is to allow for
easier customisation of authentication credentials.
Expand All @@ -11,22 +11,21 @@ easier customisation of authentication credentials.
To build the image, run:

```bash
$ docker build \
-t mlflow-server \
--build-arg MLFLOW_VER=2.13.0 .
$ docker build -t mlflow-server .
```

> Note that the `MLFLOW_VER` argument must be specified for a successful
> build invocation. Also, to have authentication availiable, mlflow >=
> 2.5.0.
> For MLflow authentication to be available, the MLflow version must be
> at least 2.5.0. The versions of MLflow, Python, the GCS client library
> and Boto3 are set within the Dockerfile.
To run the mlflow server locally on port 5005:

```bash
$ docker run -d -p 5005:5005 mlflow-server
```

The MLflow server can then be accessed from `localhost:5005` on your browser.
The MLflow server can then be accessed from `localhost:5005` on your
browser.

## Entrypoint

Expand All @@ -38,8 +37,9 @@ For more information on the list of available environment variables in
MLflow, please consult the [official documentation][mlflow-env-var].

Primarily, the `--artifacts-destination` and `--backend-store-uri`
arguments are passed via the custom environment variables `ARTIFACT_URL`
and `DATABASE_URI` correspondingly within the entrypoint script.
arguments are passed via the custom environment variables
`ARTIFACT_URL` and `DATABASE_URI` correspondingly within the entrypoint
script.

To set up the server with authentication, the `--app-name basic-auth`
argument needs to be passed to the image during runtime. Check out
Expand All @@ -63,8 +63,8 @@ to the image for further customisation.
| -------------------------------- | ---------------------------------------------------------------------------- |
| `ARTIFACT_BACKEND` | The backend artifact storage choice. [`ECS`, `GCS`] |
| `ARTIFACT_URL` | Custom path to MLflow artifact storage |
| `AUTH_USERNAME` | Default Admin username for MLflow, if authentication is enabled |
| `AUTH_PASSWORD` | Default Admin password for MLflow, if authentication is enabled |
| `AUTH_USERNAME` | Default admin username for MLflow, if authentication is enabled |
| `AUTH_PASSWORD` | Default admin password for MLflow, if authentication is enabled |
| `AUTH_DATABASE_URL` | Custom path to storage authentication database, if authentication is enabled |
| `AWS_ACCESS_KEY_ID` | The access key ID for S3 |
| `AWS_SECRET_ACCESS_KEY` | Secret key for S3 |
Expand Down Expand Up @@ -95,32 +95,36 @@ artifact storage.

### Custom Authentication

In the event that authentication is required, the `--app-name basic-auth`
argument must be passed when running the image.
In the event that authentication is required, the
`--app-name basic-auth` argument must be passed when running the image.

e.g. Running a simple MLflow tracking server with authentication on port 5005
e.g. Running a simple MLflow tracking server with authentication on
port 5005
```bash
$ docker run --detach --publish 5005:5005 mlflow-server --port 5005 --app-name basic-auth
```

To customise the initial administrative credentials, all three variables -
`AUTH_USERNAME`, `AUTH_PASSWORD` and `AUTH_DATABASE_URL` **must** be
provided. Else, the image will default to the default authentication settings.
To customise the initial administrative credentials, all three
variables - `AUTH_USERNAME`, `AUTH_PASSWORD` and `AUTH_DATABASE_URL` -
**must** be provided. Otherwise, the image falls back to the default
authentication settings.

Once all three custom values for custom authentication are provided, the
MLflow server is started with all non-admin users having no permissions.
This means that until authorised by an admin, all users will not be able
to create/view/delete experiments nor create/log/delete runs.
Once all three values for custom authentication are provided,
the MLflow server is started with all non-admin users having no
permissions. This means that until authorised by an admin, non-admin
users will not be able to create/view/delete experiments or
create/log/delete runs.

> Out of the box, if you have authentication enabled but did not provide custom
authentication credentials, the default username and password are `admin`
and `password` respectively; the authentication database will be located at
`sqlite:///basic_auth.db`.
> Out of the box, if you have authentication enabled but did not
> provide custom authentication credentials, the default username and
> password are `admin` and `password` respectively; the authentication
> database will be located at `sqlite:///basic_auth.db`.
To configure the default values, the corresponding
values are to be changed within the `.ini` file at
To configure the default values, the corresponding values are to be
changed within the `.ini` file at
`$PWD/.local/lib/python3.12/site-packages/mlflow/server/auth/basic_auth.ini`.
For more information, refer to [MLflow's authentication documentation.](https://mlflow.org/docs/latest/auth/index.html)
For more information, refer to
[MLflow's authentication documentation](https://mlflow.org/docs/latest/auth/index.html).

## Default Values

Expand All @@ -129,4 +133,3 @@ custom values are passed to the image:

* `ARTIFACT_URL`: `./mlruns`
* `DATABASE_URI`: `sqlite:///mlflow.db` (sqlite database at `./mlflow.db`)

12 changes: 6 additions & 6 deletions docker-builds/mlflow/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@ mlflow_config="$PWD/.local/lib/python3.12/site-packages/mlflow/server/auth/basic
case $ARTIFACT_BACKEND in
ECS)
if [[ -z "$AWS_ACCESS_KEY_ID" || -z "$AWS_SECRET_ACCESS_KEY" || -z "$MLFLOW_S3_ENDPOINT_URL" ]]; then
echo "Artefact backend set to $ARTIFACT_BACKEND, but the necessary S3 credentials are not found."
echo "Artifact backend set to $ARTIFACT_BACKEND, but the necessary S3 credentials are not found."
exit 1
fi
echo 'Setting up artefact server in ECS S3.'
echo 'Setting up artifact server in ECS S3.'
unset GOOGLE_APPLICATION_CREDENTIALS;;
GCS)
if [[ -z "$GOOGLE_APPLICATION_CREDENTIALS" ]]; then
echo "Artefact backend set to $ARTIFACT_BACKEND, but the necessary GCS credentials are not found."
echo "Artifact backend set to $ARTIFACT_BACKEND, but the necessary GCS credentials are not found."
exit 1
fi
echo 'Setting up artefact server in Google Cloud Storage.'
echo 'Setting up artifact server in Google Cloud Storage.'
unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY;;
"")
echo 'Artefact backend is not set. Defaulting to local filesystem.';;
echo 'Artifact backend is not set. Defaulting to local filesystem.';;
*)
echo 'Unknown artefact backend given. Defaulting to local filesystem.';;
echo 'Unknown artifact backend given. Defaulting to local filesystem.';;
esac

if [[ -n "$AUTH_USERNAME" && -n "$AUTH_PASSWORD" && -n "$AUTH_DATABASE_URL" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/code-server/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ apiVersion: v2
name: code-server-aisg
description: Deploys VSCode server with authentication in AISG-managed clusters.
type: application
version: 0.1.0
version: 0.1.1
Loading

0 comments on commit 57d6af8

Please sign in to comment.