CogStack · vladd-bit · Jun 19, 2025 · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -26,5 +26,12 @@
 	"workspaceMount": "source=${localWorkspaceFolder},target=${localWorkspaceFolder},type=bind",
 	"mounts": [
 		"source=${localEnv:HOME}/.cache/pip,target=/home/vscode/.cache/pip,type=bind"
-	]
+	],
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"charliermarsh.ruff"
+			]
+		}
+	}
 }
diff --git a/.dockerignore b/.dockerignore
@@ -1,10 +1,101 @@
 .DS_Store
-.idea
-__pycache__
-venv
-venv-test
 tmp_cat.log
 # skip the custom envs
 #envs
 # skip the models directory
-models
+models
+!models/examples
+models/examples/example-medcat-v1-model-pack
+
+## Default Python .dockerignore
+# Git
+.git
+.gitignore
+.gitattributes
+
+
+# CI
+.codeclimate.yml
+.travis.yml
+.taskcluster.yml
+
+# Docker
+docker/
+docker-compose.yml
+Dockerfile
+.docker
+.dockerignore
+
+# Byte-compiled / optimized / DLL files
+**/__pycache__/
+**/*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+*.pot
+
+# Log files
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Virtual environment
+.env
+.venv/
+venv/
+
+# PyCharm
+.idea
+
+# Python mode for VIM
+.ropeproject
+**/.ropeproject
+
+# Vim swap files
+**/*.swp
+
+# VS Code
+.vscode/
+.devcontainer
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -15,6 +15,8 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    outputs:
+      image_version: ${{ steps.meta.outputs.version }}
     steps:        
       - name: Log in to Docker Hub
         uses: docker/login-action@v1
@@ -79,4 +81,27 @@ jobs:
           cache-to: type=registry,ref=cogstacksystems/medcat-service-gpu:buildcache,mode=max
 
       - name: Image digest
-        run: echo ${{ steps.docker_build.outputs.digest }}
+        run: echo ${{ steps.docker_build.outputs.digest }}
+  integration_test:
+    runs-on: ubuntu-latest
+    needs: build  # supplies needs.build.outputs.image_version
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Run Test with example model pack
+        run: |
+          echo "🧪 Running Examples test..."
+          export IMAGE_TAG=${{ needs.build.outputs.image_version }} # Improve by running exact digest instead
+          echo "Running with image tag ${IMAGE_TAG}"
+          bash scripts/test_examples.sh
+
+      - name: Report test result
+        if: always()  # run even when an earlier step failed
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then  # $? is always 0 in a fresh step shell
+            echo "✅ Integration test PASSED"
+          else
+            echo "❌ Integration test FAILED"
+            exit 1
+          fi
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -27,18 +27,26 @@ jobs:
       - name: checkout repo
         uses: actions/checkout@v2
 
+      - name: Cache MedMen Model
+        id: cache-medmen
+        uses: actions/cache@v4
+        with:
+          path: models/medmen
+          key: medmen-${{ hashFiles('scripts/download_medmen.sh') }}  # key changes whenever the download script changes
+
       - name: Install Python 3
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v5
         with:
           python-version: 3.9
+          cache: 'pip' # caching pip dependencies
 
       - name: Install dependencies
         run: |
           sudo apt-get install -y isort flake8
           python -m pip install --upgrade pip
           python -m pip install virtualenv setuptools
-          python -m pip install isort flake8 mypy stubs types-Flask types-simplejson types-setuptools
-          python -m pip install -r ./requirements.txt
+          python -m pip install isort flake8 mypy stubs types-Flask types-simplejson types-setuptools types-requests
+          python -m pip install -r ./requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/;
 
       - name: Check linting and types
         run: |

diff --git a/.gitignore b/.gitignore
@@ -11,6 +11,8 @@ __pycache__
 # skip the models directory
 models/*
 !models/*.sh
+!models/examples
+models/examples/example-medcat-v1-model-pack
 
 # tmp folder
 tmp/*

diff --git a/.ruff.toml b/.ruff.toml
@@ -0,0 +1,19 @@
+line-length = 120
+indent-width = 4
+
+[lint]
+# Enable pycodestyle (`E`), Pyflakes (`F`), pyupgrade (`UP`), flake8-bugbear (`B`), flake8-simplify (`SIM`) and isort (`I`) rules.
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+]
diff --git a/Dockerfile b/Dockerfile
@@ -5,14 +5,22 @@ ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
 # Set the python path and preapre the base layer
 WORKDIR /cat
 COPY ./requirements.txt /cat
-RUN pip install --upgrade pip
 
-# Install requirements for the app
-RUN pip3 install --no-cache-dir -r requirements.txt
+# Install Python dependencies; USE_CPU_TORCH=true adds the PyTorch CPU wheel index, any other value installs from the default index only
+ARG USE_CPU_TORCH=true
+# NOTE: Allow building without GPU so as to lower image size (GPU is disabled by default)
+RUN pip install -U pip && \
+    if [ "${USE_CPU_TORCH}" = "true" ]; then \
+        echo "Installing Torch for CPU, without GPU support " && \
+        pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/; \
+    else \
+        echo "Installing Torch with GPU support" && \
+        pip install --no-cache-dir -r requirements.txt; \
+    fi
 
 # Get the spacy model
 ARG SPACY_MODELS="en_core_web_sm en_core_web_md en_core_web_lg"
-RUN for spacy_model in ${SPACY_MODELS}; do python -m spacy download $spacy_model; done
+RUN for spacy_model in $SPACY_MODELS; do python -m spacy download $spacy_model; done
 
 # Copy the remaining files
 COPY . /cat

diff --git a/Dockerfile_gpu b/Dockerfile_gpu
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 # Get the spacy model
 ARG SPACY_MODELS="en_core_web_sm en_core_web_md en_core_web_lg"
-RUN for spacy_model in ${SPACY_MODELS}; do python -m spacy download $spacy_model; done
+RUN for spacy_model in $SPACY_MODELS; do python -m spacy download $spacy_model; done
 
 # Copy the remaining files
 COPY . /cat

diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ There are two scripts provided implementing starting the application:
 ## Running in a Docker container
 
 The recommended way to run the application is to use the provided Docker image. The Docker image can be either downloaded from the Docker Hub (`cogstacksystems/medcat-service:latest`) or build manually using the provided `Dockerfile`. 
-Please note that by default the built docker image will run the Flask application in 'production' mode running `start-service-prod.sh` script.
+Please note that by default the built docker image will run the Flask application in 'production' mode running `start_service_production.sh` script.
 
 To build the Docker image manually:
 
@@ -59,7 +59,7 @@ If you have a gpu and wish to use it, please change the `docker/docker-compose.y
 ### <span style="color:red">IMPORTANT !</span>
 If you wish to run this docker service manually, use the docker/docker-compose.yml file, execute `docker compose up -d` whilst in the `docker` folder. 
 
-Alternatively, an example script `./docker/run_example_medmen.sh` was provided to run the Docker container with MedCAT service. The script will download an example model (using the `./models/download_medmen.sh` script),it will use an example environment configuration, then it will build and start the service using the provided Docker Compose file, the service <b><span style="color:red">WONT WORK</span></b> without the model being present.
+Alternatively, an example script `./docker/run_example_medmen.sh` is provided to run the Docker container with MedCAT service. The script will download an example model (using the `./scripts/download_medmen.sh` script), use an example environment configuration, then build and start the service using the provided Docker Compose file. The service <b><span style="color:red">WON'T WORK</span></b> without the model being present.
 
 All models should be mounted from the `models/` folder.
 

diff --git a/docker/docker-compose-example-medmen.yml b/docker/docker-compose-example-medmen.yml
@@ -10,6 +10,11 @@ services:
       - http_proxy=$HTTP_PROXY
       - https_proxy=$HTTPS_PROXY
       - no_proxy=$no_proxy
+      - ENABLE_MODEL_DOWNLOAD=true
+      - MODEL_NAME=medmen
+      - MODEL_VOCAB_URL=https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat
+      - MODEL_CDB_URL=https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/cdb-medmen-v1.dat
+      - MODEL_META_URL=https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/mc_status.zip
     env_file:
       - ../env/app.env
       - ../env/medcat.env
@@ -21,27 +26,7 @@ services:
     networks:
       - cognet
 
-  nlp-medcat-service-production-deid:
-    ### Multiple images available:
-    ## default image, only CPU support: cogstacksystems/medcat-service:latest 
-    ## GPU support: cogstacksystems/medcat-service-gpu:latest
-    image: cogstacksystems/medcat-service:latest
-    restart: always
-    environment:
-      - http_proxy=$HTTP_PROXY
-      - https_proxy=$HTTPS_PROXY
-      - no_proxy=$no_proxy
-    env_file:
-      - ../env/app_deid.env
-      - ../env/medcat_deid.env
-    volumes:
-    - ../models:/cat/models/
-    ports:
-      - "5556:5000"
-    networks:
-      - cognet
-
+
 networks:
   cognet:
-    driver: bridge
     name: cogstack-net
diff --git a/docker/docker-compose.example.yml b/docker/docker-compose.example.yml
@@ -0,0 +1,10 @@
+name: cogstack-medcat-service
+services:
+  medcat-service:
+    image: cogstacksystems/medcat-service:${IMAGE_TAG:-latest}  # falls back to "latest" when IMAGE_TAG is unset or empty
+    restart: unless-stopped
+    environment:
+      # Uses a preloaded model pack example inside the image
+      - APP_MEDCAT_MODEL_PACK=/cat/models/examples/example-medcat-v1-model-pack.zip
+    ports:
+      - "5555:5000"
diff --git a/docker/run_example_medmen.sh b/docker/run_example_medmen.sh
@@ -1,43 +1,12 @@
 #!/usr/bin/env bash
 
-
-( cd ../models && bash download_medmen.sh )
-
-echo "Running docker-compose"
-DOCKER_COMPOSE_FILE="docker-compose-example-medmen.yml"
-docker compose -f ${DOCKER_COMPOSE_FILE} up -d
-
+DOCKER_COMPOSE_FILE="docker-compose-example-medmen.yml"  # relative path: run this script from the docker/ directory
 # To run in a container run "export LOCALHOST_NAME=host.docker.internal"
 LOCALHOST_NAME=${LOCALHOST_NAME:-localhost}
 
-API="http://${LOCALHOST_NAME}:5555/api/info"
-
-MAX_RETRIES=12
-RETRY_DELAY=5
-COUNT=0
-
-while [ $COUNT -lt $MAX_RETRIES ]; do
-  echo "Checking service health on $API (Attempt $((COUNT+1))/$MAX_RETRIES)"
-  sleep $RETRY_DELAY
-  IS_READY=$(curl -s -o /dev/null -w "%{http_code}" $API)
-
-  if [ "$IS_READY" = "200" ]; then
-    echo "Service is ready!"
-    break
-  else
-    echo "Attempt $((COUNT+1))/$MAX_RETRIES: Not ready (HTTP $IS_READY)."
-    docker compose -f ${DOCKER_COMPOSE_FILE} logs
-    COUNT=$((COUNT+1))
-  fi
-done
-
-if [ $COUNT -eq $MAX_RETRIES ]; then
-  echo "❌ Service did not become ready after $MAX_RETRIES attempts."
-  exit 1
-fi
+echo "Running docker-compose"
+docker compose -f "${DOCKER_COMPOSE_FILE}" up -d  # quoted so the file name is passed as one argument
 
-cat <<EOF
------------------------------------------------------------------
-MedCATService running on http://${LOCALHOST_NAME}:5555/
------------------------------------------------------------------
-EOF
+echo "Running test"
+source ../scripts/integration_test_functions.sh  # relative path: run this script from the docker/ directory
+smoketest_medcat_service "$LOCALHOST_NAME" "$DOCKER_COMPOSE_FILE"