diff --git a/.envs/.local/.django b/.envs/.local/.django index 3c2ac15..ddbdcc1 100755 --- a/.envs/.local/.django +++ b/.envs/.local/.django @@ -16,4 +16,7 @@ CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xb # Timeout fetch_data # ------------------------------------------------------------------------------ -FETCH_DATA_TIMEOUT=2 \ No newline at end of file +FETCH_DATA_TIMEOUT=2 + + +HF_TOKEN= \ No newline at end of file diff --git a/.gitignore b/.gitignore index ca9e27c..a3cd991 100644 --- a/.gitignore +++ b/.gitignore @@ -171,4 +171,6 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -# End of https://www.toptal.com/developers/gitignore/api/django \ No newline at end of file +# End of https://www.toptal.com/developers/gitignore/api/django + +llama3/llama-3.2/* \ No newline at end of file diff --git a/Makefile b/Makefile index 79cd5da..ce98fe5 100755 --- a/Makefile +++ b/Makefile @@ -38,6 +38,9 @@ build_date: ## Show build date build: ## Build app using $(compose) @docker compose -f $(compose) build +build_llama: ## Build app using $(compose) with llama enabled + @docker compose -f llama.$(compose) build + build_no_cache: ## Build app using $(compose) @docker compose -f $(compose) build --no-cache diff --git a/README.md b/README.md index bc60963..725b37e 100644 --- a/README.md +++ b/README.md @@ -178,3 +178,7 @@ Signup at: [https://sentry.io/signup/?code=cookiecutter](https://sentry.io/signu ## Deployment See full [Docker deployment guide](http://cookiecutter-django.readthedocs.io/en/latest/deployment-with-docker.html). 
+ +## Download Model + +https://github.com/scieloorg/markapi/wiki/Guia-r%C3%A1pido:-baixar-e-configurar-o-modelo-do-MarkAPI-para-marca%C3%A7%C3%A3o-de-refer%C3%AAncias-em-PDF diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile index 612c6e3..4fe37f1 100755 --- a/compose/local/django/Dockerfile +++ b/compose/local/django/Dockerfile @@ -27,16 +27,6 @@ RUN apt-get update && \ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 50 && \ apt-get clean && rm -rf /var/lib/apt/lists/* -# Instalar ninja-build y cmake -RUN apt-get install -y ninja-build cmake - -# Configurar variables de entorno para compilar con BLAS y SIMD condicionalmente -ARG ENABLE_OPTIMIZATIONS=true -ARG ENABLE_OPTIMIZATIONS=true -ENV CFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \ - CXXFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \ - CMAKE_ARGS="${ENABLE_OPTIMIZATIONS:+-DGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS}" - # Actualizar pip, setuptools y wheel antes de instalar dependencias RUN python -m pip install --upgrade pip setuptools wheel @@ -63,12 +53,6 @@ ENV PYTHONUNBUFFERED 1 ENV PYTHONDONTWRITEBYTECODE 1 ENV BUILD_ENV ${BUILD_ENVIRONMENT} -# Disable AVX support for llama-cpp-python if needed -ARG DISABLE_AVX=false - -# Set the version of llama-cpp-python -ARG LLAMA_VERSION=0.3.14 - WORKDIR ${APP_HOME} RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list @@ -89,17 +73,10 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ # copy python dependency wheels from python-build-stage COPY --from=python-build-stage /usr/src/app/wheels /wheels/ -# Use wheels to install python dependencies (excluding llama-cpp-python) -RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! 
-name "llama_cpp_python*") \ +# Use wheels to install python dependencies +RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") \ && rm -rf /wheels/ -# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes with or without AVX support -RUN if [ "${DISABLE_AVX}" = "true" ]; then \ - CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ - else \ - pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ - fi - COPY ./compose/production/django/entrypoint /entrypoint RUN sed -i 's/\r$//g' /entrypoint RUN chmod +x /entrypoint diff --git a/compose/local/django/Dockerfile.llama b/compose/local/django/Dockerfile.llama new file mode 100755 index 0000000..7719a34 --- /dev/null +++ b/compose/local/django/Dockerfile.llama @@ -0,0 +1,110 @@ +ARG PYTHON_VERSION=3.11-bullseye + +# define an alias for the specific python version used in this file. 
+FROM python:${PYTHON_VERSION} AS python + +# Python build stage +FROM python AS python-build-stage + +ARG BUILD_ENVIRONMENT=local + +# Install apt packages +RUN apt-get update && apt-get install --no-install-recommends -y \ + # dependencies for building Python packages + build-essential \ + git \ + # psycopg2 dependencies + libpq-dev \ + # other dependencies + software-properties-common \ + libopenblas-dev \ + libomp-dev + +# Instalar gcc-10 y g++-10 en Debian Bullseye +RUN apt-get update && \ + apt-get install -y gcc-10 g++-10 ninja-build cmake && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 50 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 50 && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Configurar variables de entorno para compilar con BLAS y SIMD condicionalmente +ARG ENABLE_OPTIMIZATIONS=true +ENV CFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \ + CXXFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \ + CMAKE_ARGS="${ENABLE_OPTIMIZATIONS:+-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS}" + +# Actualizar pip, setuptools y wheel antes de instalar dependencias +RUN python -m pip install --upgrade pip setuptools wheel + +# Requirements are installed here to ensure they will be cached. +COPY ./requirements . + +# Create application directory to hold DOCX layouts +COPY ./docx_layouts . + +# Update pip +RUN python -m pip install --upgrade pip + +# Create Python Dependency and Sub-Dependency Wheels. 
+RUN pip wheel --wheel-dir /usr/src/app/wheels \ + -r ${BUILD_ENVIRONMENT}.txt \ + -r extra-llama.txt + +# Python 'run' stage +FROM python AS python-run-stage + +ARG BUILD_ENVIRONMENT=local +ARG APP_HOME=/app + +ENV PYTHONUNBUFFERED 1 +ENV PYTHONDONTWRITEBYTECODE 1 +ENV BUILD_ENV ${BUILD_ENVIRONMENT} + +WORKDIR ${APP_HOME} + +RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list + +# Install required system dependencies +RUN apt-get update && apt-get install --no-install-recommends -y \ + # psycopg2 dependencies + libpq-dev \ + # Translations dependencies + gettext \ + # libreoffice for document conversions + default-jre libreoffice libreoffice-java-common ttf-mscorefonts-installer fonts-liberation fonts-liberation2 fonts-crosextra-carlito fonts-crosextra-caladea fonts-dejavu fonts-noto \ + # cleaning up unused files + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && rm -rf /var/lib/apt/lists/* + +# All absolute dir copies ignore workdir instruction. 
All relative dir copies are wrt to the workdir instruction +# copy python dependency wheels from python-build-stage +COPY --from=python-build-stage /usr/src/app/wheels /wheels/ + +# Use wheels to install python dependencies +RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") \ + && rm -rf /wheels/ + +COPY ./compose/production/django/entrypoint /entrypoint +RUN sed -i 's/\r$//g' /entrypoint +RUN chmod +x /entrypoint + +COPY ./compose/local/django/start /start +RUN sed -i 's/\r$//g' /start +RUN chmod +x /start + +COPY ./compose/local/django/celery/worker/start /start-celeryworker +RUN sed -i 's/\r$//g' /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY ./compose/local/django/celery/beat/start /start-celerybeat +RUN sed -i 's/\r$//g' /start-celerybeat +RUN chmod +x /start-celerybeat + +COPY ./compose/local/django/celery/flower/start /start-flower +RUN sed -i 's/\r$//g' /start-flower +RUN chmod +x /start-flower + +# copy application code to WORKDIR +COPY . ${APP_HOME} + +ENTRYPOINT ["/entrypoint"] diff --git a/compose/production/django/Dockerfile b/compose/production/django/Dockerfile index 8ad536d..34b6e6f 100755 --- a/compose/production/django/Dockerfile +++ b/compose/production/django/Dockerfile @@ -39,12 +39,6 @@ ENV PYTHONUNBUFFERED 1 ENV PYTHONDONTWRITEBYTECODE 1 ENV BUILD_ENV ${BUILD_ENVIRONMENT} -# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support -ARG DISABLE_AVX=true - -# Set the version of llama-cpp-python -ARG LLAMA_VERSION=0.3.14 - WORKDIR ${APP_HOME} RUN addgroup --system django \ @@ -68,15 +62,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ # copy python dependency wheels from python-build-stage COPY --from=python-build-stage /usr/src/app/wheels /wheels/ -# use wheels to install python dependencies (excluding llama-cpp-python) -RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! 
-name "llama_cpp_python*") && rm -rf /wheels/ - -# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support -RUN if [ "${DISABLE_AVX}" = "true" ]; then \ - CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ - else \ - pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ - fi +# use wheels to install python dependencies +RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") && rm -rf /wheels/ COPY --chown=django:django ./compose/production/django/entrypoint /entrypoint RUN sed -i 's/\r$//g' /entrypoint diff --git a/compose/production/django/Dockerfile.llama b/compose/production/django/Dockerfile.llama new file mode 100755 index 0000000..8ad536d --- /dev/null +++ b/compose/production/django/Dockerfile.llama @@ -0,0 +1,109 @@ +ARG PYTHON_VERSION=3.11-bullseye + +# define an alias for the specific python version used in this file. +FROM python:${PYTHON_VERSION} as python + +# Python build stage +FROM python as python-build-stage + +ARG BUILD_ENVIRONMENT=production + +# Install apt packages +RUN apt-get update && apt-get install --no-install-recommends -y \ + # dependencies for building Python packages + git \ + build-essential \ + # psycopg2 dependencies + libpq-dev + +# Requirements are installed here to ensure they will be cached. +COPY ./requirements . + +# Create application directory to hold DOCX layouts +COPY ./docx_layouts . + +# Update pip +RUN python -m pip install --upgrade pip + +# Create Python Dependency and Sub-Dependency Wheels. 
+RUN pip wheel --wheel-dir /usr/src/app/wheels \ + -r ${BUILD_ENVIRONMENT}.txt + +# Python 'run' stage +FROM python as python-run-stage + +ARG BUILD_ENVIRONMENT=production +ARG APP_HOME=/app + +ENV PYTHONUNBUFFERED 1 +ENV PYTHONDONTWRITEBYTECODE 1 +ENV BUILD_ENV ${BUILD_ENVIRONMENT} + +# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support +ARG DISABLE_AVX=true + +# Set the version of llama-cpp-python +ARG LLAMA_VERSION=0.3.14 + +WORKDIR ${APP_HOME} + +RUN addgroup --system django \ + && adduser --system --ingroup django django + +RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list + +# Install required system dependencies +RUN apt-get update && apt-get install --no-install-recommends -y \ + # psycopg2 dependencies + libpq-dev \ + # libreoffice for document conversions + default-jre libreoffice libreoffice-java-common ttf-mscorefonts-installer fonts-liberation fonts-liberation2 fonts-crosextra-carlito fonts-crosextra-caladea fonts-dejavu fonts-noto \ + # Translations dependencies + gettext \ + # cleaning up unused files + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && rm -rf /var/lib/apt/lists/* + +# All absolute dir copies ignore workdir instruction. All relative dir copies are wrt to the workdir instruction +# copy python dependency wheels from python-build-stage +COPY --from=python-build-stage /usr/src/app/wheels /wheels/ + +# use wheels to install python dependencies (excluding llama-cpp-python) +RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! 
-name "llama_cpp_python*") && rm -rf /wheels/ + +# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support +RUN if [ "${DISABLE_AVX}" = "true" ]; then \ + CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ + else \ + pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \ + fi + +COPY --chown=django:django ./compose/production/django/entrypoint /entrypoint +RUN sed -i 's/\r$//g' /entrypoint +RUN chmod +x /entrypoint + +COPY --chown=django:django ./compose/production/django/start /start +RUN sed -i 's/\r$//g' /start +RUN chmod +x /start + +COPY --chown=django:django ./compose/production/django/celery/worker/start /start-celeryworker +RUN sed -i 's/\r$//g' /start-celeryworker +RUN chmod +x /start-celeryworker + +COPY --chown=django:django ./compose/production/django/celery/beat/start /start-celerybeat +RUN sed -i 's/\r$//g' /start-celerybeat +RUN chmod +x /start-celerybeat + +COPY ./compose/production/django/celery/flower/start /start-flower +RUN sed -i 's/\r$//g' /start-flower +RUN chmod +x /start-flower + +# copy application code to WORKDIR +COPY --chown=django:django . ${APP_HOME} + +# make django owner of the WORKDIR directory as well. 
+RUN chown django:django ${APP_HOME} + +USER django + +ENTRYPOINT ["/entrypoint"] diff --git a/config/settings/base.py b/config/settings/base.py index 51948db..d8e587e 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -24,8 +24,6 @@ ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent # core/ APPS_DIR = ROOT_DIR / "core" -LLAMA_MODEL_DIR = ROOT_DIR / "llama3/llama-3.2" -MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf" env = environ.Env() READ_DOT_ENV_FILE = env.bool("DJANGO_READ_DOT_ENV_FILE", default=False) @@ -294,4 +292,9 @@ "ACCESS_TOKEN_LIFETIME": timedelta(minutes=60), "REFRESH_TOKEN_LIFETIME": timedelta(days=1), # "AUTH_TOKEN_CLASSES": ("rest_framework_simplejwt.tokens.AccessToken",), -} \ No newline at end of file +} + +# LLAMA +LLAMA_ENABLED = env.bool("LLAMA_ENABLED", default=False) +LLAMA_MODEL_DIR = ROOT_DIR / "llama3/llama-3.2" +MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf" \ No newline at end of file diff --git a/llama.local.yml b/llama.local.yml new file mode 100644 index 0000000..6af50cb --- /dev/null +++ b/llama.local.yml @@ -0,0 +1,77 @@ +services: + django: &django + build: + context: . + dockerfile: ./compose/local/django/Dockerfile.llama + args: + BUILD_ENVIRONMENT: local + image: markapi_local_django + container_name: markapi_local_django + depends_on: + - redis + - postgres + - mailhog + volumes: + - .:/app:z + env_file: + - ./.envs/.local/.django + - ./.envs/.local/.postgres + ports: + - "8009:8000" + command: /start + + mailhog: + image: mailhog/mailhog:v1.0.0 + container_name: markapi_local_mailhog + ports: + - "8029:8025" + + postgres: + build: + context: . 
+ dockerfile: ./compose/production/postgres/Dockerfile + image: markapi_local_postgres + container_name: markapi_local_postgres + volumes: + - ../scms_data/markapi/data_dev:/var/lib/postgresql/data:Z + - ../scms_data/markapi/data_dev_backup:/backups:z + ports: + - "5439:5432" + env_file: + - ./.envs/.local/.postgres + + redis: + image: redis:6 + container_name: markapi_local_redis + ports: + - "6399:6379" + + celeryworker: + <<: *django + image: markapi_local_celeryworker + container_name: markapi_local_celeryworker + depends_on: + - redis + - postgres + - mailhog + ports: [] + command: /start-celeryworker + + celerybeat: + <<: *django + image: markapi_local_celerybeat + container_name: markapi_local_celerybeat + depends_on: + - redis + - postgres + - mailhog + ports: [] + command: /start-celerybeat + + flower: + <<: *django + image: markapi_local_flower + container_name: markapi_local_flower + ports: + - "5559:5555" + command: /start-flower diff --git a/llama3/download_model.py b/llama3/download_model.py deleted file mode 100644 index 0b93f8f..0000000 --- a/llama3/download_model.py +++ /dev/null @@ -1,15 +0,0 @@ -from huggingface_hub import login -from huggingface_hub import hf_hub_download - - -HF_TOKEN = 'INTRODUCE_TOKEN' - -login(token=HF_TOKEN) - -LLAMA_MODEL_DIR = "llama3/llama-3.2" -MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf" -repo_id = 'hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF' -filename = MODEL_LLAMA -local_dir = LLAMA_MODEL_DIR - -downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir) \ No newline at end of file diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py index 2e56f16..e25d4f9 100644 --- a/llama3/generic_llama.py +++ b/llama3/generic_llama.py @@ -1,22 +1,58 @@ +from config.settings.base import LLAMA_ENABLED, LLAMA_MODEL_DIR, MODEL_LLAMA -from config.settings.base import LLAMA_MODEL_DIR, MODEL_LLAMA -from llama_cpp import Llama import os + +class LlamaDisabledError(Exception): + pass + 
+class LlamaModelNotFoundError(FileNotFoundError): + pass + +class LlamaNotInstalledError(ImportError): + pass + class GenericLlama: + # Singleton pattern to cache the LLaMA model instance + _cached_llm = None - def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5): - self. llm = Llama(model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA), n_ctx=4000) - self. messages = messages + def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5): + self.messages = messages self.response_format = response_format self.max_tokens = max_tokens self.temperature = temperature self.top_p = top_p + if not LLAMA_ENABLED: + raise LlamaDisabledError("LLaMA is disabled in settings.") + + if GenericLlama._cached_llm is None: + try: + from llama_cpp import Llama + except ImportError as e: + raise LlamaNotInstalledError("The 'llama-cpp-python' package is not installed. Please use the llama-activated Docker image (Dockerfile.llama).") from e + + model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA) + if not os.path.isfile(model_path): + raise LlamaModelNotFoundError(f"LLaMA model file not found at {model_path}. 
Please ensure the model is downloaded and the path is correct.") + + try: + GenericLlama._cached_llm = Llama(model_path=model_path, n_ctx=max_tokens) + except Exception as e: + raise RuntimeError(f"Failed to initialize LLaMA model: {e}") from e + + self.llm = GenericLlama._cached_llm + def run(self, user_input): input = self.messages.copy() input.append({ 'role': 'user', 'content': user_input }) - return self.llm.create_chat_completion(messages=input, response_format=self.response_format, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) \ No newline at end of file + return self.llm.create_chat_completion( + messages=input, + response_format=self.response_format, + max_tokens=self.max_tokens, + temperature=self.temperature, + top_p=self.top_p + ) diff --git a/reference/management/commands/download_model.py b/reference/management/commands/download_model.py new file mode 100644 index 0000000..39f99b7 --- /dev/null +++ b/reference/management/commands/download_model.py @@ -0,0 +1,48 @@ +import os +from pathlib import Path + +from django.core.management.base import BaseCommand, CommandError +from huggingface_hub import hf_hub_download, login + + +class Command(BaseCommand): + help = "Download the model from HuggingFace" + + def add_arguments(self, parser): + parser.add_argument( + "--dir", + type=str, + default="llama3/llama-3.2", + help="Directory to download the model", + ) + parser.add_argument( + "--repo", + type=str, + default="hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF", + ) + parser.add_argument( + "--filename", + type=str, + default="llama-3.2-3b-instruct-q4_k_m.gguf", + help="Model name", + ) + parser.add_argument("--force", action="store_true", help="Force download") + + def handle(self, *args, **options): + token = os.getenv("HF_TOKEN") + if not token: + raise CommandError("You need to set the HF_TOKEN environment variable") + login(token=token, add_to_git_credential=False) + + target_dir = Path(options["dir"]) + 
target_dir.mkdir(parents=True, exist_ok=True) + + downloaded_file = hf_hub_download( + repo_id=options["repo"], + filename=options["filename"], + local_dir=str(target_dir), + local_dir_use_symlinks=False, + force_download=options["force"], + resume_download=True, + ) + self.stdout.write(self.style.SUCCESS(f"Downloaded {downloaded_file}")) diff --git a/reference/marker.py b/reference/marker.py index 63582e9..dda75ec 100644 --- a/reference/marker.py +++ b/reference/marker.py @@ -1,14 +1,46 @@ -from llama3.generic_llama import GenericLlama +import logging +from llama3.generic_llama import ( + GenericLlama, + LlamaDisabledError, + LlamaNotInstalledError, + LlamaModelNotFoundError, +) from reference.config import MESSAGES, RESPONSE_FORMAT +from tracker.models import GeneralEvent def mark_reference(reference_text): - reference_marker = GenericLlama(MESSAGES, RESPONSE_FORMAT) - output = reference_marker.run(reference_text) - # output['choices'][0]['message']['content'] - for item in output["choices"]: - yield item["message"]["content"] + try: + reference_marker = GenericLlama(MESSAGES, RESPONSE_FORMAT) + output = reference_marker.run(reference_text) + for item in output.get("choices", []): + yield item.get("message", {}).get("content", "") + + except (LlamaDisabledError, LlamaNotInstalledError, LlamaModelNotFoundError) as e: + logging.error(f"Error marking reference: {e}") + GeneralEvent.create( + exception=e, + exc_traceback=None, + item=None, + action="mark_reference", + detail={"reference_text": reference_text} + ) + if isinstance(e, LlamaModelNotFoundError): + yield f"Llama model file not found: {str(e)}" + else: + yield f"Llama model is not available: {str(e)}" + + except Exception as e: + logging.error(f"Unexpected error marking reference: {e}") + GeneralEvent.create( + exception=e, + exc_traceback=None, + item=None, + action="mark_reference", + detail={"reference_text": reference_text} + ) + yield f"An unexpected error occurred: {str(e)}" def 
mark_references(reference_block): diff --git a/reference/models.py b/reference/models.py index b38dd2a..e5e39aa 100755 --- a/reference/models.py +++ b/reference/models.py @@ -10,7 +10,7 @@ from django.core.validators import MinValueValidator, MaxValueValidator from core.forms import CoreAdminModelForm -# Create your models here + class Reference(CommonControlField, ClusterableModel): mixed_citation = models.TextField(_("Mixed Citation"), null=False, blank=True) @@ -18,7 +18,7 @@ class Reference(CommonControlField, ClusterableModel): panels = [ FieldPanel('mixed_citation'), - InlinePanel('element_citation') + InlinePanel('element_citation', label=_("Cited Elements")) ] base_form_class = CoreAdminModelForm diff --git a/reference/wagtail_hooks.py b/reference/wagtail_hooks.py index 8730e19..c7c5255 100644 --- a/reference/wagtail_hooks.py +++ b/reference/wagtail_hooks.py @@ -1,18 +1,11 @@ from django.http import HttpResponseRedirect from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.options import ( - ModelAdmin, - ModelAdminGroup, - modeladmin_register, -) -from wagtail_modeladmin.views import CreateView -from wagtail.admin.menu import MenuItem +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import CreateView, SnippetViewSet +from reference.models import Reference from reference.tasks import get_reference -from reference.models import ( - Reference -) class ReferenceCreateView(CreateView): def form_valid(self, form): @@ -38,18 +31,16 @@ def form_valid(self, form): -class ReferenceAdmin(ModelAdmin): +class ReferenceAdmin(SnippetViewSet): model = Reference - create_view_class = ReferenceCreateView - #edit_view_class = ArticleDocxEditView + add_view_class = ReferenceCreateView menu_label = _("Reference") menu_icon = "folder" menu_order = 1 - add_to_settings_menu = False # or True to add your model to the Settings sub-menu exclude_from_explorer = ( - False # or True to exclude pages of this type from 
Wagtail's explorer view + False ) list_per_page = 20 + add_to_admin_menu = True - -modeladmin_register(ReferenceAdmin) \ No newline at end of file +register_snippet(ReferenceAdmin) \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index 32d29e8..e50947f 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -3,6 +3,7 @@ redis==5.0.1 # https://github.com/redis/redis-py celery==5.3.6 # pyup: < 6.0 # https://github.com/celery/celery flower==2.0.1 # https://github.com/mher/flower hiredis==2.2.3 # https://github.com/redis/hiredis-py + # Django # ------------------------------------------------------------------------------ django==5.0.8 @@ -26,7 +27,6 @@ wagtail-autocomplete==0.11.0 # ------------------------------------------------------------------------------ lxml==4.9.4 # https://github.com/lxml/lxml - # Kombu # ------------------------------------------------------------------------------ kombu==5.4.2 @@ -35,13 +35,6 @@ kombu==5.4.2 # ------------------------------------------------------------------------------ tenacity==8.2.3 # https://pypi.org/project/tenacity/ -whitenoise==6.6.0 # https://github.com/evansd/whitenoise - -# Llama -# ------------------------------------------------------------------------------ -huggingface-hub==0.26.1 -llama-cpp-python==0.3.14 - # Packtools # ------------------------------------------------------------------------------ git+https://git@github.com/scieloorg/packtools@4.12.0#egg=packtools diff --git a/requirements/extra-llama.txt b/requirements/extra-llama.txt new file mode 100644 index 0000000..c949747 --- /dev/null +++ b/requirements/extra-llama.txt @@ -0,0 +1,4 @@ +# Llama +# ------------------------------------------------------------------------------ +huggingface-hub==0.26.1 +llama-cpp-python==0.3.14