From 4c98845fbce0205dc8e8fdbdc4a4d47203a123d6 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:23:29 -0300
Subject: [PATCH 01/27] gitignore update

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ca9e27c..a3cd991 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,4 +171,6 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# End of https://www.toptal.com/developers/gitignore/api/django
\ No newline at end of file
+# End of https://www.toptal.com/developers/gitignore/api/django
+
+llama3/llama-3.2/*
\ No newline at end of file

From 7fcb03e08912575e4b0eb5c55fb6a9dcc300b2b4 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:24:11 -0300
Subject: [PATCH 02/27] Update modeladmin to snippetview

---
 reference/wagtail_hooks.py | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/reference/wagtail_hooks.py b/reference/wagtail_hooks.py
index 8730e19..c7c5255 100644
--- a/reference/wagtail_hooks.py
+++ b/reference/wagtail_hooks.py
@@ -1,18 +1,11 @@
 from django.http import HttpResponseRedirect
 from django.utils.translation import gettext_lazy as _
-from wagtail_modeladmin.options import (
-    ModelAdmin,
-    ModelAdminGroup,
-    modeladmin_register,
-)
-from wagtail_modeladmin.views import CreateView
-from wagtail.admin.menu import MenuItem
+from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import CreateView, SnippetViewSet
 
+from reference.models import Reference
 from reference.tasks import get_reference
 
-from reference.models import ( 
-    Reference
-)
 
 class ReferenceCreateView(CreateView):
     def form_valid(self, form):
@@ -38,18 +31,16 @@ def form_valid(self, form):
         
 
 
-class ReferenceAdmin(ModelAdmin):
+class ReferenceAdmin(SnippetViewSet):
     model = Reference
-    create_view_class = ReferenceCreateView
-    #edit_view_class = ArticleDocxEditView
+    add_view_class = ReferenceCreateView
     menu_label = _("Reference")
     menu_icon = "folder"
     menu_order = 1
-    add_to_settings_menu = False  # or True to add your model to the Settings sub-menu
     exclude_from_explorer = (
-        False  # or True to exclude pages of this type from Wagtail's explorer view
+        False
     )
     list_per_page = 20
+    add_to_admin_menu = True
 
-
-modeladmin_register(ReferenceAdmin)
\ No newline at end of file
+register_snippet(ReferenceAdmin)
\ No newline at end of file

From 050297e1e773b5a4fdc8c8daacaa7c341995e8b1 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:24:30 -0300
Subject: [PATCH 03/27] Add label em inlinePanel

---
 reference/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/reference/models.py b/reference/models.py
index b38dd2a..e5e39aa 100755
--- a/reference/models.py
+++ b/reference/models.py
@@ -10,7 +10,7 @@
 from django.core.validators import MinValueValidator, MaxValueValidator
 from core.forms import CoreAdminModelForm
 
-# Create your models here
+
 class Reference(CommonControlField, ClusterableModel):
     mixed_citation = models.TextField(_("Mixed Citation"), null=False, blank=True)
 
@@ -18,7 +18,7 @@ class Reference(CommonControlField, ClusterableModel):
 
     panels = [
         FieldPanel('mixed_citation'),
-        InlinePanel('element_citation')
+        InlinePanel('element_citation', label=_("Cited Elements"))
     ]
 
     base_form_class = CoreAdminModelForm

From b0f1a31dbd0c07e9a3f5a4cd3c22e9ddef6eb5d4 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:25:47 -0300
Subject: [PATCH 04/27] Atualiza o download do modelo para command

---
 .envs/.local/.django                          |  5 ++-
 llama3/download_model.py                      | 15 --------
 .../management/commands/download_model.py     | 34 +++++++++++++++++++
 3 files changed, 38 insertions(+), 16 deletions(-)
 delete mode 100644 llama3/download_model.py
 create mode 100644 reference/management/commands/download_model.py

diff --git a/.envs/.local/.django b/.envs/.local/.django
index 3c2ac15..a14e8e3 100755
--- a/.envs/.local/.django
+++ b/.envs/.local/.django
@@ -16,4 +16,7 @@ CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xb
 
 # Timeout fetch_data
 # ------------------------------------------------------------------------------
-FETCH_DATA_TIMEOUT=2
\ No newline at end of file
+FETCH_DATA_TIMEOUT=2
+
+
+HF_TOKEN=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
\ No newline at end of file
diff --git a/llama3/download_model.py b/llama3/download_model.py
deleted file mode 100644
index 0b93f8f..0000000
--- a/llama3/download_model.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from huggingface_hub import login
-from huggingface_hub import hf_hub_download
-
-
-HF_TOKEN = 'INTRODUCE_TOKEN'
-
-login(token=HF_TOKEN)
-
-LLAMA_MODEL_DIR = "llama3/llama-3.2"
-MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf"
-repo_id = 'hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF'
-filename = MODEL_LLAMA
-local_dir = LLAMA_MODEL_DIR
-
-downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
\ No newline at end of file
diff --git a/reference/management/commands/download_model.py b/reference/management/commands/download_model.py
new file mode 100644
index 0000000..e38a678
--- /dev/null
+++ b/reference/management/commands/download_model.py
@@ -0,0 +1,34 @@
+import os
+from pathlib import Path
+
+from django.core.management.base import BaseCommand, CommandError
+from huggingface_hub import hf_hub_download, login
+
+
+class Command(BaseCommand):
+    help = 'Download the model from HuggingFace'
+
+    def add_arguments(self, parser):
+        parser.add_argument('--dir', type=str, default='llama3/llama-3.2', help='Directory to download the model')
+        parser.add_argument('--repo', type=str, default='hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF')
+        parser.add_argument('--filename', type=str, default='llama-3.2-3b-instruct-q4_k_m.gguf', help='Model name')
+        parser.add_argument('--force', action='store_true', help='Force download')
+
+    def handle(self, *args, **options):
+        token = os.getenv('HF_TOKEN')
+        if not token:
+            raise CommandError('You need to set the HF_TOKEN environment variable')
+        login(token=token, add_to_git_credential=False)
+
+        target_dir = Path(options['dir'])
+        target_dir.mkdir(parents=True, exist_ok=True)
+
+        downloaded_file = hf_hub_download(
+            repo_id=options['repo'],
+            filename=options['filename'],
+            local_dir=str(target_dir),
+            local_dir_use_symlinks=False,
+            force_download=options['force'],
+            resume_download=True,
+        )
+        self.stdout.write(self.style.SUCCESS(f'Downloaded {downloaded_file}'))

From a32aadf8d52c45d3679dc0424d954475fb0997a1 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:28:07 -0300
Subject: [PATCH 05/27] Fix __init__

---
 llama3/generic_llama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 2e56f16..5cc9d41 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -6,8 +6,8 @@
 class GenericLlama:
 
   def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5): 
-    self. llm = Llama(model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA), n_ctx=4000)
-    self. messages = messages
+    self.llm = Llama(model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA), n_ctx=4000)
+    self.messages = messages
     self.response_format = response_format
     self.max_tokens = max_tokens
     self.temperature = temperature

From a4161d891946baab5daa33f4e27a6c876af601c6 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:28:23 -0300
Subject: [PATCH 06/27] Move LLAMA_MODEL_DIR e MODEL_LLAMA

---
 config/settings/base.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/config/settings/base.py b/config/settings/base.py
index 51948db..33db1ba 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -24,8 +24,6 @@
 ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent
 # core/
 APPS_DIR = ROOT_DIR / "core"
-LLAMA_MODEL_DIR = ROOT_DIR / "llama3/llama-3.2"
-MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf"
 
 env = environ.Env()
 READ_DOT_ENV_FILE = env.bool("DJANGO_READ_DOT_ENV_FILE", default=False)
@@ -294,4 +292,8 @@
     "ACCESS_TOKEN_LIFETIME": timedelta(minutes=60),
     "REFRESH_TOKEN_LIFETIME": timedelta(days=1),
     # "AUTH_TOKEN_CLASSES": ("rest_framework_simplejwt.tokens.AccessToken",),
-}
\ No newline at end of file
+}
+
+# LLAMA
+LLAMA_MODEL_DIR = ROOT_DIR / "llama3/llama-3.2"
+MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf"
\ No newline at end of file

From 5de0daf797ebc5872eebf7f2017a3ed02641b1a7 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Mon, 15 Sep 2025 20:29:31 -0300
Subject: [PATCH 07/27] black

---
 .../management/commands/download_model.py     | 38 +++++++++++++------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/reference/management/commands/download_model.py b/reference/management/commands/download_model.py
index e38a678..39f99b7 100644
--- a/reference/management/commands/download_model.py
+++ b/reference/management/commands/download_model.py
@@ -6,29 +6,43 @@
 
 
 class Command(BaseCommand):
-    help = 'Download the model from HuggingFace'
+    help = "Download the model from HuggingFace"
 
     def add_arguments(self, parser):
-        parser.add_argument('--dir', type=str, default='llama3/llama-3.2', help='Directory to download the model')
-        parser.add_argument('--repo', type=str, default='hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF')
-        parser.add_argument('--filename', type=str, default='llama-3.2-3b-instruct-q4_k_m.gguf', help='Model name')
-        parser.add_argument('--force', action='store_true', help='Force download')
+        parser.add_argument(
+            "--dir",
+            type=str,
+            default="llama3/llama-3.2",
+            help="Directory to download the model",
+        )
+        parser.add_argument(
+            "--repo",
+            type=str,
+            default="hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF",
+        )
+        parser.add_argument(
+            "--filename",
+            type=str,
+            default="llama-3.2-3b-instruct-q4_k_m.gguf",
+            help="Model name",
+        )
+        parser.add_argument("--force", action="store_true", help="Force download")
 
     def handle(self, *args, **options):
-        token = os.getenv('HF_TOKEN')
+        token = os.getenv("HF_TOKEN")
         if not token:
-            raise CommandError('You need to set the HF_TOKEN environment variable')
+            raise CommandError("You need to set the HF_TOKEN environment variable")
         login(token=token, add_to_git_credential=False)
 
-        target_dir = Path(options['dir'])
+        target_dir = Path(options["dir"])
         target_dir.mkdir(parents=True, exist_ok=True)
 
         downloaded_file = hf_hub_download(
-            repo_id=options['repo'],
-            filename=options['filename'],
+            repo_id=options["repo"],
+            filename=options["filename"],
             local_dir=str(target_dir),
             local_dir_use_symlinks=False,
-            force_download=options['force'],
+            force_download=options["force"],
             resume_download=True,
         )
-        self.stdout.write(self.style.SUCCESS(f'Downloaded {downloaded_file}'))
+        self.stdout.write(self.style.SUCCESS(f"Downloaded {downloaded_file}"))

From 71c0a4be49e5f48fe75876d343b65f934f7daf4b Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel <samuelveigarangel@hotmail.com>
Date: Thu, 25 Sep 2025 20:48:11 -0300
Subject: [PATCH 08/27] remove token HF_TOKEN

---
 .envs/.local/.django | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.envs/.local/.django b/.envs/.local/.django
index a14e8e3..ddbdcc1 100755
--- a/.envs/.local/.django
+++ b/.envs/.local/.django
@@ -19,4 +19,4 @@ CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xb
 FETCH_DATA_TIMEOUT=2
 
 
-HF_TOKEN=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
\ No newline at end of file
+HF_TOKEN=
\ No newline at end of file

From f5caf9fff29d35df6315b810bd7d55da965fded2 Mon Sep 17 00:00:00 2001
From: Samuel Veiga Rangel
 <82840278+samuelveigarangel@users.noreply.github.com>
Date: Thu, 18 Sep 2025 01:13:19 -0300
Subject: [PATCH 09/27] Update README.md

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index bc60963..725b37e 100644
--- a/README.md
+++ b/README.md
@@ -178,3 +178,7 @@ Signup at: [https://sentry.io/signup/?code=cookiecutter](https://sentry.io/signu
 ## Deployment
 
 See full [Docker deployment guide](http://cookiecutter-django.readthedocs.io/en/latest/deployment-with-docker.html).
+
+## Download Model
+
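+See the [quick guide to downloading and configuring the MarkAPI model for marking up references in PDFs](https://github.com/scieloorg/markapi/wiki/Guia-r%C3%A1pido:-baixar-e-configurar-o-modelo-do-MarkAPI-para-marca%C3%A7%C3%A3o-de-refer%C3%AAncias-em-PDF).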

From 648760b9fe8dc66bd5c0d3ac6e151426db80ca79 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:01:45 -0300
Subject: [PATCH 10/27] remove dep repetida no mesmo arquivo

---
 requirements/base.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/requirements/base.txt b/requirements/base.txt
index 32d29e8..f19e4d2 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -3,6 +3,7 @@ redis==5.0.1  # https://github.com/redis/redis-py
 celery==5.3.6  # pyup: < 6.0  # https://github.com/celery/celery
 flower==2.0.1  # https://github.com/mher/flower
 hiredis==2.2.3  # https://github.com/redis/hiredis-py
+
 # Django
 # ------------------------------------------------------------------------------
 django==5.0.8
@@ -26,7 +27,6 @@ wagtail-autocomplete==0.11.0
 # ------------------------------------------------------------------------------
 lxml==4.9.4 # https://github.com/lxml/lxml
 
-
 # Kombu
 # ------------------------------------------------------------------------------
 kombu==5.4.2
@@ -35,8 +35,6 @@ kombu==5.4.2
 # ------------------------------------------------------------------------------
 tenacity==8.2.3  # https://pypi.org/project/tenacity/
 
-whitenoise==6.6.0  # https://github.com/evansd/whitenoise
-
 # Llama
 # ------------------------------------------------------------------------------
 huggingface-hub==0.26.1

From 6865310942f6a93a6e7ee99c5d7834a06abe884f Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:02:00 -0300
Subject: [PATCH 11/27] remove dep llama e hugging face

---
 requirements/base.txt | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/requirements/base.txt b/requirements/base.txt
index f19e4d2..e50947f 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -35,11 +35,6 @@ kombu==5.4.2
 # ------------------------------------------------------------------------------
 tenacity==8.2.3  # https://pypi.org/project/tenacity/
 
-# Llama
-# ------------------------------------------------------------------------------
-huggingface-hub==0.26.1
-llama-cpp-python==0.3.14
-
 # Packtools
 # ------------------------------------------------------------------------------
 git+https://git@github.com/scieloorg/packtools@4.12.0#egg=packtools

From 51dd32e3c2bf31ad002b4d6abcc837480ba7fe83 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:02:25 -0300
Subject: [PATCH 12/27] =?UTF-8?q?cria=20requirement=20adicional=20espec?=
 =?UTF-8?q?=C3=ADfico=20para=20llama=20e=20huggingface?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements/extra-llama.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 requirements/extra-llama.txt

diff --git a/requirements/extra-llama.txt b/requirements/extra-llama.txt
new file mode 100644
index 0000000..c949747
--- /dev/null
+++ b/requirements/extra-llama.txt
@@ -0,0 +1,4 @@
+# Llama
+# ------------------------------------------------------------------------------
+huggingface-hub==0.26.1
+llama-cpp-python==0.3.14

From 2b00c39364de1f997ad48973488076387e10a44e Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:05:10 -0300
Subject: [PATCH 13/27] =?UTF-8?q?Altera=20Dockerfile=20padr=C3=A3o=20(loca?=
 =?UTF-8?q?l=20e=20production)=20para=20n=C3=A3o=20terem=20llama?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 compose/local/django/Dockerfile      | 24 ++----------------------
 compose/production/django/Dockerfile | 17 ++---------------
 2 files changed, 4 insertions(+), 37 deletions(-)

diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile
index 612c6e3..3947566 100755
--- a/compose/local/django/Dockerfile
+++ b/compose/local/django/Dockerfile
@@ -30,13 +30,6 @@ RUN apt-get update && \
 # Instalar ninja-build y cmake
 RUN apt-get install -y ninja-build cmake
 
-# Configurar variables de entorno para compilar con BLAS y SIMD condicionalmente
-ARG ENABLE_OPTIMIZATIONS=true
-ARG ENABLE_OPTIMIZATIONS=true
-ENV CFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \
-    CXXFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \
-    CMAKE_ARGS="${ENABLE_OPTIMIZATIONS:+-DGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS}"
-
 # Actualizar pip, setuptools y wheel antes de instalar dependencias
 RUN python -m pip install --upgrade pip setuptools wheel
 
@@ -63,12 +56,6 @@ ENV PYTHONUNBUFFERED 1
 ENV PYTHONDONTWRITEBYTECODE 1
 ENV BUILD_ENV ${BUILD_ENVIRONMENT}
 
-# Disable AVX support for llama-cpp-python if needed
-ARG DISABLE_AVX=false
-
-# Set the version of llama-cpp-python
-ARG LLAMA_VERSION=0.3.14
-
 WORKDIR ${APP_HOME}
 
 RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list
@@ -89,17 +76,10 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
 # copy python dependency wheels from python-build-stage
 COPY --from=python-build-stage /usr/src/app/wheels  /wheels/
 
-# Use wheels to install python dependencies (excluding llama-cpp-python)
-RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! -name "llama_cpp_python*") \
+# Use wheels to install python dependencies
+RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") \
 	&& rm -rf /wheels/
 
-# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes with or without AVX support
-RUN if [ "${DISABLE_AVX}" = "true" ]; then \
-  CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
-    else \
-  pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
-    fi
-
 COPY ./compose/production/django/entrypoint /entrypoint
 RUN sed -i 's/\r$//g' /entrypoint
 RUN chmod +x /entrypoint
diff --git a/compose/production/django/Dockerfile b/compose/production/django/Dockerfile
index 8ad536d..34b6e6f 100755
--- a/compose/production/django/Dockerfile
+++ b/compose/production/django/Dockerfile
@@ -39,12 +39,6 @@ ENV PYTHONUNBUFFERED 1
 ENV PYTHONDONTWRITEBYTECODE 1
 ENV BUILD_ENV ${BUILD_ENVIRONMENT}
 
-# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support
-ARG DISABLE_AVX=true
-
-# Set the version of llama-cpp-python
-ARG LLAMA_VERSION=0.3.14
-
 WORKDIR ${APP_HOME}
 
 RUN addgroup --system django \
@@ -68,15 +62,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
 # copy python dependency wheels from python-build-stage
 COPY --from=python-build-stage /usr/src/app/wheels  /wheels/
 
-# use wheels to install python dependencies (excluding llama-cpp-python)
-RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! -name "llama_cpp_python*") && rm -rf /wheels/
-
-# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support
-RUN if [ "${DISABLE_AVX}" = "true" ]; then \
-  CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
-    else \
-  pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
-    fi
+# use wheels to install python dependencies
+RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") && rm -rf /wheels/
 
 COPY --chown=django:django ./compose/production/django/entrypoint /entrypoint
 RUN sed -i 's/\r$//g' /entrypoint

From 67d1f7056c0a14bc755cb2f506c72dfd42cb4a25 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:09:21 -0300
Subject: [PATCH 14/27] =?UTF-8?q?Cria=20vari=C3=A1vel=20settings=20para=20?=
 =?UTF-8?q?indicar=20se=20Llama=20est=C3=A1=20ou=20n=C3=A3o=20ativado?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config/settings/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/settings/base.py b/config/settings/base.py
index 33db1ba..d8e587e 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -295,5 +295,6 @@
 }
 
 # LLAMA
+LLAMA_ENABLED = env.bool("LLAMA_ENABLED", default=False)
 LLAMA_MODEL_DIR = ROOT_DIR / "llama3/llama-3.2"
 MODEL_LLAMA = "llama-3.2-3b-instruct-q4_k_m.gguf"
\ No newline at end of file

From 78a42aa17fe561feb7ae3f52889c0bd22b5ac5cd Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:09:54 -0300
Subject: [PATCH 15/27] Cria um yml para usar fazer deploy local de imagem com
 llama

---
 llama.local.yml | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 llama.local.yml

diff --git a/llama.local.yml b/llama.local.yml
new file mode 100644
index 0000000..6af50cb
--- /dev/null
+++ b/llama.local.yml
@@ -0,0 +1,77 @@
+services:
+  django: &django
+    build:
+      context: .
+      dockerfile: ./compose/local/django/Dockerfile.llama
+      args:
+        BUILD_ENVIRONMENT: local
+    image: markapi_local_django
+    container_name: markapi_local_django
+    depends_on:
+      - redis
+      - postgres
+      - mailhog
+    volumes:
+      - .:/app:z
+    env_file:
+      - ./.envs/.local/.django
+      - ./.envs/.local/.postgres
+    ports:
+      - "8009:8000"
+    command: /start
+
+  mailhog:
+    image: mailhog/mailhog:v1.0.0
+    container_name: markapi_local_mailhog
+    ports:
+      - "8029:8025"
+
+  postgres:
+    build:
+      context: .
+      dockerfile: ./compose/production/postgres/Dockerfile
+    image: markapi_local_postgres
+    container_name: markapi_local_postgres
+    volumes:
+      - ../scms_data/markapi/data_dev:/var/lib/postgresql/data:Z
+      - ../scms_data/markapi/data_dev_backup:/backups:z
+    ports:
+      - "5439:5432"
+    env_file:
+      - ./.envs/.local/.postgres
+
+  redis:
+    image: redis:6
+    container_name: markapi_local_redis
+    ports:
+      - "6399:6379"
+
+  celeryworker:
+    <<: *django
+    image: markapi_local_celeryworker
+    container_name: markapi_local_celeryworker
+    depends_on:
+      - redis
+      - postgres
+      - mailhog
+    ports: []
+    command: /start-celeryworker
+
+  celerybeat:
+    <<: *django
+    image: markapi_local_celerybeat
+    container_name: markapi_local_celerybeat
+    depends_on:
+      - redis
+      - postgres
+      - mailhog
+    ports: []
+    command: /start-celerybeat
+
+  flower:
+    <<: *django
+    image: markapi_local_flower
+    container_name: markapi_local_flower
+    ports:
+      - "5559:5555"
+    command: /start-flower

From ca0819ecb0e458cd7b2e40bfbb349c199930b058 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:10:11 -0300
Subject: [PATCH 16/27] Cria dockerfile com llama (prod e local)

---
 compose/local/django/Dockerfile.llama      | 113 +++++++++++++++++++++
 compose/production/django/Dockerfile.llama | 109 ++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100755 compose/local/django/Dockerfile.llama
 create mode 100755 compose/production/django/Dockerfile.llama

diff --git a/compose/local/django/Dockerfile.llama b/compose/local/django/Dockerfile.llama
new file mode 100755
index 0000000..21808d9
--- /dev/null
+++ b/compose/local/django/Dockerfile.llama
@@ -0,0 +1,113 @@
+ARG PYTHON_VERSION=3.11-bullseye
+
+# define an alias for the specific python version used in this file.
+FROM python:${PYTHON_VERSION} AS python
+
+# Python build stage
+FROM python AS python-build-stage
+
+ARG BUILD_ENVIRONMENT=local
+
+# Install apt packages
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  # dependencies for building Python packages
+  build-essential \
+  git \
+  # psycopg2 dependencies
+  libpq-dev \
+  # other dependencies
+  software-properties-common \
+  libopenblas-dev \
+  libomp-dev
+
+# Instalar gcc-10 y g++-10 en Debian Bullseye
+RUN apt-get update && \
+    apt-get install -y gcc-10 g++-10 ninja-build cmake && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 50 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 50 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Instalar ninja-build y cmake
+RUN apt-get install -y ninja-build cmake
+
+# Build flags for BLAS/SIMD, set only if ENABLE_OPTIMIZATIONS is non-empty. NOTE(review): "-DGML_BLAS" looks like a typo for "-DGGML_BLAS" - confirm
+ARG ENABLE_OPTIMIZATIONS=true
+ENV CFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \
+    CXXFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \
+    CMAKE_ARGS="${ENABLE_OPTIMIZATIONS:+-DGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS}"
+
+# Actualizar pip, setuptools y wheel antes de instalar dependencias
+RUN python -m pip install --upgrade pip setuptools wheel
+
+# Requirements are installed here to ensure they will be cached.
+COPY ./requirements .
+
+# Create application directory to hold DOCX layouts
+COPY ./docx_layouts .
+
+# Update pip
+RUN python -m pip install --upgrade pip
+
+# Create Python Dependency and Sub-Dependency Wheels.
+RUN pip wheel --wheel-dir /usr/src/app/wheels  \
+  -r ${BUILD_ENVIRONMENT}.txt \
+  -r extra-llama.txt
+
+# Python 'run' stage
+FROM python AS python-run-stage
+
+ARG BUILD_ENVIRONMENT=local
+ARG APP_HOME=/app
+
+ENV PYTHONUNBUFFERED 1
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV BUILD_ENV ${BUILD_ENVIRONMENT}
+
+WORKDIR ${APP_HOME}
+
+RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list
+
+# Install required system dependencies
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  # psycopg2 dependencies
+  libpq-dev \
+  # Translations dependencies
+  gettext \
+  # libreoffice for document conversions
+  default-jre libreoffice libreoffice-java-common ttf-mscorefonts-installer fonts-liberation fonts-liberation2 fonts-crosextra-carlito fonts-crosextra-caladea fonts-dejavu fonts-noto \
+  # cleaning up unused files
+  && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
+  && rm -rf /var/lib/apt/lists/*
+
+# All absolute dir copies ignore workdir instruction. All relative dir copies are wrt to the workdir instruction
+# copy python dependency wheels from python-build-stage
+COPY --from=python-build-stage /usr/src/app/wheels  /wheels/
+
+# Use wheels to install python dependencies
+RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl") \
+	&& rm -rf /wheels/
+
+COPY ./compose/production/django/entrypoint /entrypoint
+RUN sed -i 's/\r$//g' /entrypoint
+RUN chmod +x /entrypoint
+
+COPY ./compose/local/django/start /start
+RUN sed -i 's/\r$//g' /start
+RUN chmod +x /start
+
+COPY ./compose/local/django/celery/worker/start /start-celeryworker
+RUN sed -i 's/\r$//g' /start-celeryworker
+RUN chmod +x /start-celeryworker
+
+COPY ./compose/local/django/celery/beat/start /start-celerybeat
+RUN sed -i 's/\r$//g' /start-celerybeat
+RUN chmod +x /start-celerybeat
+
+COPY ./compose/local/django/celery/flower/start /start-flower
+RUN sed -i 's/\r$//g' /start-flower
+RUN chmod +x /start-flower
+
+# copy application code to WORKDIR
+COPY . ${APP_HOME}
+
+ENTRYPOINT ["/entrypoint"]
diff --git a/compose/production/django/Dockerfile.llama b/compose/production/django/Dockerfile.llama
new file mode 100755
index 0000000..8ad536d
--- /dev/null
+++ b/compose/production/django/Dockerfile.llama
@@ -0,0 +1,109 @@
+ARG PYTHON_VERSION=3.11-bullseye
+
+# define an alias for the specific python version used in this file.
+FROM python:${PYTHON_VERSION} as python
+
+# Python build stage
+FROM python as python-build-stage
+
+ARG BUILD_ENVIRONMENT=production
+
+# Install apt packages
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  # dependencies for building Python packages
+  git \
+  build-essential \
+  # psycopg2 dependencies
+  libpq-dev
+
+# Requirements are installed here to ensure they will be cached.
+COPY ./requirements .
+
+# Create application directory to hold DOCX layouts
+COPY ./docx_layouts .
+
+# Update pip
+RUN python -m pip install --upgrade pip
+
+# Create Python Dependency and Sub-Dependency Wheels.
+RUN pip wheel --wheel-dir /usr/src/app/wheels  \
+  -r ${BUILD_ENVIRONMENT}.txt
+
+# Python 'run' stage
+FROM python as python-run-stage
+
+ARG BUILD_ENVIRONMENT=production
+ARG APP_HOME=/app
+
+ENV PYTHONUNBUFFERED 1
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV BUILD_ENV ${BUILD_ENVIRONMENT}
+
+# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support
+ARG DISABLE_AVX=true
+
+# Set the version of llama-cpp-python
+ARG LLAMA_VERSION=0.3.14
+
+WORKDIR ${APP_HOME}
+
+RUN addgroup --system django \
+    && adduser --system --ingroup django django
+
+RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list
+    
+# Install required system dependencies
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  # psycopg2 dependencies
+  libpq-dev \
+  # libreoffice for document conversions
+  default-jre libreoffice libreoffice-java-common ttf-mscorefonts-installer fonts-liberation fonts-liberation2 fonts-crosextra-carlito fonts-crosextra-caladea fonts-dejavu fonts-noto \
+  # Translations dependencies
+  gettext \
+  # cleaning up unused files
+  && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
+  && rm -rf /var/lib/apt/lists/*
+
+# All absolute dir copies ignore workdir instruction. All relative dir copies are wrt to the workdir instruction
+# copy python dependency wheels from python-build-stage
+COPY --from=python-build-stage /usr/src/app/wheels  /wheels/
+
+# use wheels to install python dependencies (excluding llama-cpp-python)
+RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! -name "llama_cpp_python*") && rm -rf /wheels/
+
+# Install llama-cpp-python with specific CMAKE flags for Kubernetes nodes without AVX support
+RUN if [ "${DISABLE_AVX}" = "true" ]; then \
+  CMAKE_ARGS='-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON' pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
+    else \
+  pip install llama-cpp-python==${LLAMA_VERSION} --force-reinstall --no-cache-dir; \
+    fi
+
+COPY --chown=django:django ./compose/production/django/entrypoint /entrypoint
+RUN sed -i 's/\r$//g' /entrypoint
+RUN chmod +x /entrypoint
+
+COPY --chown=django:django ./compose/production/django/start /start
+RUN sed -i 's/\r$//g' /start
+RUN chmod +x /start
+
+COPY --chown=django:django ./compose/production/django/celery/worker/start /start-celeryworker
+RUN sed -i 's/\r$//g' /start-celeryworker
+RUN chmod +x /start-celeryworker
+
+COPY --chown=django:django ./compose/production/django/celery/beat/start /start-celerybeat
+RUN sed -i 's/\r$//g' /start-celerybeat
+RUN chmod +x /start-celerybeat
+
+COPY ./compose/production/django/celery/flower/start /start-flower
+RUN sed -i 's/\r$//g' /start-flower
+RUN chmod +x /start-flower
+
+# copy application code to WORKDIR
+COPY --chown=django:django . ${APP_HOME}
+
+# make django owner of the WORKDIR directory as well.
+RUN chown django:django ${APP_HOME}
+
+USER django
+
+ENTRYPOINT ["/entrypoint"]

From 3302460a4b819368ffc4fc2fc44ed4a353593b93 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:11:06 -0300
Subject: [PATCH 17/27] =?UTF-8?q?Cria=20um=20comando=20em=20make=20para=20?=
 =?UTF-8?q?construir=20imagem=20com=20llama=20(build=5Fllama)=20-=20O=20ob?=
 =?UTF-8?q?jetivo=20=C3=A9=20facilitar=20o=20deploy=20local=20-=20Basta=20?=
 =?UTF-8?q?fazer=20make=20build=5Fllama=20(em=20lugar=20de=20make=20build)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Makefile b/Makefile
index 79cd5da..ce98fe5 100755
--- a/Makefile
+++ b/Makefile
@@ -38,6 +38,9 @@ build_date: ## Show build date
 build:  ## Build app using $(compose)
 	@docker compose -f $(compose) build
 
+build_llama:  ## Build app using $(compose) with llama enabled
+	@docker compose -f llama.$(compose) build
+
 build_no_cache:  ## Build app using $(compose)
 	@docker compose -f $(compose) build --no-cache
 

From 7e3a713f38c1b2aff3ef6b95fccff42761e1b7cb Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:11:44 -0300
Subject: [PATCH 18/27] =?UTF-8?q?Ajusta=20import=20em=20m=C3=B3dulo=20que?=
 =?UTF-8?q?=20cria=20GenericLlama=20-=20Atrasamos=20o=20import=20da=20lib?=
 =?UTF-8?q?=20llama=5Fcpp=20para=20melhor=20lidar=20com=20exce=C3=A7=C3=B5?=
 =?UTF-8?q?es?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama3/generic_llama.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 5cc9d41..b8341dc 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -1,6 +1,5 @@
+from config.settings.base import LLAMA_ENABLED, LLAMA_MODEL_DIR, MODEL_LLAMA
 
-from config.settings.base import LLAMA_MODEL_DIR, MODEL_LLAMA
-from llama_cpp import Llama
 import os
 
 class GenericLlama:

From 524d97ea9ee6fce2713f29352ce295725ad786c5 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:12:12 -0300
Subject: [PATCH 19/27] =?UTF-8?q?Cria=20exce=C3=A7=C3=B5es=20para=20melhor?=
 =?UTF-8?q?=20lidar=20com=20os=20erros=20de=20instancia=C3=A7=C3=A3o=20do?=
 =?UTF-8?q?=20Llama?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama3/generic_llama.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index b8341dc..1b045e2 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -2,6 +2,16 @@
 
 import os
 
+
+class LlamaDisabledError(Exception):
+    pass
+
+class LlamaModelNotFoundError(FileNotFoundError):
+    pass
+
+class LlamaNotInstalledError(ImportError):
+    pass
+
 class GenericLlama:
 
   def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5): 

From b51c734f786b7e12ed7212412f32e9b1cb91654e Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:13:06 -0300
Subject: [PATCH 20/27] =?UTF-8?q?Impede=20cria=C3=A7=C3=A3o=20do=20Generic?=
 =?UTF-8?q?Llama=20-=20Faz=20raise=20que=20deve=20ser=20capturado=20em=20t?=
 =?UTF-8?q?odo=20mundo=20que=20usar=20GenericLlama?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama3/generic_llama.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 1b045e2..325f93c 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -13,15 +13,19 @@ class LlamaNotInstalledError(ImportError):
     pass
 
 class GenericLlama:
+  # Singleton pattern to cache the LLaMA model instance
+  _cached_llm = None
 
-  def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5): 
-    self.llm = Llama(model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA), n_ctx=4000)
+  def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5, top_p=0.5):
     self.messages = messages
     self.response_format = response_format
     self.max_tokens = max_tokens
     self.temperature = temperature
     self.top_p = top_p
 
+    if not LLAMA_ENABLED:
+      raise LlamaDisabledError("LLaMA is disabled in settings.")
+    
   def run(self, user_input):
     input = self.messages.copy()
     input.append({

From beb0036df4ee79b16c30331eec24ecb0c55a98ce Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:13:27 -0300
Subject: [PATCH 21/27] =?UTF-8?q?Caso=20o=20singleton=20n=C3=A3o=20esteja?=
 =?UTF-8?q?=20instanciado,=20faz=20import=20do=20llama?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama3/generic_llama.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 325f93c..870013e 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -26,6 +26,11 @@ def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5,
     if not LLAMA_ENABLED:
       raise LlamaDisabledError("LLaMA is disabled in settings.")
     
+    if GenericLlama._cached_llm is None:
+      try:
+         from llama_cpp import Llama
+      except ImportError as e:
+         raise LlamaNotInstalledError("The 'llama-cpp-python' package is not installed. Please use the llama-activated Docker image (Dockerfile.llama).") from e
   def run(self, user_input):
     input = self.messages.copy()
     input.append({

From 1509008c2c3bcbe8731a7c37c66df83c1cddf4f3 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:13:38 -0300
Subject: [PATCH 22/27] Verifica se o caminho do modelo existe

---
 llama3/generic_llama.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 870013e..088889d 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -31,6 +31,10 @@ def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5,
          from llama_cpp import Llama
       except ImportError as e:
          raise LlamaNotInstalledError("The 'llama-cpp-python' package is not installed. Please use the llama-activated Docker image (Dockerfile.llama).") from e
+
+      model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA)
+      if not os.path.isfile(model_path):
+        raise LlamaModelNotFoundError(f"LLaMA model file not found at {model_path}. Please ensure the model is downloaded and the path is correct.")
   def run(self, user_input):
     input = self.messages.copy()
     input.append({

From c9e268133fca8c727f52f826f18a2e23f1d2e773 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:13:53 -0300
Subject: [PATCH 23/27] =?UTF-8?q?Cria=20uma=20inst=C3=A2ncia=20do=20LLama?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama3/generic_llama.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 088889d..57f55bf 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -35,6 +35,11 @@ def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5,
       model_path = os.path.join(LLAMA_MODEL_DIR, MODEL_LLAMA)
       if not os.path.isfile(model_path):
         raise LlamaModelNotFoundError(f"LLaMA model file not found at {model_path}. Please ensure the model is downloaded and the path is correct.")
+
+      try:
+        GenericLlama._cached_llm = Llama(model_path=model_path, n_ctx=max_tokens)
+      except Exception as e:
+        raise RuntimeError(f"Failed to initialize LLaMA model: {e}") from e
   def run(self, user_input):
     input = self.messages.copy()
     input.append({

From 2a57bed8af4b18f498aa93f03737c5d6c4bae4c4 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:14:08 -0300
Subject: [PATCH 24/27] E povoa o cache (singleton pattern)

---
 llama3/generic_llama.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index 57f55bf..b9c569d 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -40,6 +40,9 @@ def __init__(self, messages, response_format, max_tokens=4000, temperature=0.5,
         GenericLlama._cached_llm = Llama(model_path=model_path, n_ctx=max_tokens)
       except Exception as e:
         raise RuntimeError(f"Failed to initialize LLaMA model: {e}") from e
+      
+    self.llm = GenericLlama._cached_llm
+
   def run(self, user_input):
     input = self.messages.copy()
     input.append({

From 975045b17160affb53bb5bc3396d6f3101fbd6ff Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:14:22 -0300
Subject: [PATCH 25/27] Ajuste return para melhor legibilidade

---
 llama3/generic_llama.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llama3/generic_llama.py b/llama3/generic_llama.py
index b9c569d..e25d4f9 100644
--- a/llama3/generic_llama.py
+++ b/llama3/generic_llama.py
@@ -49,4 +49,10 @@ def run(self, user_input):
       'role': 'user',
       'content': user_input
     })
-    return self.llm.create_chat_completion(messages=input, response_format=self.response_format, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
\ No newline at end of file
+    return self.llm.create_chat_completion(
+      messages=input,
+      response_format=self.response_format,
+      max_tokens=self.max_tokens,
+      temperature=self.temperature,
+      top_p=self.top_p,
+    )

From 77d3aa3ba3da113522c25f277332c495a874dc83 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:15:18 -0300
Subject: [PATCH 26/27] =?UTF-8?q?Ajuste=20o=20marker=20para=20melhor=20lid?=
 =?UTF-8?q?ar=20com=20as=20altera=C3=A7=C3=B5es=20no=20GenericLlama=20-=20?=
 =?UTF-8?q?Lidar=20com=20raises=20provocados=20no=20GenericLlama=20-=20Cap?=
 =?UTF-8?q?tura=20e=20registra=20exce=C3=A7=C3=B5es=20com=20o=20Tracker=20?=
 =?UTF-8?q?-=20Retorna=20string=20indicando=20que=20marca=C3=A7=C3=A3o=20f?=
 =?UTF-8?q?alhou,=20se=20for=20o=20caso?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 reference/marker.py | 44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/reference/marker.py b/reference/marker.py
index 63582e9..dda75ec 100644
--- a/reference/marker.py
+++ b/reference/marker.py
@@ -1,14 +1,46 @@
-from llama3.generic_llama import GenericLlama
+import logging
 
+from llama3.generic_llama import (
+    GenericLlama,
+    LlamaDisabledError,
+    LlamaNotInstalledError,
+    LlamaModelNotFoundError,
+)
 from reference.config import MESSAGES, RESPONSE_FORMAT
+from tracker.models import GeneralEvent
 
 
 def mark_reference(reference_text):
-    reference_marker = GenericLlama(MESSAGES, RESPONSE_FORMAT)
-    output = reference_marker.run(reference_text)
-    # output['choices'][0]['message']['content']
-    for item in output["choices"]:
-        yield item["message"]["content"]
+    """Yield the message content of each choice GenericLlama returns.
+
+    Any failure is logged with its traceback, recorded through
+    GeneralEvent.create and reported as one yielded message, not raised.
+    """
+    try:
+        reference_marker = GenericLlama(MESSAGES, RESPONSE_FORMAT)
+        output = reference_marker.run(reference_text)
+        for item in output.get("choices", []):
+            yield item.get("message", {}).get("content", "")
+    except Exception as e:
+        # logging.exception (unlike logging.error) also logs the traceback.
+        if isinstance(e, LlamaModelNotFoundError):
+            logging.exception("Error marking reference: %s", e)
+            message = f"Llama model file not found: {str(e)}"
+        elif isinstance(e, (LlamaDisabledError, LlamaNotInstalledError)):
+            logging.exception("Error marking reference: %s", e)
+            message = f"Llama model is not available: {str(e)}"
+        else:
+            logging.exception("Unexpected error marking reference: %s", e)
+            message = f"An unexpected error occurred: {str(e)}"
+        GeneralEvent.create(
+            exception=e,
+            # Hand over the live traceback; None discarded it.
+            exc_traceback=e.__traceback__,
+            item=None,
+            action="mark_reference",
+            detail={"reference_text": reference_text},
+        )
+        yield message
 
 
 def mark_references(reference_block):

From cb2021a4f1cd448495e807adb64ac75cc52e2f38 Mon Sep 17 00:00:00 2001
From: Pitanga Innovare <rafael@pitangainnovare.com.br>
Date: Fri, 3 Oct 2025 17:36:31 -0300
Subject: [PATCH 27/27] =?UTF-8?q?Remove=20instala=C3=A7=C3=A3o=20duplicada?=
 =?UTF-8?q?=20de=20deps?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 compose/local/django/Dockerfile       | 3 ---
 compose/local/django/Dockerfile.llama | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile
index 3947566..4fe37f1 100755
--- a/compose/local/django/Dockerfile
+++ b/compose/local/django/Dockerfile
@@ -27,9 +27,6 @@ RUN apt-get update && \
     update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 50 && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Instalar ninja-build y cmake
-RUN apt-get install -y ninja-build cmake
-
 # Actualizar pip, setuptools y wheel antes de instalar dependencias
 RUN python -m pip install --upgrade pip setuptools wheel
 
diff --git a/compose/local/django/Dockerfile.llama b/compose/local/django/Dockerfile.llama
index 21808d9..7719a34 100755
--- a/compose/local/django/Dockerfile.llama
+++ b/compose/local/django/Dockerfile.llama
@@ -27,9 +27,6 @@ RUN apt-get update && \
     update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 50 && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
-# Instalar ninja-build y cmake
-RUN apt-get install -y ninja-build cmake
-
 # Configurar variables de entorno para compilar con BLAS y SIMD condicionalmente
 ARG ENABLE_OPTIMIZATIONS=true
 ENV CFLAGS="${ENABLE_OPTIMIZATIONS:+-mfma -mavx2}" \