diff --git a/backend/account_v2/serializer.py b/backend/account_v2/serializer.py
index 4011cf4d5a..e189b4efe1 100644
--- a/backend/account_v2/serializer.py
+++ b/backend/account_v2/serializer.py
@@ -1,6 +1,7 @@
import re
from rest_framework import serializers
+from utils.input_sanitizer import validate_name_field
from account_v2.models import Organization, User
@@ -10,6 +11,12 @@ class OrganizationSignupSerializer(serializers.Serializer):
display_name = serializers.CharField(required=True, max_length=150)
organization_id = serializers.CharField(required=True, max_length=30)
+    def validate_name(self, value: str) -> str:
+        """Sanitize the organization name supplied at signup.
+
+        Delegates to ``validate_name_field``, which strips surrounding
+        whitespace and raises a ``ValidationError`` when the result is empty
+        or contains HTML tags, a ``javascript:`` protocol, or inline
+        event-handler attributes.
+
+        Returns:
+            The whitespace-stripped organization name.
+        """
+        cleaned = validate_name_field(value, field_name="Organization name")
+        return cleaned
+
+    def validate_display_name(self, value: str) -> str:
+        """Sanitize the organization display name supplied at signup.
+
+        Delegates to ``validate_name_field``, which strips surrounding
+        whitespace and raises a ``ValidationError`` when the result is empty
+        or contains HTML tags, a ``javascript:`` protocol, or inline
+        event-handler attributes.
+
+        Returns:
+            The whitespace-stripped display name.
+        """
+        cleaned = validate_name_field(value, field_name="Display name")
+        return cleaned
+
def validate_organization_id(self, value): # type: ignore
if not re.match(r"^[a-z0-9_-]+$", value):
raise serializers.ValidationError(
diff --git a/backend/adapter_processor_v2/serializers.py b/backend/adapter_processor_v2/serializers.py
index 1d931f1266..3550c4003f 100644
--- a/backend/adapter_processor_v2/serializers.py
+++ b/backend/adapter_processor_v2/serializers.py
@@ -6,6 +6,7 @@
from django.conf import settings
from rest_framework import serializers
from rest_framework.serializers import ModelSerializer
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
from adapter_processor_v2.adapter_processor import AdapterProcessor
from adapter_processor_v2.constants import AdapterKeys
@@ -28,6 +29,20 @@ class Meta:
model = AdapterInstance
fields = "__all__"
+    def validate(self, data):
+        """Run the parent validation, then sanitize the free-text fields.
+
+        ``adapter_name`` is checked with ``validate_name_field`` (strips
+        whitespace, rejects empty values and markup) and ``description`` with
+        ``validate_no_html_tags`` (rejects markup only). A field that is
+        missing from ``data`` or set to ``None`` is left untouched.
+
+        Returns:
+            The validated data with the sanitized values written back.
+        """
+        validated = super().validate(data)
+        # (key in the payload, sanitizer to apply, label used in error text)
+        field_checks = (
+            ("adapter_name", validate_name_field, "Adapter name"),
+            ("description", validate_no_html_tags, "Description"),
+        )
+        for key, sanitize, label in field_checks:
+            raw_value = validated.get(key)
+            if raw_value is None:
+                continue
+            validated[key] = sanitize(raw_value, field_name=label)
+        return validated
+
class DefaultAdapterSerializer(serializers.Serializer):
llm_default = serializers.CharField(max_length=FLC.UUID_LENGTH, required=False)
diff --git a/backend/api_v2/serializers.py b/backend/api_v2/serializers.py
index 5a2068fd93..4f81ef7c49 100644
--- a/backend/api_v2/serializers.py
+++ b/backend/api_v2/serializers.py
@@ -22,6 +22,7 @@
ValidationError,
)
from tags.serializers import TagParamsSerializer
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
from workflow_manager.endpoint_v2.models import WorkflowEndpoint
from workflow_manager.workflow_v2.exceptions import ExecutionDoesNotExistError
@@ -62,6 +63,14 @@ def validate_api_name(self, value: str) -> str:
api_name_validator(value)
return value
+    def validate_display_name(self, value: str) -> str:
+        """Sanitize the display name of the API deployment.
+
+        Delegates to ``validate_name_field``, which strips surrounding
+        whitespace and raises a ``ValidationError`` when the result is empty
+        or contains HTML tags, a ``javascript:`` protocol, or inline
+        event-handler attributes.
+
+        Returns:
+            The whitespace-stripped display name.
+        """
+        cleaned = validate_name_field(value, field_name="Display name")
+        return cleaned
+
+    def validate_description(self, value: str) -> str:
+        """Reject markup in the description; ``None`` passes through as-is.
+
+        Non-``None`` values are handed to ``validate_no_html_tags``, which
+        raises a ``ValidationError`` for HTML tags, a ``javascript:``
+        protocol, or inline event-handler attributes.
+
+        Returns:
+            The unchanged description, or ``None`` when none was supplied.
+        """
+        if value is not None:
+            return validate_no_html_tags(value, field_name="Description")
+        return value
+
def validate_workflow(self, workflow):
"""Validate that the workflow has properly configured source and destination endpoints."""
# Get all endpoints for this workflow with related data
diff --git a/backend/backend/settings/base.py b/backend/backend/settings/base.py
index 97e5be789d..64c393d735 100644
--- a/backend/backend/settings/base.py
+++ b/backend/backend/settings/base.py
@@ -418,6 +418,7 @@ def filter(self, record):
"social_django.middleware.SocialAuthExceptionMiddleware",
"middleware.remove_allow_header.RemoveAllowHeaderMiddleware",
"middleware.cache_control.CacheControlMiddleware",
+ "middleware.content_security_policy.ContentSecurityPolicyMiddleware",
]
TENANT_SUBFOLDER_PREFIX = f"{PATH_PREFIX}/unstract"
diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py
index 45d5c07562..5517bc5257 100644
--- a/backend/connector_v2/serializers.py
+++ b/backend/connector_v2/serializers.py
@@ -9,6 +9,7 @@
from connector_processor.exceptions import OAuthTimeOut
from rest_framework.serializers import CharField, SerializerMethodField
from utils.fields import EncryptedBinaryFieldSerializer
+from utils.input_sanitizer import validate_name_field
from backend.serializers import AuditSerializer
from connector_v2.constants import ConnectorInstanceKey as CIKey
@@ -28,6 +29,9 @@ class Meta:
model = ConnectorInstance
fields = "__all__"
+    def validate_connector_name(self, value: str) -> str:
+        """Sanitize the connector name.
+
+        Delegates to ``validate_name_field``, which strips surrounding
+        whitespace and raises a ``ValidationError`` when the result is empty
+        or contains HTML tags, a ``javascript:`` protocol, or inline
+        event-handler attributes.
+
+        Returns:
+            The whitespace-stripped connector name.
+        """
+        cleaned = validate_name_field(value, field_name="Connector name")
+        return cleaned
+
def save(self, **kwargs): # type: ignore
user = self.context.get("request").user or None
connector_id: str = kwargs[CIKey.CONNECTOR_ID]
diff --git a/backend/middleware/content_security_policy.py b/backend/middleware/content_security_policy.py
new file mode 100644
index 0000000000..caa142ba43
--- /dev/null
+++ b/backend/middleware/content_security_policy.py
@@ -0,0 +1,31 @@
+from django.http import HttpRequest, HttpResponse
+from django.utils.deprecation import MiddlewareMixin
+
+
+class ContentSecurityPolicyMiddleware(MiddlewareMixin):
+    """Attach a restrictive Content-Security-Policy header to responses.
+
+    The backend serves a JSON API, so the policy is deliberately tight:
+    resource-loading directives are limited to ``'self'``, framing is
+    forbidden via ``frame-ancestors 'none'``, and because no
+    ``'unsafe-inline'`` source is listed, inline scripts and styles are
+    not permitted. This keeps injected content from executing should a
+    response ever be rendered in a browser context.
+    """
+
+    _HEADER_NAME = "Content-Security-Policy"
+    # Adjacent literals are concatenated into one header value; every
+    # directive except the last ends with "; ".
+    _POLICY = (
+        "default-src 'self'; "
+        "script-src 'self'; "
+        "style-src 'self'; "
+        "img-src 'self'; "
+        "font-src 'self'; "
+        "connect-src 'self'; "
+        "frame-ancestors 'none'; "
+        "base-uri 'self'; "
+        "form-action 'self'"
+    )
+
+    def process_response(
+        self, request: HttpRequest, response: HttpResponse
+    ) -> HttpResponse:
+        """Set the CSP header on ``response`` and return it.
+
+        ``setdefault`` is used so that a policy already present on the
+        response is kept rather than overwritten. ``request`` is not used.
+        """
+        response.setdefault(self._HEADER_NAME, self._POLICY)
+        return response
diff --git a/backend/notification_v2/serializers.py b/backend/notification_v2/serializers.py
index cd82fed8a6..115487c481 100644
--- a/backend/notification_v2/serializers.py
+++ b/backend/notification_v2/serializers.py
@@ -1,4 +1,5 @@
from rest_framework import serializers
+from utils.input_sanitizer import validate_name_field
from .enums import AuthorizationType, NotificationType, PlatformType
from .models import Notification
@@ -109,6 +110,8 @@ def validate_name(self, value):
"""Check uniqueness of the name with respect to either 'api' or
'pipeline'.
"""
+ value = validate_name_field(value, field_name="Notification name")
+
api = self.initial_data.get("api", getattr(self.instance, "api", None))
pipeline = self.initial_data.get(
"pipeline", getattr(self.instance, "pipeline", None)
diff --git a/backend/prompt_studio/prompt_studio_core_v2/serializers.py b/backend/prompt_studio/prompt_studio_core_v2/serializers.py
index 9a90fa3583..fd13e9b289 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/serializers.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/serializers.py
@@ -8,6 +8,7 @@
from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from utils.FileValidator import FileValidator
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
from backend.serializers import AuditSerializer
@@ -51,6 +52,12 @@ class Meta:
}
}
+    def validate_tool_name(self, value: str) -> str:
+        """Sanitize the tool name.
+
+        Delegates to ``validate_name_field``, which strips surrounding
+        whitespace and raises a ``ValidationError`` when the result is empty
+        or contains HTML tags, a ``javascript:`` protocol, or inline
+        event-handler attributes.
+
+        Returns:
+            The whitespace-stripped tool name.
+        """
+        cleaned = validate_name_field(value, field_name="Tool name")
+        return cleaned
+
+    def validate_description(self, value: str) -> str:
+        """Reject markup in the description; ``None`` passes through as-is.
+
+        Non-``None`` values are handed to ``validate_no_html_tags``, which
+        raises a ``ValidationError`` for HTML tags, a ``javascript:``
+        protocol, or inline event-handler attributes.
+
+        Returns:
+            The unchanged description, or ``None`` when none was supplied.
+        """
+        # Field-level validators also receive None for nullable fields;
+        # the regex checks would raise TypeError on it, so skip them.
+        if value is None:
+            return value
+        return validate_no_html_tags(value, field_name="Description")
+
def validate_summarize_llm_adapter(self, value):
"""Validate that the adapter type is LLM and is accessible to the user."""
if value is None:
diff --git a/backend/utils/input_sanitizer.py b/backend/utils/input_sanitizer.py
new file mode 100644
index 0000000000..bb71a2559f
--- /dev/null
+++ b/backend/utils/input_sanitizer.py
@@ -0,0 +1,29 @@
+import re
+
+from rest_framework.serializers import ValidationError
+
+# Matches any "<...>" span (including an empty "<>"), which covers opening
+# and closing HTML/script tags.
+HTML_TAG_PATTERN = re.compile(r"<[^>]*>")
+# Matches the "javascript:" URI scheme in any letter case, allowing
+# whitespace between the scheme name and the colon.
+JS_PROTOCOL_PATTERN = re.compile(r"javascript\s*:", flags=re.IGNORECASE)
+# Matches inline event-handler attributes (onclick=, onerror=, ...) in any
+# letter case, when they start the value or directly follow whitespace.
+EVENT_HANDLER_PATTERN = re.compile(r"(?:^|\s)on\w+\s*=", flags=re.IGNORECASE)
+
+
+def validate_no_html_tags(value: str, field_name: str = "This field") -> str:
+    """Reject values that look like they carry HTML or script content.
+
+    The checks run in a fixed order and the first one that matches decides
+    the error message: HTML tags, then ``javascript:`` protocols, then
+    inline event-handler attributes.
+
+    Args:
+        value: The text to inspect.
+        field_name: Label placed at the start of the error message.
+
+    Returns:
+        ``value`` unchanged when none of the patterns match.
+
+    Raises:
+        ValidationError: If any of the patterns is found in ``value``.
+    """
+    checks = (
+        (HTML_TAG_PATTERN, f"{field_name} must not contain HTML or script tags."),
+        (JS_PROTOCOL_PATTERN, f"{field_name} must not contain JavaScript protocols."),
+        (
+            EVENT_HANDLER_PATTERN,
+            f"{field_name} must not contain event handler attributes.",
+        ),
+    )
+    for pattern, message in checks:
+        if pattern.search(value):
+            raise ValidationError(message)
+    return value
+
+
+def validate_name_field(value: str, field_name: str = "This field") -> str:
+    """Validate a name/identifier field and return it stripped.
+
+    Surrounding whitespace is removed first; the stripped text must be
+    non-empty and must pass ``validate_no_html_tags``.
+
+    Args:
+        value: The raw name as submitted.
+        field_name: Label placed at the start of the error message.
+
+    Returns:
+        The whitespace-stripped name.
+
+    Raises:
+        ValidationError: If the stripped name is empty or contains markup.
+    """
+    stripped = value.strip()
+    if stripped:
+        return validate_no_html_tags(stripped, field_name)
+    raise ValidationError(f"{field_name} must not be empty.")
diff --git a/backend/utils/tests/__init__.py b/backend/utils/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/backend/utils/tests/test_input_sanitizer.py b/backend/utils/tests/test_input_sanitizer.py
new file mode 100644
index 0000000000..08353af658
--- /dev/null
+++ b/backend/utils/tests/test_input_sanitizer.py
@@ -0,0 +1,97 @@
+import pytest
+from rest_framework.serializers import ValidationError
+
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
+
+
+class TestValidateNoHtmlTags:
+ def test_clean_input_passes(self):
+ assert validate_no_html_tags("Hello World") == "Hello World"
+
+ def test_allows_normal_special_chars(self):
+ assert (
+ validate_no_html_tags("My workflow (v2), test - final")
+ == "My workflow (v2), test - final"
+ )
+
+ def test_allows_numbers_and_punctuation(self):
+ assert validate_no_html_tags("Test 123 & more!") == "Test 123 & more!"
+
+ def test_rejects_script_tag(self):
+ with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+ validate_no_html_tags("")
+
+ def test_rejects_img_tag(self):
+ with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+ validate_no_html_tags('')
+
+ def test_rejects_div_tag(self):
+ with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+ validate_no_html_tags("