diff --git a/backend/account_v2/serializer.py b/backend/account_v2/serializer.py
index 4011cf4d5a..e189b4efe1 100644
--- a/backend/account_v2/serializer.py
+++ b/backend/account_v2/serializer.py
@@ -1,6 +1,7 @@
 import re
 
 from rest_framework import serializers
+from utils.input_sanitizer import validate_name_field
 
 from account_v2.models import Organization, User
 
@@ -10,6 +11,14 @@ class OrganizationSignupSerializer(serializers.Serializer):
     display_name = serializers.CharField(required=True, max_length=150)
     organization_id = serializers.CharField(required=True, max_length=30)
 
+    def validate_name(self, value: str) -> str:
+        """Return the stripped organization name, rejecting markup."""
+        return validate_name_field(value, field_name="Organization name")
+
+    def validate_display_name(self, value: str) -> str:
+        """Return the stripped display name, rejecting markup."""
+        return validate_name_field(value, field_name="Display name")
+
     def validate_organization_id(self, value):  # type: ignore
         if not re.match(r"^[a-z0-9_-]+$", value):
             raise serializers.ValidationError(
diff --git a/backend/adapter_processor_v2/serializers.py b/backend/adapter_processor_v2/serializers.py
index 1d931f1266..3550c4003f 100644
--- a/backend/adapter_processor_v2/serializers.py
+++ b/backend/adapter_processor_v2/serializers.py
@@ -6,6 +6,7 @@
 from django.conf import settings
 from rest_framework import serializers
 from rest_framework.serializers import ModelSerializer
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
 
 from adapter_processor_v2.adapter_processor import AdapterProcessor
 from adapter_processor_v2.constants import AdapterKeys
@@ -28,6 +29,16 @@ class Meta:
         model = AdapterInstance
         fields = "__all__"
 
+    def validate_adapter_name(self, value: str) -> str:
+        """Return the stripped adapter name, rejecting markup."""
+        return validate_name_field(value, field_name="Adapter name")
+
+    def validate_description(self, value: str) -> str:
+        """Reject markup in the description; a null description passes through."""
+        if value is None:
+            return value
+        return validate_no_html_tags(value, field_name="Description")
+
 
 class DefaultAdapterSerializer(serializers.Serializer):
     llm_default = serializers.CharField(max_length=FLC.UUID_LENGTH, required=False)
diff --git a/backend/api_v2/serializers.py b/backend/api_v2/serializers.py
index 5a2068fd93..4f81ef7c49 100644
--- a/backend/api_v2/serializers.py
+++ b/backend/api_v2/serializers.py
@@ -22,6 +22,7 @@
     ValidationError,
 )
 from tags.serializers import TagParamsSerializer
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
 from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
 from workflow_manager.endpoint_v2.models import WorkflowEndpoint
 from workflow_manager.workflow_v2.exceptions import ExecutionDoesNotExistError
@@ -62,6 +63,16 @@ def validate_api_name(self, value: str) -> str:
         api_name_validator(value)
         return value
 
+    def validate_display_name(self, value: str) -> str:
+        """Return the stripped display name, rejecting markup."""
+        return validate_name_field(value, field_name="Display name")
+
+    def validate_description(self, value: str) -> str:
+        """Reject markup in the description; a null description passes through."""
+        if value is None:
+            return value
+        return validate_no_html_tags(value, field_name="Description")
+
     def validate_workflow(self, workflow):
         """Validate that the workflow has properly configured source and destination endpoints."""
         # Get all endpoints for this workflow with related data
diff --git a/backend/backend/settings/base.py b/backend/backend/settings/base.py
index 97e5be789d..64c393d735 100644
--- a/backend/backend/settings/base.py
+++ b/backend/backend/settings/base.py
@@ -418,6 +418,7 @@ def filter(self, record):
     "social_django.middleware.SocialAuthExceptionMiddleware",
     "middleware.remove_allow_header.RemoveAllowHeaderMiddleware",
     "middleware.cache_control.CacheControlMiddleware",
+    "middleware.content_security_policy.ContentSecurityPolicyMiddleware",
 ]
 
 TENANT_SUBFOLDER_PREFIX = f"{PATH_PREFIX}/unstract"
diff --git a/backend/connector_v2/serializers.py b/backend/connector_v2/serializers.py
index 45d5c07562..5517bc5257 100644
--- a/backend/connector_v2/serializers.py
+++ 
b/backend/connector_v2/serializers.py
@@ -9,6 +9,7 @@
 from connector_processor.exceptions import OAuthTimeOut
 from rest_framework.serializers import CharField, SerializerMethodField
 from utils.fields import EncryptedBinaryFieldSerializer
+from utils.input_sanitizer import validate_name_field
 
 from backend.serializers import AuditSerializer
 from connector_v2.constants import ConnectorInstanceKey as CIKey
@@ -28,6 +29,10 @@ class Meta:
         model = ConnectorInstance
         fields = "__all__"
 
+    def validate_connector_name(self, value: str) -> str:
+        """Return the stripped connector name, rejecting markup."""
+        return validate_name_field(value, field_name="Connector name")
+
     def save(self, **kwargs):  # type: ignore
         user = self.context.get("request").user or None
         connector_id: str = kwargs[CIKey.CONNECTOR_ID]
diff --git a/backend/middleware/content_security_policy.py b/backend/middleware/content_security_policy.py
new file mode 100644
index 0000000000..caa142ba43
--- /dev/null
+++ b/backend/middleware/content_security_policy.py
@@ -0,0 +1,36 @@
+"""Middleware that attaches a default Content-Security-Policy header."""
+
+from django.http import HttpRequest, HttpResponse
+from django.utils.deprecation import MiddlewareMixin
+
+# Directives are joined with "; " to form the header value. Everything is
+# limited to same-origin resources, and framing is disallowed entirely.
+_CSP_DIRECTIVES = (
+    "default-src 'self'",
+    "script-src 'self'",
+    "style-src 'self'",
+    "img-src 'self'",
+    "font-src 'self'",
+    "connect-src 'self'",
+    "frame-ancestors 'none'",
+    "base-uri 'self'",
+    "form-action 'self'",
+)
+_CSP_HEADER_NAME = "Content-Security-Policy"
+_CSP_HEADER_VALUE = "; ".join(_CSP_DIRECTIVES)
+
+
+class ContentSecurityPolicyMiddleware(MiddlewareMixin):
+    """Add a restrictive Content-Security-Policy header to every response.
+
+    The backend serves a JSON API, so the policy only permits same-origin
+    resources and no inline scripts or styles. A header already present on
+    the response is left untouched.
+    """
+
+    def process_response(
+        self, request: HttpRequest, response: HttpResponse
+    ) -> HttpResponse:
+        """Set the CSP header unless the response already carries one."""
+        response.setdefault(_CSP_HEADER_NAME, _CSP_HEADER_VALUE)
+        return response
diff --git a/backend/notification_v2/serializers.py b/backend/notification_v2/serializers.py
index cd82fed8a6..115487c481 100644
--- a/backend/notification_v2/serializers.py
+++ b/backend/notification_v2/serializers.py
@@ -1,4 +1,5 @@
 from rest_framework import serializers
+from utils.input_sanitizer import validate_name_field
 
 from .enums import AuthorizationType, NotificationType, PlatformType
 from .models import Notification
@@ -109,6 +110,9 @@ def validate_name(self, value):
         """Check uniqueness of the name with respect to either 'api' or
         'pipeline'.
         """
+        # Strip whitespace and reject markup before the checks that follow.
+        value = validate_name_field(value, field_name="Notification name")
+
         api = self.initial_data.get("api", getattr(self.instance, "api", None))
         pipeline = self.initial_data.get(
             "pipeline", getattr(self.instance, "pipeline", None)
diff --git a/backend/prompt_studio/prompt_studio_core_v2/serializers.py b/backend/prompt_studio/prompt_studio_core_v2/serializers.py
index 9a90fa3583..fd13e9b289 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/serializers.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/serializers.py
@@ -8,6 +8,7 @@
 from rest_framework import serializers
 from rest_framework.exceptions import ValidationError
 from utils.FileValidator import FileValidator
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
 from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
 
 from backend.serializers import AuditSerializer
@@ -51,6 +52,12 @@ class Meta:
             }
         }
 
+    def validate_tool_name(self, value: str) -> str:
+        return validate_name_field(value, field_name="Tool name")
+
+ 
def validate_description(self, value: str) -> str:
+        return validate_no_html_tags(value, field_name="Description")
+
     def validate_summarize_llm_adapter(self, value):
         """Validate that the adapter type is LLM and is accessible to the user."""
         if value is None:
diff --git a/backend/utils/input_sanitizer.py b/backend/utils/input_sanitizer.py
new file mode 100644
index 0000000000..bb71a2559f
--- /dev/null
+++ b/backend/utils/input_sanitizer.py
@@ -0,0 +1,68 @@
+"""Validators that reject markup-like content in user-supplied text.
+
+These checks are defence in depth only; they do not replace escaping values
+when they are rendered.
+"""
+
+from __future__ import annotations
+
+import re
+
+from rest_framework.serializers import ValidationError
+
+# Anything shaped like an HTML/script tag, e.g. "<b>" or "</script>".
+HTML_TAG_PATTERN = re.compile(r"<[^>]*>")
+# The "javascript:" URL scheme, tolerating whitespace before the colon.
+JS_PROTOCOL_PATTERN = re.compile(r"javascript\s*:", re.IGNORECASE)
+# Inline event-handler attributes such as "onclick=" or "onerror =".
+EVENT_HANDLER_PATTERN = re.compile(r"(?:^|\s)on\w+\s*=", re.IGNORECASE)
+
+
+def validate_no_html_tags(
+    value: str | None, field_name: str = "This field"
+) -> str | None:
+    """Reject text containing HTML tags, JavaScript URLs or event handlers.
+
+    Args:
+        value: Text to check. ``None`` is returned unchanged so that nullable
+            serializer fields can hand their value straight to this function.
+        field_name: Label used at the start of the error message.
+
+    Returns:
+        ``value`` unchanged when it is ``None`` or matches no pattern.
+
+    Raises:
+        ValidationError: If ``value`` matches ``HTML_TAG_PATTERN``,
+            ``JS_PROTOCOL_PATTERN`` or ``EVENT_HANDLER_PATTERN``.
+    """
+    if value is None:
+        # The regex searches below raise TypeError on None.
+        return None
+    if HTML_TAG_PATTERN.search(value):
+        raise ValidationError(f"{field_name} must not contain HTML or script tags.")
+    if JS_PROTOCOL_PATTERN.search(value):
+        raise ValidationError(f"{field_name} must not contain JavaScript protocols.")
+    if EVENT_HANDLER_PATTERN.search(value):
+        raise ValidationError(f"{field_name} must not contain event handler attributes.")
+    return value
+
+
+def validate_name_field(value: str | None, field_name: str = "This field") -> str:
+    """Strip a name value and reject it when empty or markup-like.
+
+    Args:
+        value: Raw name. ``None`` is treated the same as an empty string.
+        field_name: Label used at the start of the error message.
+
+    Returns:
+        ``value`` with leading and trailing whitespace removed.
+
+    Raises:
+        ValidationError: If the stripped value is empty or fails
+            ``validate_no_html_tags``.
+    """
+    stripped = "" if value is None else value.strip()
+    if not stripped:
+        raise ValidationError(f"{field_name} must not be empty.")
+    validate_no_html_tags(stripped, field_name)
+    return stripped
diff --git a/backend/utils/tests/__init__.py b/backend/utils/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/backend/utils/tests/test_input_sanitizer.py b/backend/utils/tests/test_input_sanitizer.py
new file mode 100644
index 0000000000..08353af658
--- /dev/null
+++ 
b/backend/utils/tests/test_input_sanitizer.py
@@ -0,0 +1,97 @@
+import pytest
+from rest_framework.serializers import ValidationError
+
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
+
+
+class TestValidateNoHtmlTags:
+    def test_clean_input_passes(self):
+        assert validate_no_html_tags("Hello World") == "Hello World"
+
+    def test_allows_normal_special_chars(self):
+        assert (
+            validate_no_html_tags("My workflow (v2), test - final")
+            == "My workflow (v2), test - final"
+        )
+
+    def test_allows_numbers_and_punctuation(self):
+        assert validate_no_html_tags("Test 123 & more!") == "Test 123 & more!"
+
+    def test_rejects_script_tag(self):
+        with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+            validate_no_html_tags("<script>alert('xss')</script>")
+
+    def test_rejects_img_tag(self):
+        with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+            validate_no_html_tags('<img src="x" onerror="alert(1)">')
+
+    def test_rejects_div_tag(self):
+        with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+            validate_no_html_tags("<div>content</div>")
+
+    def test_rejects_self_closing_tag(self):
+        with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+            validate_no_html_tags("<br/>")
+
+    def test_rejects_javascript_protocol(self):
+        with pytest.raises(ValidationError, match="must not contain JavaScript protocols"):
+            validate_no_html_tags("javascript:alert(1)")
+
+    def test_rejects_javascript_protocol_with_spaces(self):
+        with pytest.raises(ValidationError, match="must not contain JavaScript protocols"):
+            validate_no_html_tags("javascript :alert(1)")
+
+    def test_rejects_javascript_protocol_case_insensitive(self):
+        with pytest.raises(ValidationError, match="must not contain JavaScript protocols"):
+            validate_no_html_tags("JAVASCRIPT:alert(1)")
+
+    def test_rejects_event_handler(self):
+        with pytest.raises(
+            ValidationError, match="must not contain event handler attributes"
+        ):
+            validate_no_html_tags("onclick=alert(1)")
+
+    def test_rejects_event_handler_with_spaces(self):
+        with pytest.raises(
+            ValidationError, match="must not contain event handler attributes"
+        ):
+            validate_no_html_tags("onerror =alert(1)")
+
+    def test_rejects_event_handler_case_insensitive(self):
+        with pytest.raises(
+            ValidationError, match="must not contain event handler attributes"
+        ):
+            validate_no_html_tags("ONLOAD=alert(1)")
+
+    def test_custom_field_name_in_error(self):
+        with pytest.raises(ValidationError, match="Workflow name"):
+            validate_no_html_tags("<script>", field_name="Workflow name")
+
+
+class TestValidateNameField:
+    def test_valid_name_passes(self):
+        assert validate_name_field("My Workflow") == "My Workflow"
+
+    def test_strips_whitespace(self):
+        assert validate_name_field("  My Workflow  ") == "My Workflow"
+
+    def test_rejects_whitespace_only(self):
+        with pytest.raises(ValidationError, match="must not be empty"):
+            validate_name_field("   ")
+
+    def test_rejects_html_tags(self):
+        with pytest.raises(ValidationError, match="must not contain HTML or script tags"):
+            validate_name_field("<script>alert(1)</script>")
+
+    def test_allows_hyphens_and_underscores(self):
+        assert validate_name_field("my-workflow_v2") == "my-workflow_v2"
+
+    def test_allows_periods(self):
+        assert validate_name_field("config.v2") == "config.v2"
+
+    def test_allows_parentheses_and_commas(self):
+        assert validate_name_field("Test (v2), final") == "Test (v2), final"
+
+    def test_custom_field_name_in_error(self):
+        with pytest.raises(ValidationError, match="Tool name"):
+            validate_name_field("   ", field_name="Tool name")
diff --git a/backend/workflow_manager/workflow_v2/serializers.py b/backend/workflow_manager/workflow_v2/serializers.py
index 6442d1e1fa..108aadd13f 100644
--- a/backend/workflow_manager/workflow_v2/serializers.py
+++ 
b/backend/workflow_manager/workflow_v2/serializers.py
@@ -14,6 +14,7 @@
 )
 from tool_instance_v2.serializers import ToolInstanceSerializer
 from tool_instance_v2.tool_instance_helper import ToolInstanceHelper
+from utils.input_sanitizer import validate_name_field, validate_no_html_tags
 from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
 
 from backend.constants import RequestKey
@@ -46,6 +47,16 @@ class Meta:
             }
         }
 
+    def validate_workflow_name(self, value: str) -> str:
+        """Return the stripped workflow name, rejecting markup."""
+        return validate_name_field(value, field_name="Workflow name")
+
+    def validate_description(self, value: str) -> str:
+        """Reject markup in the description; a null description passes through."""
+        if value is None:
+            return value
+        return validate_no_html_tags(value, field_name="Description")
+
     def to_representation(self, instance: Workflow) -> dict[str, str]:
         representation: dict[str, str] = super().to_representation(instance)
         representation[WorkflowKey.WF_NAME] = instance.workflow_name
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
index f3eef19532..27f1bd04ac 100644
--- a/frontend/nginx.conf
+++ b/frontend/nginx.conf
@@ -47,6 +47,17 @@ http {
         add_header X-Frame-Options "SAMEORIGIN" always;
         add_header Referrer-Policy "strict-origin-when-cross-origin" always;
 
+        # Content Security Policy
+        # - 'unsafe-inline' in script-src: required for runtime-config.js injected at container start
+        # - 'unsafe-inline' in style-src: required by Ant Design CSS-in-JS
+        # - cdn.jsdelivr.net: Monaco Editor loads from this CDN
+        # - unpkg.com: PDF.js worker
+        # - PostHog, GTM, reCAPTCHA, Stripe, Product Fruits: third-party services
+        # NOTE: the value must be ONE quoted string. nginx does not concatenate
+        # adjacent quoted strings; each one is a separate argument and add_header
+        # then fails with "invalid number of arguments".
+        add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://unpkg.com https://eu.i.posthog.com https://eu-assets.i.posthog.com https://www.googletagmanager.com https://www.google.com/recaptcha/ https://www.gstatic.com/recaptcha/ https://js.stripe.com https://app.productfruits.com; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob: https:; font-src 'self' data:; connect-src 'self' ws: wss: https://eu.i.posthog.com https://eu-assets.i.posthog.com https://www.google-analytics.com https://api.stripe.com https://app.productfruits.com; frame-src 'self' https://www.google.com/recaptcha/ https://recaptcha.google.com https://js.stripe.com https://hooks.stripe.com; worker-src 'self' blob: https://unpkg.com https://cdn.jsdelivr.net; object-src 'none'; base-uri 'self'; form-action 'self' https://checkout.stripe.com; frame-ancestors 'self'" always;
+
         # Disable TRACE and TRACK methods
         if ($request_method ~ ^(TRACE|TRACK)$) {
             return 405;