diff --git a/spp_aggregation/__init__.py b/spp_aggregation/__init__.py new file mode 100644 index 00000000..c4ccea79 --- /dev/null +++ b/spp_aggregation/__init__.py @@ -0,0 +1,3 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import models
diff --git a/spp_aggregation/__manifest__.py b/spp_aggregation/__manifest__.py new file mode 100644 index 00000000..58f0cb46 --- /dev/null +++ b/spp_aggregation/__manifest__.py @@ -0,0 +1,38 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +{ + "name": "OpenSPP Aggregation Engine", + "summary": "Unified aggregation service for statistics, simulations, and GIS queries", + "category": "OpenSPP", + "version": "19.0.2.0.0", + "sequence": 1, + "author": "OpenSPP.org", + "website": "https://github.com/OpenSPP/OpenSPP2", + "license": "LGPL-3", + "development_status": "Alpha", + "maintainers": ["jeremi"], + "depends": [ + "base", + "spp_cel_domain", + "spp_area", + "spp_registry", + "spp_security", + "spp_metrics_services", + ], + "data": [ + # Security + "security/aggregation_security.xml", + "security/ir.model.access.csv", + # Data + "data/cron_cache_cleanup.xml", + # Views + "views/aggregation_scope_views.xml", + "views/aggregation_access_views.xml", + "views/menu.xml", + ], + "assets": {}, + "demo": [], + "images": [], + "application": False, + "installable": True, + "auto_install": False, +}
diff --git a/spp_aggregation/data/cron_cache_cleanup.xml b/spp_aggregation/data/cron_cache_cleanup.xml new file mode 100644 index 00000000..e4a362bc --- /dev/null +++ b/spp_aggregation/data/cron_cache_cleanup.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="utf-8" ?> +<odoo noupdate="1"> + + <record id="ir_cron_aggregation_cache_cleanup" model="ir.cron"> + <field name="name">Aggregation: Cache Cleanup</field> + <field name="model_id" ref="model_spp_aggregation_cache_entry"/> + <field name="state">code</field> + <field name="code">model.cron_cleanup_expired()</field> + <field name="interval_number">1</field> + <field name="interval_type">hours</field> + <field name="active" eval="True"/> + </record> +</odoo>
diff --git a/spp_aggregation/models/__init__.py b/spp_aggregation/models/__init__.py new file mode 100644 index 00000000..24cb09c3 --- /dev/null +++ b/spp_aggregation/models/__init__.py @@ -0,0 +1,8 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import aggregation_scope +from . import aggregation_access +from . import service_scope_resolver +from . import service_cache +from . import statistic_registry +from . import service_aggregation
diff --git a/spp_aggregation/models/aggregation_access.py b/spp_aggregation/models/aggregation_access.py new file mode 100644 index 00000000..6514bce2 --- /dev/null +++ b/spp_aggregation/models/aggregation_access.py @@ -0,0 +1,318 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import logging + +from odoo import _, api, fields, models +from odoo.exceptions import ValidationError + +_logger = logging.getLogger(__name__) + + +class AggregationAccessRule(models.Model): + """ + Access control rules for aggregation queries. + + Determines what level of data access a user/group has: + - aggregate: Can only see counts and statistics (no individual records) + - individual: Can see individual record IDs in results + + Also controls k-anonymity thresholds and scope restrictions.
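+
+    Example (illustrative sketch; the group record is hypothetical):
+
+        env["spp.aggregation.access.rule"].create({
+            "name": "M&E aggregate-only access",
+            "group_id": monitoring_group.id,
+            "access_level": "aggregate",
+            "minimum_k_anonymity": 10,
+        })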
+ """ + + _name = "spp.aggregation.access.rule" + _description = "Aggregation Access Rule" + _order = "sequence, name" + + name = fields.Char( + required=True, + help="Human-readable name for this access rule.", + ) + description = fields.Text( + help="Optional description of what this rule grants.", + ) + sequence = fields.Integer( + default=10, + help="Lower sequence = higher priority when multiple rules match.", + ) + active = fields.Boolean( + default=True, + index=True, + ) + + # ------------------------------------------------------------------------- + # Who this rule applies to (one of user/group) + # ------------------------------------------------------------------------- + user_id = fields.Many2one( + comodel_name="res.users", + string="User", + ondelete="cascade", + help="Specific user this rule applies to.", + ) + group_id = fields.Many2one( + comodel_name="res.groups", + string="Security Group", + ondelete="cascade", + help="Security group this rule applies to.", + ) + + # ------------------------------------------------------------------------- + # Access level + # ------------------------------------------------------------------------- + access_level = fields.Selection( + selection=[ + ("aggregate", "Aggregates Only"), + ("individual", "Individual Records"), + ], + required=True, + default="aggregate", + help=( + "Aggregates Only: User can see counts and statistics but NOT individual record IDs. " + "Individual Records: User can see individual record IDs in results." + ), + ) + + # ------------------------------------------------------------------------- + # Privacy settings + # ------------------------------------------------------------------------- + minimum_k_anonymity = fields.Integer( + default=5, + help="Minimum count for a cell before it's suppressed (k-anonymity). Higher = more private.", + ) + + # ------------------------------------------------------------------------- + # Scope restrictions + # ------------------------------------------------------------------------- + allowed_scope_types = fields.Selection( + selection=[ + ("all", "All Scope Types"), + ("area_only", "Area-based Only"), + ("predefined", "Predefined Scopes Only"), + ], + default="all", + help=( + "Restrict which scope types this user can query. " + "Predefined means they can only use saved scope IDs, not inline definitions." + ), + ) + allow_inline_scopes = fields.Boolean( + default=False, + help=( + "If False, user can only query predefined scope IDs, not create inline scope definitions. " + "This prevents ad-hoc queries that could be used to narrow down individuals." + ), + ) + allowed_scope_ids = fields.Many2many( + comodel_name="spp.aggregation.scope", + relation="spp_aggregation_access_rule_scope_rel", + column1="rule_id", + column2="scope_id", + string="Allowed Scopes", + help="If set, user can only query these specific scopes (for predefined mode).", + ) + + # ------------------------------------------------------------------------- + # Area restrictions + # ------------------------------------------------------------------------- + allowed_area_ids = fields.Many2many( + comodel_name="spp.area", + relation="spp_aggregation_access_rule_area_rel", + column1="rule_id", + column2="area_id", + string="Allowed Areas", + help="If set, user can only query data from these specific areas (and optionally their children).", + ) + include_child_areas = fields.Boolean( + default=True, + help="If True, allowed_area_ids includes child areas. 
If False, only the exact areas are allowed.", + ) + + # ------------------------------------------------------------------------- + # Dimension restrictions + # ------------------------------------------------------------------------- + max_group_by_dimensions = fields.Integer( + default=3, + help="Maximum number of dimensions allowed in group_by. More dimensions = more granular = less private.", + ) + allowed_dimension_ids = fields.Many2many( + comodel_name="spp.demographic.dimension", + relation="spp_aggregation_access_rule_dimension_rel", + column1="rule_id", + column2="dimension_id", + string="Allowed Dimensions", + help="If set, user can only group by these dimensions.", + ) + + # ------------------------------------------------------------------------- + # Validation + # ------------------------------------------------------------------------- + @api.constrains("user_id", "group_id") + def _check_user_or_group(self): + """Ensure exactly one of user_id or group_id is set.""" + for rule in self: + if rule.user_id and rule.group_id: + raise ValidationError(_("A rule cannot apply to both a specific user and a group.")) + if not rule.user_id and not rule.group_id: + raise ValidationError(_("A rule must apply to either a user or a group.")) + + @api.constrains("minimum_k_anonymity") + def _check_k_anonymity(self): + """Ensure k-anonymity threshold is reasonable.""" + for rule in self: + if rule.minimum_k_anonymity < 1: + raise ValidationError(_("Minimum k-anonymity must be at least 1.")) + if rule.minimum_k_anonymity > 100: + raise ValidationError(_("Minimum k-anonymity should not exceed 100.")) + + @api.constrains("max_group_by_dimensions") + def _check_max_dimensions(self): + """Ensure max dimensions is reasonable.""" + for rule in self: + if rule.max_group_by_dimensions < 0: + raise ValidationError(_("Maximum group_by dimensions cannot be negative.")) + if rule.max_group_by_dimensions > 10: + raise ValidationError(_("Maximum group_by dimensions should not exceed 10.")) + + # ------------------------------------------------------------------------- + # Public API + # ------------------------------------------------------------------------- + @api.model + def get_effective_rule_for_user(self, user=None): + """ + Get the most permissive applicable access rule for a user. + + Rules are evaluated in sequence order. User-specific rules take precedence + over group-based rules. + + :param user: res.users record (defaults to current user) + :returns: Access rule record or None if no rule matches + :rtype: spp.aggregation.access.rule or None + """ + user = user or self.env.user + + # First check for user-specific rule + user_rule = self.search( + [("user_id", "=", user.id), ("active", "=", True)], + limit=1, + order="sequence", + ) + if user_rule: + return user_rule + + # Then check for group-based rules + group_rule = self.search( + [("group_id", "in", user.group_ids.ids), ("active", "=", True)], + limit=1, + order="sequence", + ) + return group_rule + + def check_scope_allowed(self, scope): + """ + Check if a scope is allowed under this rule. 
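+
+        Illustrative: a rule with allow_inline_scopes=False rejects any inline
+        dict definition outright:
+
+            rule.check_scope_allowed({"scope_type": "cel"})  # raises ValidationError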
+ + :param scope: spp.aggregation.scope record or dict for inline scope + :returns: True if allowed + :raises: ValidationError if not allowed + """ + self.ensure_one() + + # Check inline scope restriction + if isinstance(scope, dict) and not self.allow_inline_scopes: + raise ValidationError(_("Inline scope definitions are not allowed for your access level.")) + + # Get scope type + scope_type = scope.get("scope_type") if isinstance(scope, dict) else scope.scope_type + + # Check scope type restriction + if self.allowed_scope_types == "predefined": + if isinstance(scope, dict): + raise ValidationError(_("Only predefined scopes are allowed for your access level.")) + if self.allowed_scope_ids and scope.id not in self.allowed_scope_ids.ids: + raise ValidationError(_("This scope is not in your allowed scope list.")) + + if self.allowed_scope_types == "area_only": + if scope_type not in ("area", "area_tag"): + raise ValidationError(_("Only area-based scopes are allowed for your access level.")) + + # Check area restrictions for explicit scopes + if scope_type == "explicit" and self.allowed_area_ids: + partner_ids = ( + scope.get("explicit_partner_ids") if isinstance(scope, dict) else scope.explicit_partner_ids.ids + ) + self._check_explicit_scope_area_compliance(partner_ids) + + return True + + def check_dimensions_allowed(self, dimension_names): + """ + Check if the requested dimensions are allowed. + + :param dimension_names: List of dimension names + :returns: True if allowed + :raises: ValidationError if not allowed + """ + self.ensure_one() + + if len(dimension_names) > self.max_group_by_dimensions: + raise ValidationError( + _("Too many dimensions: maximum %d allowed, %d requested.") + % (self.max_group_by_dimensions, len(dimension_names)) + ) + + if self.allowed_dimension_ids: + allowed_names = set(self.allowed_dimension_ids.mapped("name")) + requested = set(dimension_names) + disallowed = requested - allowed_names + if disallowed: + raise ValidationError(_("Dimensions not allowed: %s") % ", ".join(disallowed)) + + return True + + def _check_explicit_scope_area_compliance(self, partner_ids): + """ + Check if explicit partner IDs are within allowed areas. + + :param partner_ids: List of partner IDs + :returns: True if allowed + :raises: ValidationError if any partner is outside allowed areas + """ + self.ensure_one() + + if not self.allowed_area_ids: + # No area restrictions + return True + + if not partner_ids: + # Empty list is always allowed + return True + + # Build set of allowed area IDs + allowed_area_ids = set(self.allowed_area_ids.ids) + + # If include_child_areas is True, expand to include all child areas + if self.include_child_areas: + # Collect all parent_path values first, then do a single search using + # OR-chained domain conditions to avoid N+1 queries inside a loop. 
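+            # Illustrative shape: for two allowed areas with (hypothetical)
+            # parent_path values "1/4/" and "1/9/", the code below builds:
+            #     ["|", ("parent_path", "like", "1/4/%"), ("parent_path", "like", "1/9/%")]
+            # N paths need N-1 leading "|" operators in Odoo's prefix notation.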
+ parent_paths = [area.parent_path for area in self.allowed_area_ids if area.parent_path] + if parent_paths: + domain = ["|"] * (len(parent_paths) - 1) + for path in parent_paths: + domain.append(("parent_path", "like", f"{path}%")) + child_areas = self.env["spp.area"].sudo().search(domain) # nosemgrep: odoo-sudo-without-context + allowed_area_ids.update(child_areas.ids) + + # Get area_ids for the partners + partners = self.env["res.partner"].sudo().browse(partner_ids) # nosemgrep: odoo-sudo-without-context, odoo-sudo-on-sensitive-models # noqa: E501 # fmt: skip + partner_area_ids = set(partners.mapped("area_id").ids) + + # Check if all partner areas are in allowed areas + disallowed_area_ids = partner_area_ids - allowed_area_ids + + if disallowed_area_ids: + # Get area names for error message + disallowed_areas = self.env["spp.area"].sudo().browse(list(disallowed_area_ids)) # nosemgrep: odoo-sudo-without-context # noqa: E501 # fmt: skip + area_names = ", ".join(disallowed_areas.mapped("draft_name")) + raise ValidationError( + _("Some registrants are outside your allowed areas. Disallowed areas: %s") % area_names + ) + + return True diff --git a/spp_aggregation/models/aggregation_scope.py b/spp_aggregation/models/aggregation_scope.py new file mode 100644 index 00000000..00ea6fbd --- /dev/null +++ b/spp_aggregation/models/aggregation_scope.py @@ -0,0 +1,286 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import json +import logging + +from odoo import _, api, fields, models +from odoo.exceptions import UserError, ValidationError + +_logger = logging.getLogger(__name__) + + +class AggregationScope(models.Model): + """ + Unified targeting scope for aggregation queries. + + Defines WHAT to aggregate by resolving to a set of registrant IDs. 
+ Supports multiple scope types: + - CEL expressions + - Spatial polygons/buffers (requires spp_spatial bridge) + - Administrative areas + - Simulation results + - Explicit ID lists + """ + + _name = "spp.aggregation.scope" + _description = "Aggregation Scope" + _order = "name" + + name = fields.Char( + required=True, + index=True, + help="Human-readable name for this scope.", + ) + description = fields.Text( + help="Optional description of what this scope targets.", + ) + active = fields.Boolean( + default=True, + index=True, + ) + + scope_type = fields.Selection( + selection=[ + ("cel", "CEL Expression"), + ("spatial_polygon", "Within Polygon"), + ("spatial_buffer", "Within Distance"), + ("area", "Administrative Area"), + ("area_tag", "Area Tags"), + ("explicit", "Explicit IDs"), + # ("simulation", "Simulation Result") - added by spp_simulation + ], + required=True, + default="cel", + help="How to determine which registrants are in scope.", + ) + + # ------------------------------------------------------------------------- + # CEL targeting + # ------------------------------------------------------------------------- + cel_expression = fields.Text( + string="CEL Expression", + help="CEL expression to filter registrants (e.g., 'r.age >= 18 && r.gender_id.code == \"M\"').", + ) + cel_profile = fields.Selection( + selection=[ + ("registry_individuals", "Individuals"), + ("registry_groups", "Groups/Households"), + ], + default="registry_individuals", + help="CEL profile determines base domain and available fields.", + ) + + # ------------------------------------------------------------------------- + # Spatial targeting (basic storage, PostGIS via bridge module) + # ------------------------------------------------------------------------- + geometry_geojson = fields.Text( + string="Geometry (GeoJSON)", + help="GeoJSON polygon for spatial_polygon scope type.", + ) + buffer_center_latitude = fields.Float( + string="Center Latitude", + digits=(10, 7), + help="Latitude of buffer center for spatial_buffer scope type.", + ) + buffer_center_longitude = fields.Float( + string="Center Longitude", + digits=(10, 7), + help="Longitude of buffer center for spatial_buffer scope type.", + ) + buffer_radius_km = fields.Float( + string="Buffer Radius (km)", + digits=(10, 3), + help="Radius in kilometers for spatial_buffer scope type.", + ) + + # ------------------------------------------------------------------------- + # Area targeting + # ------------------------------------------------------------------------- + area_id = fields.Many2one( + comodel_name="spp.area", + string="Area", + ondelete="restrict", + help="Administrative area for area scope type.", + ) + area_tag_ids = fields.Many2many( + comodel_name="spp.area.tag", + relation="spp_aggregation_scope_area_tag_rel", + column1="scope_id", + column2="tag_id", + string="Area Tags", + help="Areas with these tags for area_tag scope type.", + ) + include_child_areas = fields.Boolean( + default=True, + help="Include registrants in child areas when using area scope.", + ) + + # ------------------------------------------------------------------------- + # Simulation targeting (extended by spp_simulation if installed) + # ------------------------------------------------------------------------- + # Note: simulation_run_id field is added by spp_simulation module + # when it extends this model + + # ------------------------------------------------------------------------- + # Explicit ID targeting + # 
------------------------------------------------------------------------- + explicit_partner_ids = fields.Many2many( + comodel_name="res.partner", + relation="spp_aggregation_scope_partner_rel", + column1="scope_id", + column2="partner_id", + string="Explicit Registrants", + domain="[('is_registrant', '=', True)]", + help="Explicit list of registrants for explicit scope type.", + ) + + # ------------------------------------------------------------------------- + # Cache configuration + # ------------------------------------------------------------------------- + is_cache_enabled = fields.Boolean( + string="Enable Caching", + default=True, + help="Enable result caching for this scope. Disable for scopes that change frequently.", + ) + cache_ttl = fields.Integer( + string="Cache TTL (seconds)", + default=0, + help="Custom cache TTL in seconds. 0 means use default system TTL.", + ) + last_cache_refresh = fields.Datetime( + string="Last Cache Refresh", + readonly=True, + help="Timestamp of last cache refresh for this scope.", + ) + + # ------------------------------------------------------------------------- + # Computed fields + # ------------------------------------------------------------------------- + registrant_count = fields.Integer( + compute="_compute_registrant_count", + string="Registrant Count", + help="Approximate count of registrants in scope (may be cached).", + ) + + @api.depends("scope_type", "cel_expression", "area_id", "area_tag_ids", "explicit_partner_ids") + def _compute_registrant_count(self): + """Compute approximate registrant count for display.""" + for scope in self: + if scope.scope_type == "explicit": + scope.registrant_count = len(scope.explicit_partner_ids) + else: + # For other types, resolve and count + try: + ids = self.env["spp.aggregation.scope.resolver"].resolve(scope) + scope.registrant_count = len(ids) + except (ValidationError, UserError) as e: + _logger.debug("Could not compute registrant count for scope %s: %s", scope.id, e) + scope.registrant_count = 0 + + # ------------------------------------------------------------------------- + # Validation + # ------------------------------------------------------------------------- + @api.constrains("scope_type", "cel_expression") + def _check_cel_expression(self): + """Validate CEL expression is provided for CEL scope type.""" + for scope in self: + if scope.scope_type == "cel" and not scope.cel_expression: + raise ValidationError(_("CEL expression is required for CEL scope type.")) + + @api.constrains("scope_type", "geometry_geojson") + def _check_spatial_polygon(self): + """Validate GeoJSON is provided and valid for spatial_polygon scope type.""" + for scope in self: + if scope.scope_type == "spatial_polygon": + if not scope.geometry_geojson: + raise ValidationError(_("GeoJSON geometry is required for spatial polygon scope type.")) + try: + geojson = json.loads(scope.geometry_geojson) + valid_types = ("Polygon", "MultiPolygon", "Feature", "FeatureCollection") + if geojson.get("type") not in valid_types: + message = _("GeoJSON must be a Polygon, MultiPolygon, Feature, or FeatureCollection.") + raise ValidationError(message) + except json.JSONDecodeError as e: + raise ValidationError(_("Invalid GeoJSON: %s") % str(e)) from e + + @api.constrains("scope_type", "buffer_center_latitude", "buffer_center_longitude", "buffer_radius_km") + def _check_spatial_buffer(self): + """Validate buffer parameters for spatial_buffer scope type.""" + for scope in self: + if scope.scope_type == "spatial_buffer": + if not 
scope.buffer_radius_km or scope.buffer_radius_km <= 0: + raise ValidationError(_("Buffer radius must be a positive number.")) + if not scope.buffer_center_latitude or not scope.buffer_center_longitude: + raise ValidationError(_("Buffer center coordinates are required.")) + if not -90 <= scope.buffer_center_latitude <= 90: + raise ValidationError(_("Latitude must be between -90 and 90.")) + if not -180 <= scope.buffer_center_longitude <= 180: + raise ValidationError(_("Longitude must be between -180 and 180.")) + + @api.constrains("scope_type", "area_id") + def _check_area(self): + """Validate area is provided for area scope type.""" + for scope in self: + if scope.scope_type == "area" and not scope.area_id: + raise ValidationError(_("Area is required for area scope type.")) + + @api.constrains("scope_type", "area_tag_ids") + def _check_area_tags(self): + """Validate area tags are provided for area_tag scope type.""" + for scope in self: + if scope.scope_type == "area_tag" and not scope.area_tag_ids: + raise ValidationError(_("At least one area tag is required for area tag scope type.")) + + # Note: _check_simulation_run constraint is added by spp_simulation module + + @api.constrains("scope_type", "explicit_partner_ids") + def _check_explicit_ids(self): + """Validate explicit IDs are provided for explicit scope type.""" + for scope in self: + if scope.scope_type == "explicit" and not scope.explicit_partner_ids: + raise ValidationError(_("At least one registrant is required for explicit scope type.")) + + # ------------------------------------------------------------------------- + # Public API + # ------------------------------------------------------------------------- + def resolve_registrant_ids(self): + """ + Resolve this scope to a list of partner IDs. + + This is the core unification method - all scope types resolve + to a list of res.partner IDs. + + :returns: List of partner IDs + :rtype: list[int] + """ + self.ensure_one() + return self.env["spp.aggregation.scope.resolver"].resolve(self) + + def action_preview_registrants(self): + """Action to preview registrants in this scope.""" + self.ensure_one() + ids = self.resolve_registrant_ids() + return { + "name": _("Registrants in Scope: %s") % self.name, + "type": "ir.actions.act_window", + "res_model": "res.partner", + "view_mode": "list,form", + "domain": [("id", "in", ids)], + "context": {"create": False, "delete": False}, + } + + def action_refresh_cache(self): + """ + Manually invalidate all cache entries for this scope. + + Invalidates all cache entries for this scope's type and + updates the last_cache_refresh timestamp. + """ + self.ensure_one() + cache_service = self.env["spp.aggregation.cache"] + count = cache_service.invalidate_scope(self) + if count: + scope_name = self.name + _logger.info("Invalidated %d cache entries for scope %s", count, scope_name) + + self.write({"last_cache_refresh": fields.Datetime.now()}) + return True diff --git a/spp_aggregation/models/service_aggregation.py b/spp_aggregation/models/service_aggregation.py new file mode 100644 index 00000000..f5c23d98 --- /dev/null +++ b/spp_aggregation/models/service_aggregation.py @@ -0,0 +1,395 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
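+# Usage sketch (illustrative; the area id and the "gender" dimension are
+# hypothetical, "count" is the built-in statistic):
+#
+#     result = env["spp.aggregation.service"].compute_for_area(
+#         area_id=42,
+#         statistics=["count"],
+#         group_by=["gender"],
+#     )
+#     result["total_count"], result["statistics"], result.get("breakdown")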
+import logging +from datetime import datetime + +from odoo import _, api, models +from odoo.exceptions import AccessError, ValidationError + +_logger = logging.getLogger(__name__) + + +class AggregationService(models.AbstractModel): + """ + Main aggregation service for unified statistics computation. + + This is THE entry point for all aggregation queries. All consumers + (simulation, GIS API, dashboards) should use this service. + + Access level is determined from user permissions, not passed as + a parameter, to prevent callers bypassing restrictions. + """ + + _name = "spp.aggregation.service" + _description = "Aggregation Service" + + MAX_GROUP_BY_DIMENSIONS = 3 + + @api.model + def compute_aggregation( + self, + scope, + statistics=None, + group_by=None, + context=None, + use_cache=True, + ): + """ + Compute aggregation for a scope with optional breakdown. + + Access level is determined from user permissions (AggregationAccessRule), + NOT passed as a parameter. This prevents callers bypassing restrictions. + + :param scope: spp.aggregation.scope record, ID, or inline dict definition + :param statistics: List of statistic names to compute (or None for defaults) + :param group_by: List of dimension names for breakdown (max 3) + :param context: Context string for configuration (e.g., "api", "dashboard") + :param use_cache: Whether to use cached results (default: True) + :returns: Aggregation result dictionary + :rtype: dict + + Returns: + { + "total_count": int, + "statistics": {stat_name: {"value": ..., "suppressed": bool}}, + "breakdown": { # Only if group_by specified + "dimension1|dimension2|...": {"count": int, "statistics": {...}}, + }, + "from_cache": bool, + "computed_at": datetime, + "access_level": str, # "aggregate" or "individual" + } + """ + # Resolve scope + scope_record = self._resolve_scope(scope) + + # Validate group_by dimensions + group_by = group_by or [] + self._validate_group_by(group_by) + + # Determine access level from user permissions + access_level = self._determine_access_level() + k_threshold = self._get_k_threshold() + + # Check scope is allowed for user + self._check_scope_allowed(scope) + + # Check cache if enabled + cache_service = self.env["spp.aggregation.cache"] + if use_cache: + cached_result = cache_service.get_cached_result(scope_record, statistics, group_by) + if cached_result: + # Apply access level to cached result (in case user permissions changed) + cached_result["access_level"] = access_level + _logger.debug("Returning cached result for scope") + return cached_result + + # Get registrant IDs from scope + registrant_ids = self._get_registrant_ids(scope_record) + + # Build result + result = { + "total_count": len(registrant_ids), + "statistics": {}, + "from_cache": False, + "computed_at": datetime.now().isoformat(), + "access_level": access_level, + } + + # Compute statistics if requested + if statistics: + result["statistics"] = self._compute_statistics( + registrant_ids, + statistics, + context=context, + k_threshold=k_threshold, + ) + + # Compute breakdown if group_by specified + if group_by: + result["breakdown"] = self._compute_breakdown(registrant_ids, group_by, statistics, context) + + # Apply privacy protections + privacy_service = self.env["spp.metrics.privacy"] + result = privacy_service.enforce(result, k_threshold, access_level) + + # Store in cache if enabled + if use_cache: + cache_service.store_result(scope_record, statistics, group_by, result) + + return result + + def _resolve_scope(self, scope): + """ + Resolve scope input to a 
scope record. + + :param scope: Record, ID, or dict + :returns: spp.aggregation.scope record or dict + """ + if isinstance(scope, dict): + # Inline scope definition + return scope + + if isinstance(scope, int): + # Scope ID + return self.env["spp.aggregation.scope"].browse(scope) + + # Assume it's already a record + return scope + + def _validate_group_by(self, group_by): + """ + Validate group_by dimensions. + + :param group_by: List of dimension names + :raises: ValidationError if invalid + """ + if len(group_by) > self.MAX_GROUP_BY_DIMENSIONS: + raise ValidationError( + _("Maximum %d group_by dimensions allowed, got %d.") % (self.MAX_GROUP_BY_DIMENSIONS, len(group_by)) + ) + + # Check dimensions exist (use sudo for internal validation) + dimension_model = self.env["spp.demographic.dimension"].sudo() # nosemgrep: odoo-sudo-without-context + for dim_name in group_by: + if not dimension_model.get_by_name(dim_name): + raise ValidationError(_("Unknown dimension: %s") % dim_name) + + # Check access rule dimension restrictions + user = self.env.user + # Use sudo() to read access rules - this is an internal security check + rule = self.env["spp.aggregation.access.rule"].sudo().get_effective_rule_for_user(user) # nosemgrep: odoo-sudo-without-context # noqa: E501 # fmt: skip + if rule and group_by: + rule.check_dimensions_allowed(group_by) + + def _determine_access_level(self, user=None): + """ + Determine access level from user permissions. + + :param user: res.users record (defaults to current user) + :returns: "aggregate" or "individual" + :rtype: str + """ + return self.env["spp.metrics.privacy"].validate_access_level(user) + + def _get_k_threshold(self, user=None): + """ + Get k-anonymity threshold for user. + + :param user: res.users record (defaults to current user) + :returns: k threshold value + :rtype: int + """ + return self.env["spp.metrics.privacy"].get_k_threshold(user) + + def _check_scope_allowed(self, scope): + """ + Check if scope is allowed for current user. + + :param scope: Scope record or dict + :raises: AccessError if not allowed + """ + user = self.env.user + # Use sudo() to read access rules - this is an internal security check + rule = self.env["spp.aggregation.access.rule"].sudo().get_effective_rule_for_user(user) # nosemgrep: odoo-sudo-without-context # noqa: E501 # fmt: skip + + if not rule: + # No explicit rule - allow with defaults + return + + try: + rule.check_scope_allowed(scope) + except ValidationError as e: + raise AccessError(str(e)) from e + + def _get_registrant_ids(self, scope): + """ + Get registrant IDs from scope. + + :param scope: Scope record or dict + :returns: List of partner IDs + :rtype: list[int] + """ + resolver = self.env["spp.aggregation.scope.resolver"] + return resolver.resolve(scope) + + def _compute_statistics(self, registrant_ids, statistics, context=None, k_threshold=None): + """ + Compute statistics for registrants. 
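+
+        Example result shape ("count" is the built-in statistic):
+
+            {"count": {"value": 1234, "suppressed": False}}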
+ + :param registrant_ids: List of partner IDs + :param statistics: List of statistic names + :param context: Context string + :returns: Dictionary of statistic results + :rtype: dict + """ + result = {} + total_count = len(registrant_ids) + + statistic_by_name = {} + statistic_model = self.env.get("spp.statistic") + if statistic_model is not None: + statistic_records = statistic_model.sudo().search( # nosemgrep: odoo-sudo-without-context + [("name", "in", statistics)] + ) + statistic_by_name = {record.name: record for record in statistic_records} + + privacy_service = self.env["spp.metrics.privacy"] + + for stat_name in statistics: + try: + value = self._compute_single_statistic(stat_name, registrant_ids, context) + value, suppressed = self._apply_statistic_suppression( + stat_name=stat_name, + value=value, + total_count=total_count, + context=context, + k_threshold=k_threshold, + statistic_by_name=statistic_by_name, + privacy_service=privacy_service, + ) + result[stat_name] = { + "value": value, + "suppressed": suppressed, + } + except (ValueError, AttributeError, TypeError, KeyError, ValidationError) as e: + _logger.warning("Error computing statistic %s: %s", stat_name, e) + result[stat_name] = { + "value": None, + "error": str(e), + "suppressed": False, + } + + return result + + def _apply_statistic_suppression( + self, + stat_name, + value, + total_count, + context, + k_threshold, + statistic_by_name, + privacy_service, + ): + """ + Apply top-level statistic suppression, delegating to unified privacy service. + + Precedence rule: + - Base threshold comes from access rules (user-level k-anonymity) + - Statistic/context threshold can raise privacy further + - Effective threshold = max(user threshold, statistic/context threshold) + """ + if value is None: + return value, False + + # Build stat config from statistic record + stat_config = None + stat = statistic_by_name.get(stat_name) + if stat: + config = stat.get_context_config(context) if context else {} + stat_config = { + "minimum_count": config.get("minimum_count") or stat.minimum_count or 0, + "suppression_display": config.get("suppression_display", stat.suppression_display) or "less_than", + } + + return privacy_service.suppress_value(value, total_count, k_threshold=k_threshold, stat_config=stat_config) + + def _compute_single_statistic(self, stat_name, registrant_ids, context=None): + """ + Compute a single statistic. + + Delegates to statistic registry for clean lookup and computation. + + :param stat_name: Statistic name + :param registrant_ids: List of partner IDs + :param context: Context string + :returns: Computed value + """ + registry = self.env["spp.aggregation.statistic.registry"] + return registry.compute(stat_name, registrant_ids, context) + + def _compute_breakdown(self, registrant_ids, group_by, statistics, context=None): + """ + Compute breakdown by dimensions. + + Delegates to spp.metrics.breakdown service. 
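+
+        Example result shape (illustrative dimension values, key format as
+        documented on compute_aggregation), for group_by=["gender", "age_band"]:
+
+            {"female|0-17": {"count": 123, "statistics": {...}},
+             "male|18-64": {"count": 456, "statistics": {...}}}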
+ + :param registrant_ids: List of partner IDs + :param group_by: List of dimension names + :param statistics: List of statistic names + :param context: Context string + :returns: Breakdown dictionary + :rtype: dict + """ + breakdown_service = self.env["spp.metrics.breakdown"] + return breakdown_service.compute_breakdown(registrant_ids, group_by, statistics, context) + + # ------------------------------------------------------------------------- + # Convenience Methods + # ------------------------------------------------------------------------- + @api.model + def compute_for_area(self, area_id, include_children=True, **kwargs): + """ + Compute aggregation for an administrative area. + + :param area_id: spp.area ID + :param include_children: Include child areas + :param kwargs: Additional arguments for compute_aggregation + :returns: Aggregation result + :rtype: dict + """ + scope = { + "scope_type": "area", + "area_id": area_id, + "include_child_areas": include_children, + } + return self.compute_aggregation(scope, **kwargs) + + @api.model + def compute_for_expression(self, cel_expression, profile="registry_individuals", **kwargs): + """ + Compute aggregation for a CEL expression. + + :param cel_expression: CEL expression string + :param profile: CEL profile + :param kwargs: Additional arguments for compute_aggregation + :returns: Aggregation result + :rtype: dict + """ + scope = { + "scope_type": "cel", + "cel_expression": cel_expression, + "cel_profile": profile, + } + return self.compute_aggregation(scope, **kwargs) + + @api.model + def compute_fairness(self, scope, dimensions=None, **kwargs): + """ + Compute fairness analysis for a scope. + + :param scope: Scope record, ID, or dict + :param dimensions: Dimension names for analysis + :returns: Fairness result + :rtype: dict + """ + scope_record = self._resolve_scope(scope) + registrant_ids = self._get_registrant_ids(scope_record) + + # Get base domain for population + base_domain = [("is_registrant", "=", True)] + + fairness_service = self.env["spp.metrics.fairness"] + return fairness_service.compute_fairness(registrant_ids, base_domain, dimensions) + + @api.model + def compute_distribution(self, amounts): + """ + Compute distribution statistics for a list of amounts. + + :param amounts: List of numerical values + :returns: Distribution result + :rtype: dict + """ + distribution_service = self.env["spp.metrics.distribution"] + return distribution_service.compute_distribution(amounts) diff --git a/spp_aggregation/models/service_cache.py b/spp_aggregation/models/service_cache.py new file mode 100644 index 00000000..8f814899 --- /dev/null +++ b/spp_aggregation/models/service_cache.py @@ -0,0 +1,435 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import hashlib +import json +import logging +from datetime import timedelta + +from odoo import api, fields, models + +_logger = logging.getLogger(__name__) + + +class AggregationCacheService(models.AbstractModel): + """ + Cache service for aggregation results. + + This service manages caching of aggregation results to improve performance + for frequently requested scopes and statistics. Results are stored in + spp.aggregation.cache.entry with TTL-based expiration. 
+ + TTL Configuration by Scope Type: + - area: 1 hour (3600 seconds) - administrative data is relatively static + - cel: 15 minutes (900 seconds) - expressions may reference dynamic data + - spatial_polygon: No cache (0) - spatial queries are too varied + - spatial_buffer: No cache (0) - buffer queries are too varied + - area_tag: 1 hour (3600 seconds) - tag-based queries are relatively static + - explicit: 30 minutes (1800 seconds) - explicit lists may change + """ + + _name = "spp.aggregation.cache" + _description = "Aggregation Cache Service" + + # TTL configuration in seconds + TTL_CONFIG = { + "area": 3600, # 1 hour + "cel": 900, # 15 minutes + "spatial_polygon": 0, # No cache + "spatial_buffer": 0, # No cache + "area_tag": 3600, # 1 hour + "explicit": 1800, # 30 minutes + } + + @api.model + def get_cached_result(self, scope, statistics=None, group_by=None): + """ + Get cached aggregation result if available and not expired. + + :param scope: spp.aggregation.scope record, ID, or dict + :param statistics: List of statistic names (None for defaults) + :param group_by: List of dimension names for breakdown + :returns: Cached result dictionary or None if not found/expired + :rtype: dict or None + """ + # Resolve scope to get scope type + scope_record = self._resolve_scope(scope) + scope_type = self._get_scope_type(scope_record) + + # Check if caching is enabled for this scope type + ttl = self._get_ttl_for_scope_type(scope_type) + if ttl == 0: + _logger.debug("Caching disabled for scope type: %s", scope_type) + return None + + # Generate cache key + cache_key = self._generate_cache_key(scope_record, statistics, group_by) + + # Find cache entry (use sudo for internal caching operation) + entry = ( + self.env["spp.aggregation.cache.entry"] # nosemgrep: odoo-sudo-without-context + .sudo() + .search( + [("cache_key", "=", cache_key)], + limit=1, + ) + ) + + if not entry: + _logger.debug("Cache miss for key: %s", cache_key) + return None + + # Check if expired + now = fields.Datetime.now() + expires_at = entry.computed_at + timedelta(seconds=ttl) + + if now > expires_at: + _logger.debug( + "Cache expired for key: %s (computed at %s, expired at %s)", cache_key, entry.computed_at, expires_at + ) + # Clean up expired entry + entry.unlink() + return None + + _logger.debug("Cache hit for key: %s (computed at %s, expires at %s)", cache_key, entry.computed_at, expires_at) + + # Parse and return result + try: + result = json.loads(entry.result_json) + # Mark that this result came from cache + result["from_cache"] = True + return result + except json.JSONDecodeError as e: + _logger.warning("Invalid JSON in cache entry %s: %s", entry.id, e) + entry.unlink() + return None + + @api.model + def store_result(self, scope, statistics, group_by, result): + """ + Store aggregation result in cache. 
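+
+        Illustrative round trip (names as defined in this service):
+
+            cache = env["spp.aggregation.cache"]
+            cache.store_result(scope, ["count"], [], result)
+            cache.get_cached_result(scope, ["count"], [])  # result, from_cache=True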
+ + :param scope: spp.aggregation.scope record, ID, or dict + :param statistics: List of statistic names + :param group_by: List of dimension names + :param result: Aggregation result dictionary + :returns: True if stored, False if caching disabled + :rtype: bool + """ + # Resolve scope to get scope type + scope_record = self._resolve_scope(scope) + scope_type = self._get_scope_type(scope_record) + + # Check if caching is enabled for this scope type + ttl = self._get_ttl_for_scope_type(scope_type) + if ttl == 0: + _logger.debug("Caching disabled for scope type: %s", scope_type) + return False + + # Generate cache key + cache_key = self._generate_cache_key(scope_record, statistics, group_by) + + # Serialize result + try: + result_json = json.dumps(result, default=str) + except (TypeError, ValueError) as e: + _logger.warning("Failed to serialize result for caching: %s", e) + return False + + # Store or update cache entry (use sudo for internal caching operation) + cache_model = self.env["spp.aggregation.cache.entry"].sudo() # nosemgrep: odoo-sudo-without-context + existing = cache_model.search( + [("cache_key", "=", cache_key)], + limit=1, + ) + + values = { + "cache_key": cache_key, + "scope_type": scope_type, + "result_json": result_json, + "computed_at": fields.Datetime.now(), + } + + if existing: + existing.write(values) + _logger.debug("Updated cache entry for key: %s", cache_key) + else: + cache_model.create(values) + _logger.debug("Created cache entry for key: %s", cache_key) + + return True + + @api.model + def invalidate_scope(self, scope): + """ + Invalidate all cache entries for a specific scope. + + This is useful when the underlying data for a scope changes + (e.g., registrants are added/removed from an area). + + Note: Currently this invalidates all cache entries of the same scope type. + For more granular invalidation, consider adding a scope_id foreign key + to the cache entry model in a future update. + + :param scope: spp.aggregation.scope record, ID, or dict + :returns: Number of cache entries invalidated + :rtype: int + """ + scope_record = self._resolve_scope(scope) + scope_type = self._get_scope_type(scope_record) + + # Invalidate all cache entries of this scope type + # This is a conservative approach - it may invalidate more than needed, + # but ensures consistency + entries = ( + self.env["spp.aggregation.cache.entry"] # nosemgrep: odoo-sudo-without-context + .sudo() + .search([("scope_type", "=", scope_type)]) + ) + + count = len(entries) + if count > 0: + entries.unlink() + _logger.info( + "Invalidated %d cache entries for scope type %s", + count, + scope_type, + ) + + return count + + @api.model + def invalidate_all(self): + """ + Invalidate all aggregation cache entries. + + This is useful for debugging or when performing bulk data updates + that affect many scopes. + + :returns: Number of cache entries invalidated + :rtype: int + """ + entries = self.env["spp.aggregation.cache.entry"].sudo().search([]) # nosemgrep: odoo-sudo-without-context + count = len(entries) + + if count > 0: + entries.unlink() + _logger.info("Invalidated all %d cache entries", count) + + return count + + @api.model + def cleanup_expired(self): + """ + Clean up expired cache entries. + + This should be called periodically (e.g., via cron) to prevent + the cache table from growing indefinitely. 
+ + :returns: Number of cache entries removed + :rtype: int + """ + now = fields.Datetime.now() + removed = 0 + + for scope_type, ttl in self.TTL_CONFIG.items(): + if ttl == 0: + continue # Skip non-cached types + + expires_before = now - timedelta(seconds=ttl) + + entries = ( + self.env["spp.aggregation.cache.entry"] # nosemgrep: odoo-sudo-without-context + .sudo() + .search( + [ + ("scope_type", "=", scope_type), + ("computed_at", "<", expires_before), + ] + ) + ) + + count = len(entries) + if count > 0: + entries.unlink() + removed += count + _logger.debug("Removed %d expired %s cache entries", count, scope_type) + + if removed > 0: + _logger.info("Cleaned up %d expired cache entries", removed) + + return removed + + def _generate_cache_key(self, scope, statistics, group_by): + """ + Generate a unique cache key for an aggregation query. + + The key is a hash of: + - Scope definition (type + parameters) + - Statistics list + - Group by dimensions + + :param scope: Scope record or dict + :param statistics: List of statistic names + :param group_by: List of dimension names + :returns: Cache key string (hex hash) + :rtype: str + """ + # Normalize inputs + statistics = sorted(statistics) if statistics else [] + group_by = sorted(group_by) if group_by else [] + + # Build key components + key_parts = self._get_scope_key_parts(scope) + + # Add statistics and group_by + key_parts.extend(statistics) + key_parts.extend(group_by) + + # Generate hash + key_string = "|".join(str(p) for p in key_parts) + key_hash = hashlib.sha256(key_string.encode()).hexdigest() + + return key_hash + + def _get_scope_key_parts(self, scope): + """ + Extract key components from scope for cache key generation. + + :param scope: Scope record or dict + :returns: List of key components + :rtype: list + """ + key_parts = [] + + if isinstance(scope, dict): + # Inline scope definition + scope_type = scope.get("scope_type") + key_parts.append(scope_type) + + if scope_type == "area": + key_parts.append(f"area:{scope.get('area_id')}") + key_parts.append(f"children:{scope.get('include_child_areas', True)}") + elif scope_type == "cel": + key_parts.append(f"expr:{scope.get('cel_expression')}") + key_parts.append(f"profile:{scope.get('cel_profile', 'registry_individuals')}") + elif scope_type == "spatial_polygon": + key_parts.append(f"geojson:{scope.get('geometry_geojson')}") + elif scope_type == "spatial_buffer": + key_parts.append(f"lat:{scope.get('buffer_center_latitude')}") + key_parts.append(f"lon:{scope.get('buffer_center_longitude')}") + key_parts.append(f"radius:{scope.get('buffer_radius_km')}") + elif scope_type == "area_tag": + tag_ids = scope.get("area_tag_ids", []) + key_parts.append(f"tags:{sorted(tag_ids)}") + elif scope_type == "explicit": + partner_ids = scope.get("explicit_partner_ids", []) + key_parts.append(f"partners:{sorted(partner_ids)}") + else: + # Scope record + scope_type = scope.scope_type + key_parts.append(scope_type) + key_parts.append(f"scope_id:{scope.id}") + + return key_parts + + def _get_scope_type(self, scope): + """ + Get scope type from scope record or dict. + + :param scope: Scope record or dict + :returns: Scope type string + :rtype: str + """ + if isinstance(scope, dict): + return scope.get("scope_type", "explicit") + return scope.scope_type + + def _get_ttl_for_scope_type(self, scope_type): + """ + Get TTL (time-to-live) in seconds for a scope type. 
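+
+        Examples (values from TTL_CONFIG above):
+
+            _get_ttl_for_scope_type("area")            # 3600
+            _get_ttl_for_scope_type("spatial_buffer")  # 0 (never cached)
+            _get_ttl_for_scope_type("unknown")         # 0 (default)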
+ + :param scope_type: Scope type string + :returns: TTL in seconds (0 = no cache) + :rtype: int + """ + return self.TTL_CONFIG.get(scope_type, 0) + + def _resolve_scope(self, scope): + """ + Resolve scope input to a scope record or dict. + + :param scope: Record, ID, or dict + :returns: Scope record or dict + """ + if isinstance(scope, dict): + return scope + + if isinstance(scope, int): + return self.env["spp.aggregation.scope"].browse(scope) + + return scope + + +class AggregationCacheEntry(models.Model): + """ + Cache entry for aggregation results. + + Stores cached results with TTL-based expiration. Each entry represents + a single aggregation query result. + """ + + _name = "spp.aggregation.cache.entry" + _description = "Aggregation Cache Entry" + _order = "computed_at desc" + + cache_key = fields.Char( + string="Cache Key", + required=True, + index=True, + help="Unique cache key (SHA256 hash of scope + statistics + group_by)", + ) + scope_type = fields.Selection( + selection=[ + ("cel", "CEL Expression"), + ("spatial_polygon", "Within Polygon"), + ("spatial_buffer", "Within Distance"), + ("area", "Administrative Area"), + ("area_tag", "Area Tags"), + ("explicit", "Explicit IDs"), + ], + required=True, + index=True, + help="Scope type for this cache entry", + ) + result_json = fields.Text( + string="Result (JSON)", + required=True, + help="Cached aggregation result as JSON", + ) + computed_at = fields.Datetime( + string="Computed At", + required=True, + default=fields.Datetime.now, + index=True, + help="When this result was computed", + ) + + _sql_constraints = [ + ( + "cache_key_unique", + "UNIQUE(cache_key)", + "Cache key must be unique", + ), + ] + + @api.model + def cron_cleanup_expired(self): + """ + Cron job method to clean up expired cache entries. + + This is a wrapper around the cache service's cleanup_expired method + that can be called from ir.cron. + + :returns: Number of cache entries removed + :rtype: int + """ + return self.env["spp.aggregation.cache"].cleanup_expired() diff --git a/spp_aggregation/models/service_scope_resolver.py b/spp_aggregation/models/service_scope_resolver.py new file mode 100644 index 00000000..efdca19a --- /dev/null +++ b/spp_aggregation/models/service_scope_resolver.py @@ -0,0 +1,366 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import logging + +from odoo import api, models + +_logger = logging.getLogger(__name__) + + +class ScopeResolverService(models.AbstractModel): + """ + Service to resolve aggregation scopes to partner IDs. + + Uses strategy pattern to handle different scope types. + Each scope type has a dedicated resolver method. + """ + + _name = "spp.aggregation.scope.resolver" + _description = "Aggregation Scope Resolver" + + @api.model + def resolve(self, scope): + """ + Resolve a scope to a list of partner IDs. + + :param scope: spp.aggregation.scope record or dict for inline scope + :returns: List of partner IDs + :rtype: list[int] + """ + if isinstance(scope, dict): + return self._resolve_inline(scope) + + scope.ensure_one() + scope_type = scope.scope_type + + resolver_method = getattr(self, f"_resolve_{scope_type}", None) + if resolver_method is None: + _logger.error("No resolver for scope type: %s", scope_type) + return [] + + try: + return resolver_method(scope) + except Exception as e: + scope_name = scope.name + _logger.error("Error resolving scope %s: %s", scope_name, e) + return [] + + def _resolve_inline(self, scope_dict): + """ + Resolve an inline scope definition (dict). 
+ + Dispatches to a type-specific inline resolver that works directly on the dict; no temporary scope record is created. + """ + scope_type = scope_dict.get("scope_type") + if not scope_type: + _logger.error("Inline scope missing scope_type") + return [] + + # Map inline scope dict to resolver + resolver_map = { + "cel": self._resolve_cel_inline, + "area": self._resolve_area_inline, + "area_tag": self._resolve_area_tag_inline, + "spatial_polygon": self._resolve_spatial_polygon_inline, + "spatial_buffer": self._resolve_spatial_buffer_inline, + "explicit": self._resolve_explicit_inline, + } + + resolver = resolver_map.get(scope_type) + if not resolver: + _logger.error("No resolver for inline scope type: %s", scope_type) + return [] + + try: + return resolver(scope_dict) + except Exception as e: + _logger.error("Error resolving inline scope: %s", e) + return [] + + # ------------------------------------------------------------------------- + # CEL Resolution + # ------------------------------------------------------------------------- + def _resolve_cel(self, scope): + """Resolve a CEL expression scope.""" + return self._resolve_cel_expression( + scope.cel_expression, + scope.cel_profile or "registry_individuals", + ) + + def _resolve_cel_inline(self, scope_dict): + """Resolve an inline CEL scope.""" + return self._resolve_cel_expression( + scope_dict.get("cel_expression", ""), + scope_dict.get("cel_profile", "registry_individuals"), + ) + + def _resolve_cel_expression(self, expression, profile): + """Execute CEL expression and return matching IDs.""" + if not expression: + return [] + + executor = self.env.get("spp.cel.executor") + if not executor: + _logger.error("CEL executor not available") + return [] + executor = executor.sudo() # nosemgrep: odoo-sudo-without-context + + all_ids = [] + try: + for batch_ids in executor.compile_for_batch("res.partner", expression, batch_size=5000): + all_ids.extend(batch_ids) + except Exception as e: + _logger.error("CEL execution failed: %s", e) + return [] + + return all_ids + + # ------------------------------------------------------------------------- + # Area Resolution + # ------------------------------------------------------------------------- + def _resolve_area(self, scope): + """Resolve an area scope to partner IDs.""" + area = scope.area_id + if not area: + return [] + + return self._resolve_area_ids([area.id], scope.include_child_areas) + + def _resolve_area_inline(self, scope_dict): + """Resolve an inline area scope.""" + area_id = scope_dict.get("area_id") + if not area_id: + return [] + + include_children = scope_dict.get("include_child_areas", True) + return self._resolve_area_ids([area_id], include_children) + + def _resolve_area_ids(self, area_ids, include_children=True): + """Resolve area IDs to partner IDs. + + Returns registrants directly in the given areas, plus individuals + whose group (household) is in those areas but who lack their own + area_id assignment. + """ + if not area_ids: + return [] + + # Build area domain (sudo for model reads - callers may be unprivileged) + if include_children: + # Collect all parent_path values first, then do a single search using + # OR-chained domain conditions to avoid N+1 queries inside a loop.
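+            # parent_path is Odoo's materialized ancestor chain (e.g. "1/4/" for
+            # area 4 under area 1), so LIKE "1/4/%" matches the area itself and
+            # every descendant in a single query.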
+ areas = self.env["spp.area"].sudo().browse(area_ids) # nosemgrep: odoo-sudo-without-context + all_area_ids = set(area_ids) + parent_paths = [area.parent_path for area in areas if area.parent_path] + if parent_paths: + domain = ["|"] * (len(parent_paths) - 1) + for path in parent_paths: + domain.append(("parent_path", "like", f"{path}%")) + child_areas = self.env["spp.area"].sudo().search(domain) # nosemgrep: odoo-sudo-without-context + all_area_ids.update(child_areas.ids) + area_ids = list(all_area_ids) + + # Find registrants directly in these areas + domain = [ + ("is_registrant", "=", True), + ("area_id", "in", area_ids), + ] + direct_ids = set( + self.env["res.partner"] # nosemgrep: odoo-sudo-without-context, odoo-sudo-on-sensitive-models + .sudo() + .search(domain) + .ids + ) + + # Also find individuals without area_id whose group is in these areas + Membership = self.env["spp.group.membership"].sudo() # nosemgrep: odoo-sudo-without-context + memberships = Membership.search( + [ + ("group.area_id", "in", area_ids), + ("individual.area_id", "=", False), + ("individual.is_registrant", "=", True), + ("is_ended", "=", False), + ] + ) + indirect_ids = set(memberships.mapped("individual").ids) + + return list(direct_ids | indirect_ids) + + # ------------------------------------------------------------------------- + # Area Tag Resolution + # ------------------------------------------------------------------------- + def _resolve_area_tag(self, scope): + """Resolve an area tag scope to partner IDs.""" + tag_ids = scope.area_tag_ids.ids + if not tag_ids: + return [] + + include_children = scope.include_child_areas + return self._resolve_area_tag_ids(tag_ids, include_children) + + def _resolve_area_tag_inline(self, scope_dict): + """Resolve an inline area tag scope.""" + tag_ids = scope_dict.get("area_tag_ids", []) + if not tag_ids: + return [] + + include_children = scope_dict.get("include_child_areas", True) + return self._resolve_area_tag_ids(tag_ids, include_children) + + def _resolve_area_tag_ids(self, tag_ids, include_children=True): + """Resolve area tag IDs to partner IDs.""" + if not tag_ids: + return [] + + # Find areas with these tags (sudo for model reads - callers may be unprivileged) + areas = self.env["spp.area"].sudo().search([("tag_ids", "in", tag_ids)]) # nosemgrep: odoo-sudo-without-context + if not areas: + return [] + + return self._resolve_area_ids(areas.ids, include_children) + + # ------------------------------------------------------------------------- + # Spatial Resolution (basic, full PostGIS in bridge module) + # ------------------------------------------------------------------------- + def _resolve_spatial_polygon(self, scope): + """Resolve a spatial polygon scope.""" + geojson = scope.geometry_geojson + if not geojson: + return [] + + return self._resolve_spatial_polygon_geometry(geojson) + + def _resolve_spatial_polygon_inline(self, scope_dict): + """Resolve an inline spatial polygon scope.""" + geojson = scope_dict.get("geometry_geojson") + if not geojson: + return [] + + return self._resolve_spatial_polygon_geometry(geojson) + + def _resolve_spatial_polygon_geometry(self, geojson_str): + """ + Resolve spatial polygon to partner IDs. + + This is a basic implementation. For full PostGIS support, + install the spp_aggregation_spatial bridge module. 
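+
+        Example GeoJSON input (illustrative coordinates; GeoJSON uses
+        [longitude, latitude] order):
+
+            {"type": "Polygon",
+             "coordinates": [[[125.5, 8.9], [125.6, 8.9], [125.6, 9.0],
+                              [125.5, 9.0], [125.5, 8.9]]]}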
+        """
+        # Check if PostGIS bridge is available (an empty recordset is falsy,
+        # so test for None rather than truthiness)
+        spatial_resolver = self.env.get("spp.aggregation.spatial.resolver")
+        if spatial_resolver is not None:
+            return spatial_resolver.resolve_polygon(geojson_str)
+
+        # Fallback: no spatial support
+        _logger.warning("Spatial polygon scope requires spp_aggregation_spatial module. Returning empty result.")
+        return []
+
+    def _resolve_spatial_buffer(self, scope):
+        """Resolve a spatial buffer scope."""
+        return self._resolve_spatial_buffer_params(
+            scope.buffer_center_latitude,
+            scope.buffer_center_longitude,
+            scope.buffer_radius_km,
+        )
+
+    def _resolve_spatial_buffer_inline(self, scope_dict):
+        """Resolve an inline spatial buffer scope."""
+        return self._resolve_spatial_buffer_params(
+            scope_dict.get("buffer_center_latitude"),
+            scope_dict.get("buffer_center_longitude"),
+            scope_dict.get("buffer_radius_km"),
+        )
+
+    def _resolve_spatial_buffer_params(self, latitude, longitude, radius_km):
+        """
+        Resolve spatial buffer to partner IDs.
+
+        This is a basic implementation. For full PostGIS support,
+        install the spp_aggregation_spatial bridge module.
+        """
+        # 0 is a valid latitude/longitude (equator, prime meridian), so check
+        # for None explicitly instead of relying on truthiness
+        if latitude is None or longitude is None or not radius_km:
+            return []
+
+        # Check if PostGIS bridge is available
+        spatial_resolver = self.env.get("spp.aggregation.spatial.resolver")
+        if spatial_resolver is not None:
+            return spatial_resolver.resolve_buffer(latitude, longitude, radius_km)
+
+        # Fallback: no spatial support
+        _logger.warning("Spatial buffer scope requires spp_aggregation_spatial module. Returning empty result.")
+        return []
+
+    # -------------------------------------------------------------------------
+    # Simulation Resolution (added by spp_simulation module)
+    # -------------------------------------------------------------------------
+    # Note: _resolve_simulation method is added by spp_simulation when installed
+
+    # -------------------------------------------------------------------------
+    # Explicit Resolution
+    # -------------------------------------------------------------------------
+    def _resolve_explicit(self, scope):
+        """Resolve an explicit ID list scope."""
+        return scope.explicit_partner_ids.ids
+
+    def _resolve_explicit_inline(self, scope_dict):
+        """Resolve an inline explicit scope."""
+        partner_ids = scope_dict.get("explicit_partner_ids", [])
+        if not partner_ids:
+            return []
+
+        # Validate that these are actual registrants (sudo for model reads - callers may be unprivileged)
+        valid_ids = (
+            self.env["res.partner"]  # nosemgrep: odoo-sudo-without-context, odoo-sudo-on-sensitive-models
+            .sudo()
+            .search(
+                [
+                    ("id", "in", partner_ids),
+                    ("is_registrant", "=", True),
+                ]
+            )
+            .ids
+        )
+
+        return valid_ids
+
+    # -------------------------------------------------------------------------
+    # Batch Resolution
+    # -------------------------------------------------------------------------
+    @api.model
+    def resolve_multiple(self, scopes):
+        """
+        Resolve multiple scopes and return combined IDs.
+
+        :param scopes: List of scope records or dicts
+        :returns: Combined list of unique partner IDs
+        :rtype: list[int]
+        """
+        all_ids = set()
+        for scope in scopes:
+            ids = self.resolve(scope)
+            all_ids.update(ids)
+        return list(all_ids)
+
+    @api.model
+    def resolve_intersect(self, scopes):
+        """
+        Resolve multiple scopes and return intersection of IDs.
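+
+        Useful for combining constraints, e.g. intersecting an area scope
+        with a CEL scope. A sketch (model name per the scope_builder docs;
+        IDs and expression illustrative):
+
+        >>> resolver = env["spp.aggregation.scope.resolver"]
+        >>> resolver.resolve_intersect([
+        ...     {"scope_type": "area", "area_id": 1, "include_child_areas": True},
+        ...     {"scope_type": "cel", "cel_expression": "r.is_group == false"},
+        ... ])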
+ + :param scopes: List of scope records or dicts + :returns: List of partner IDs present in ALL scopes + :rtype: list[int] + """ + if not scopes: + return [] + + result_ids = None + for scope in scopes: + ids = set(self.resolve(scope)) + if result_ids is None: + result_ids = ids + else: + result_ids = result_ids.intersection(ids) + + # Short-circuit if intersection is empty + if not result_ids: + return [] + + return list(result_ids) if result_ids else [] diff --git a/spp_aggregation/models/statistic_registry.py b/spp_aggregation/models/statistic_registry.py new file mode 100644 index 00000000..7c12b9c5 --- /dev/null +++ b/spp_aggregation/models/statistic_registry.py @@ -0,0 +1,291 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import logging + +from odoo import api, models + +_logger = logging.getLogger(__name__) + + +class StatisticRegistry(models.AbstractModel): + """Registry that maps statistic names to computation strategies. + + Replaces the fallback chain in compute_single_statistic with + a clean lookup-based approach. Each statistic type registers + how it should be computed. + """ + + _name = "spp.aggregation.statistic.registry" + _description = "Statistic Computation Registry" + + @api.model + def compute(self, stat_name, registrant_ids, context=None): + """Compute a statistic by name. + + Lookup order: + 1. Built-in statistics (count, gini) + 2. spp.statistic records (via CEL variable) + 3. spp.cel.variable records (direct) + + :param stat_name: Statistic name + :param registrant_ids: List of partner IDs + :param context: Optional context string + :returns: Computed value or None + """ + # Try built-in + builtin_method = self._get_builtin(stat_name) + if builtin_method is not None: + return builtin_method(registrant_ids) + + # Try spp.statistic (if module installed) + value = self._try_statistic_model(stat_name, registrant_ids) + if value is not None: + return value + + # Try spp.cel.variable (if module installed) + value = self._try_variable_model(stat_name, registrant_ids) + if value is not None: + return value + + # Provide diagnostic information if debug logging is enabled + if _logger.isEnabledFor(logging.DEBUG): + # Check if models exist + has_stat_model = self.env.get("spp.statistic") is not None + has_var_model = self.env.get("spp.cel.variable") is not None + + stat_count = 0 + var_count = 0 + if has_stat_model: + stat_count = self.env["spp.statistic"].sudo().search_count([]) # nosemgrep: odoo-sudo-without-context + if has_var_model: + var_count = self.env["spp.cel.variable"].sudo().search_count([]) # nosemgrep: odoo-sudo-without-context + + _logger.debug( + "Statistic lookup failed for '%s'. Available: %d spp.statistic, %d spp.cel.variable", + stat_name, + stat_count, + var_count, + ) + + _logger.warning("Unknown statistic: %s", stat_name) + return None + + @api.model + def list_available(self): + """List all available statistics for discovery. 
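+
+        Sources are checked in the same order as compute(): built-ins first,
+        then spp.statistic records, then spp.cel.variable records; variables
+        whose name is already listed are skipped.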
+
+        :returns: List of dicts with name, label, source
+        :rtype: list[dict]
+        """
+        available = []
+
+        # Built-ins
+        for name, info in self._BUILTINS.items():
+            available.append({"name": name, "label": info["label"], "source": "builtin"})
+
+        # From spp.statistic (an empty recordset is falsy, so test for None)
+        stat_model = self.env.get("spp.statistic")
+        if stat_model is not None:
+            for stat in stat_model.sudo().search([("active", "=", True)]):  # nosemgrep: odoo-sudo-without-context
+                available.append({"name": stat.name, "label": stat.label, "source": "statistic"})
+
+        # From spp.cel.variable
+        var_model = self.env.get("spp.cel.variable")
+        if var_model is not None:
+            # Search for active variables (check if state field exists)
+            domain = []
+            if "state" in var_model._fields:
+                domain = [("state", "=", "active")]
+
+            for var in var_model.sudo().search(domain):  # nosemgrep: odoo-sudo-without-context
+                if not any(a["name"] == var.name for a in available):
+                    available.append({"name": var.name, "label": var.name, "source": "variable"})
+
+        return available
+
+    _BUILTINS = {
+        "count": {"label": "Total Count", "compute": "_compute_count"},
+        "gini": {"label": "Gini Coefficient", "compute": "_compute_gini"},
+        "gini_coefficient": {"label": "Gini Coefficient", "compute": "_compute_gini"},
+    }
+
+    def _get_builtin(self, stat_name):
+        """Get builtin computation function.
+
+        :param stat_name: Statistic name
+        :returns: Bound method or None
+        """
+        info = self._BUILTINS.get(stat_name)
+        if info:
+            method_name = info["compute"]
+            return getattr(self, method_name)
+        return None
+
+    @api.model
+    def _compute_count(self, registrant_ids):
+        """Compute count of registrants.
+
+        :param registrant_ids: List of partner IDs
+        :returns: Count
+        :rtype: int
+        """
+        return len(registrant_ids)
+
+    @api.model
+    def _compute_gini(self, registrant_ids):
+        """Compute Gini coefficient for registrants.
+
+        Note: This requires benefit amounts which may not be available
+        in all contexts. Returns None if not applicable.
+
+        :param registrant_ids: List of partner IDs
+        :returns: Gini coefficient or None
+        """
+        # This would need benefit amounts - placeholder for now
+        return None
+
+    @api.model
+    def _try_statistic_model(self, stat_name, registrant_ids):
+        """Try computing via spp.statistic record.
+
+        :param stat_name: Statistic name
+        :param registrant_ids: List of partner IDs
+        :returns: Computed value or None
+        """
+        stat_model = self.env.get("spp.statistic")
+        if stat_model is None:
+            return None
+        stat = stat_model.sudo().search([("name", "=", stat_name)], limit=1)  # nosemgrep: odoo-sudo-without-context
+        if stat and stat.variable_id:
+            return self._compute_from_variable(stat.variable_id, registrant_ids)
+        return None
+
+    @api.model
+    def _try_variable_model(self, stat_name, registrant_ids):
+        """Try computing via spp.cel.variable record.
+
+        :param stat_name: Statistic name
+        :param registrant_ids: List of partner IDs
+        :returns: Computed value or None
+        """
+        var_model = self.env.get("spp.cel.variable")
+        if var_model is None:
+            return None
+        variable = var_model.sudo().search([("name", "=", stat_name)], limit=1)  # nosemgrep: odoo-sudo-without-context
+        if variable:
+            return self._compute_from_variable(variable, registrant_ids)
+        return None
+
+    @api.model
+    def _compute_from_variable(self, variable, registrant_ids):
+        """Compute statistic from a CEL variable.
+
+        For aggregate variables (source_type='aggregate' with members.* expressions),
+        computes the SUM of per-group values using evaluate_member_aggregate.
+        For other variables, counts matching records via compile_expression.
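+
+        For example (illustrative values only), an aggregate variable whose
+        expression is "members.count(m, true)" is summed across groups, while
+        a computed variable such as "r.is_group == false" yields the count of
+        matching records.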
+ + :param variable: spp.cel.variable record + :param registrant_ids: List of partner IDs + :returns: Computed value or None + """ + if not registrant_ids: + return 0 + + cel_service = self.env.get("spp.cel.service") + if cel_service is None: + return None + cel_service = cel_service.sudo() # nosemgrep: odoo-sudo-without-context + + # Get expression + expression = None + if hasattr(variable, "get_cel_expression"): + expression = variable.get_cel_expression() + if not expression and "cel_expression" in variable._fields: + expression = variable.cel_expression + if not expression and "expression" in variable._fields: + expression = variable.expression + if not expression: + return None + + # For member aggregate variables, compute the SUM of per-group values + # instead of counting groups where the expression is truthy + if self._is_member_aggregate(variable, expression): + return self._compute_member_aggregate_sum(cel_service, expression, registrant_ids) + + # Get profile based on target type + profile = "registry_individuals" + if "applies_to" in variable._fields: + target_type = variable.applies_to + if hasattr(cel_service, "get_profile_for_target_type"): + profile = cel_service.get_profile_for_target_type(target_type) + elif target_type == "group": + profile = "registry_groups" + + try: + result = cel_service.compile_expression( + expression, + profile=profile, + base_domain=[("id", "in", registrant_ids)], + limit=0, + ) + return result.get("count", 0) + except Exception as e: + variable_name = variable.name + _logger.warning("Error computing variable %s: %s", variable_name, e) + return None + + @api.model + def _is_member_aggregate(self, variable, expression): + """Check if variable is a member aggregate requiring sum computation. + + :param variable: spp.cel.variable record + :param expression: CEL expression string + :returns: True if this is a members.* aggregate + :rtype: bool + """ + if "source_type" not in variable._fields: + return False + if variable.source_type != "aggregate": + return False + return bool(expression and expression.startswith("members.")) + + @api.model + def _compute_member_aggregate_sum(self, cel_service, expression, registrant_ids): + """Compute SUM of a member aggregate expression across groups. + + For aggregate variables like members.count(m, true), the result should be + the total count of matching members across all groups, not the count of + groups that have matching members. + + :param cel_service: spp.cel.service instance + :param expression: CEL expression string (e.g. 
"members.count(m, true)") + :param registrant_ids: List of partner IDs (should be group IDs) + :returns: Sum of per-group aggregate values + :rtype: int or float + """ + if not hasattr(cel_service, "evaluate_member_aggregate"): + _logger.warning( + "CEL service missing evaluate_member_aggregate, falling back to count for expression: %s", + expression, + ) + return None + + try: + groups = self.env["res.partner"].sudo().browse(registrant_ids) # nosemgrep: odoo-sudo-without-context, odoo-sudo-on-sensitive-models # noqa: E501 # fmt: skip + total = 0 + for group in groups: + if not group.is_group: + continue + value = cel_service.evaluate_member_aggregate(group, expression) + if isinstance(value, bool): + if value: + total += 1 + elif isinstance(value, int | float): + total += value + return total + except Exception as e: + _logger.warning( + "Error computing member aggregate sum for '%s': %s", + expression, + e, + ) + return None diff --git a/spp_aggregation/pyproject.toml b/spp_aggregation/pyproject.toml new file mode 100644 index 00000000..4231d0cc --- /dev/null +++ b/spp_aggregation/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["whool"] +build-backend = "whool.buildapi" diff --git a/spp_aggregation/security/aggregation_security.xml b/spp_aggregation/security/aggregation_security.xml new file mode 100644 index 00000000..1ea3bcf6 --- /dev/null +++ b/spp_aggregation/security/aggregation_security.xml @@ -0,0 +1,83 @@ + + + + + Aggregation Engine + Aggregation queries and statistical analysis + + 36 + + + + + Aggregation + + Access to aggregation queries and statistical analysis + + + + + Aggregation: Read + Technical group for read access to aggregation models. + + + + Aggregation: Write + Technical group for write access to aggregation models. + + + + + + Viewer + + Can view aggregate statistics only. No individual records. + + + + + Analyst + + Can query aggregations with individual record access. + + + + + Manager + + Full aggregation management including scope and access rule configuration. + + + + + + + + diff --git a/spp_aggregation/security/ir.model.access.csv b/spp_aggregation/security/ir.model.access.csv new file mode 100644 index 00000000..2917fd14 --- /dev/null +++ b/spp_aggregation/security/ir.model.access.csv @@ -0,0 +1,7 @@ +id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink +access_spp_aggregation_scope_read,spp.aggregation.scope.read,model_spp_aggregation_scope,group_aggregation_read,1,0,0,0 +access_spp_aggregation_scope_write,spp.aggregation.scope.write,model_spp_aggregation_scope,group_aggregation_write,1,1,1,1 +access_spp_aggregation_cache_entry_read,spp.aggregation.cache.entry.read,model_spp_aggregation_cache_entry,group_aggregation_read,1,0,0,0 +access_spp_aggregation_cache_entry_write,spp.aggregation.cache.entry.write,model_spp_aggregation_cache_entry,group_aggregation_write,1,1,1,1 +access_spp_aggregation_access_rule_read,spp.aggregation.access.rule.read,model_spp_aggregation_access_rule,group_aggregation_read,1,0,0,0 +access_spp_aggregation_access_rule_write,spp.aggregation.access.rule.write,model_spp_aggregation_access_rule,group_aggregation_manager,1,1,1,1 diff --git a/spp_aggregation/services/__init__.py b/spp_aggregation/services/__init__.py new file mode 100644 index 00000000..8c8242b2 --- /dev/null +++ b/spp_aggregation/services/__init__.py @@ -0,0 +1,10 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+"""Shared services for aggregation engine.""" + +from .scope_builder import build_area_scope, build_cel_scope, build_explicit_scope + +__all__ = [ + "build_area_scope", + "build_cel_scope", + "build_explicit_scope", +] diff --git a/spp_aggregation/services/scope_builder.py b/spp_aggregation/services/scope_builder.py new file mode 100644 index 00000000..ebab5bcb --- /dev/null +++ b/spp_aggregation/services/scope_builder.py @@ -0,0 +1,84 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +"""Shared scope building utilities for API layers. + +This module provides a unified interface for constructing scope dictionaries +that are compatible with the aggregation engine's scope resolver. + +The scope dictionaries can be passed directly to spp.aggregation.service.compute_aggregation() +and will be resolved by spp.aggregation.scope.resolver. +""" + + +def build_area_scope(area_id, include_children=True): + """Build scope dict for an area query. + + Args: + area_id: ID of the spp.area record + include_children: Whether to include child areas in scope (default: True) + + Returns: + dict: Scope dict compatible with aggregation engine + { + "scope_type": "area", + "area_id": int, + "include_child_areas": bool, + } + + Example: + >>> scope = build_area_scope(area_id=123, include_children=True) + >>> result = env['spp.aggregation.service'].compute_aggregation(scope=scope) + """ + return { + "scope_type": "area", + "area_id": area_id, + "include_child_areas": include_children, + } + + +def build_cel_scope(cel_expression, profile="registry_individuals"): + """Build scope dict for a CEL expression query. + + Args: + cel_expression: CEL expression string to filter partners + profile: CEL profile name (default: "registry_individuals") + + Returns: + dict: Scope dict compatible with aggregation engine + { + "scope_type": "cel", + "cel_expression": str, + "cel_profile": str, + } + + Example: + >>> scope = build_cel_scope("partner.age > 18") + >>> result = env['spp.aggregation.service'].compute_aggregation(scope=scope) + """ + return { + "scope_type": "cel", + "cel_expression": cel_expression, + "cel_profile": profile, + } + + +def build_explicit_scope(partner_ids): + """Build scope dict for an explicit set of partner IDs. + + Args: + partner_ids: List or set of partner IDs + + Returns: + dict: Scope dict compatible with aggregation engine + { + "scope_type": "explicit", + "explicit_partner_ids": list[int], + } + + Example: + >>> scope = build_explicit_scope([1, 2, 3]) + >>> result = env['spp.aggregation.service'].compute_aggregation(scope=scope) + """ + return { + "scope_type": "explicit", + "explicit_partner_ids": list(partner_ids), + } diff --git a/spp_aggregation/tests/README_INTEGRATION_TESTS.md b/spp_aggregation/tests/README_INTEGRATION_TESTS.md new file mode 100644 index 00000000..5fbc20d4 --- /dev/null +++ b/spp_aggregation/tests/README_INTEGRATION_TESTS.md @@ -0,0 +1,217 @@ +# Integration Tests for spp_aggregation + +## Overview + +The integration tests in `test_integration_demo.py` use realistic demo data from +`spp_mis_demo_v2` to thoroughly test the aggregation service with real-world scenarios. + +## Test Coverage + +The integration tests cover: + +1. **Area-based aggregation** with hierarchical areas (Philippines demo data) +2. **Multi-dimensional breakdowns** (2D and 3D: gender × area × disability) +3. **K-anonymity suppression** with realistic demographic distributions +4. **Cache behavior** with repeated queries +5. 
**Performance testing** with larger datasets (50+ groups with members) +6. **Privacy scenarios**: + - Differencing attack prevention + - Complementary suppression across dimensions +7. **Spatial aggregation** using GPS coordinates +8. **Age group dimensions** with realistic birth dates +9. **Program enrollment** correlation with demographics + +## Running Integration Tests + +### Option 1: Run with spp_mis_demo_v2 (Full Integration) + +To run the full integration tests with realistic demo data: + +```bash +# Test both modules together +./scripts/test_single_module.sh spp_aggregation,spp_mis_demo_v2 +``` + +This will: + +- Install both `spp_aggregation` and `spp_mis_demo_v2` +- Generate ~50 household groups with members (realistic demographics) +- Run all aggregation tests including the 15+ integration test scenarios + +**Note:** This takes longer (~3-5 minutes) due to demo data generation. + +### Option 2: Run without Demo Module (Unit Tests Only) + +To run just the unit tests without demo data: + +```bash +# Test aggregation module only +./scripts/test_single_module.sh spp_aggregation +``` + +This will: + +- Install only `spp_aggregation` with minimal dependencies +- Run all unit tests (85+ tests) +- Skip integration tests that require demo data + +The integration tests will be automatically skipped with message: + +``` +spp_mis_demo_v2 module not installed - integration tests skipped +``` + +## Demo Data Generated + +When running with `spp_mis_demo_v2`, the following data is created: + +- **Registrants**: 50 household groups + 150-250 individual members +- **Areas**: Full Philippines hierarchy (country → region → province → municipality) +- **Demographics**: + - Gender: Realistic male/female distribution + - Ages: Children (<18), adults (18-59), elderly (60+) + - Disability: ~5% of population (realistic rate) + - Income: Varied distribution (70% low, 25% moderate, 5% higher) +- **Geographic**: GPS coordinates for spatial queries +- **Programs**: Multiple demo programs with enrollments + +## Test Scenarios + +### K-Anonymity Testing + +Tests verify that with k=5 or k=10 thresholds: + +- Small cells (count < k) are suppressed +- Complementary suppression prevents differencing attacks +- Users with aggregate-only access cannot identify individuals + +### Performance Testing + +Tests measure aggregation performance with: + +- 50+ household groups +- 2D breakdowns (gender × age_group) +- Full area hierarchies +- Expected completion: < 10 seconds + +### Multi-Dimensional Breakdowns + +Tests verify correct breakdown structure: + +- 2D: gender × area +- 3D: gender × disability × area (max dimensions) +- Proper dimension ordering and metadata +- Cell counts sum to total + +### Privacy Scenarios + +Tests verify protection against: + +- **Differencing attacks**: Complementary suppression when one cell is small +- **Single-cell isolation**: Multiple suppressions to prevent math-based identification +- **Cross-dimension differencing**: Protection across multiple dimensions + +## CI/CD Integration + +In CI pipelines, use the unit-only approach for faster feedback: + +```yaml +# .gitlab-ci.yml or .github/workflows +test-aggregation: + script: + - ./scripts/test_single_module.sh spp_aggregation +``` + +For comprehensive integration testing (nightly builds): + +```yaml +test-aggregation-integration: + script: + - ./scripts/test_single_module.sh spp_aggregation,spp_mis_demo_v2 + only: + - schedules +``` + +## Debugging Integration Tests + +If integration tests fail: + +1. 
**Check demo data generation**: + + ```python + # In test output, look for: + "Test setup complete: X registrants, Y areas" + ``` + +2. **Verify area hierarchy**: + + ```python + # Should see: country → region → province → municipality + "X regions, Y provinces, Z municipalities" + ``` + +3. **Check demographic distribution**: + + ```python + # Should have varied gender, age, disability + "Age groups found in demo data: {child, adult, elderly}" + ``` + +4. **Review suppression patterns**: + ```python + # In k-anonymity tests, should see: + "K-anonymity test: X visible cells, Y suppressed cells" + ``` + +## Test Data Consistency + +The demo generator creates consistent, reproducible data: + +- **Deterministic names**: Uses Faker with controlled randomness +- **Realistic distributions**: 70% low income, ~5% disability +- **Area assignment**: All registrants assigned to geographic areas +- **Complete demographics**: Gender, birth date, area, income + +## Future Enhancements + +Planned improvements for integration tests: + +- [ ] Test with larger datasets (1000+ registrants) +- [ ] Benchmark queries for performance regression detection +- [ ] Test with multiple programs and enrollments +- [ ] CEL expression evaluation in breakdowns +- [ ] Statistics computation (not just counts) +- [ ] Fairness analysis with demo data + +## Troubleshooting + +### "Module spp_mis_demo_v2 not installed" + +This is expected when running `./scripts/test_single_module.sh spp_aggregation` alone. +To run integration tests, use: + +```bash +./scripts/test_single_module.sh spp_aggregation,spp_mis_demo_v2 +``` + +### "No areas found in demo data" + +The demo generator may have failed. Check logs for: + +- Geographic data loading errors +- Philippines area data availability + +### "Performance test timeout" + +If aggregation takes > 10s: + +- Check database indices on `res.partner.area_id` +- Review `spp.aggregation.cache` configuration +- Ensure PostgreSQL has sufficient resources + +## Related Documentation + +- `spp_aggregation/README.md` - Module overview and architecture +- `spp_mis_demo_v2/README.md` - Demo data generator documentation +- `docs/principles/privacy-protection.md` - K-anonymity principles +- `docs/principles/performance-scalability.md` - Performance guidelines diff --git a/spp_aggregation/tests/__init__.py b/spp_aggregation/tests/__init__.py new file mode 100644 index 00000000..f2499a99 --- /dev/null +++ b/spp_aggregation/tests/__init__.py @@ -0,0 +1,14 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import common +from . import test_access_rule_area_restrictions +from . import test_aggregation_scope +from . import test_aggregation_service +from . import test_cache_service +from . import test_distribution_service +from . import test_fairness_service +from . import test_integration_demo +from . import test_privacy_enforcement +from . import test_scope_builder +from . import test_scope_resolver +from . import test_statistic_registry diff --git a/spp_aggregation/tests/common.py b/spp_aggregation/tests/common.py new file mode 100644 index 00000000..b78c25d1 --- /dev/null +++ b/spp_aggregation/tests/common.py @@ -0,0 +1,91 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+from odoo.tests.common import TransactionCase + + +class AggregationTestCase(TransactionCase): + """Base test case for aggregation module tests.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + + # Create test areas + cls.area_country = cls.env["spp.area"].create( + { + "draft_name": "Test Country", + "code": "TC", + } + ) + cls.area_region = cls.env["spp.area"].create( + { + "draft_name": "Test Region", + "code": "TR", + "parent_id": cls.area_country.id, + } + ) + cls.area_district = cls.env["spp.area"].create( + { + "draft_name": "Test District", + "code": "TD", + "parent_id": cls.area_region.id, + } + ) + + # Get or create area tags (may already exist from demo data) + cls.tag_urban = cls.env["spp.area.tag"].search([("code", "=", "URBAN")], limit=1) + if not cls.tag_urban: + cls.tag_urban = cls.env["spp.area.tag"].create( + { + "name": "Urban", + "code": "URBAN", + } + ) + cls.tag_rural = cls.env["spp.area.tag"].search([("code", "=", "RURAL")], limit=1) + if not cls.tag_rural: + cls.tag_rural = cls.env["spp.area.tag"].create( + { + "name": "Rural", + "code": "RURAL", + } + ) + cls.area_district.tag_ids = [(4, cls.tag_urban.id)] + + # Create test registrants + cls.registrants = cls._create_test_registrants() + + @classmethod + def _create_test_registrants(cls): + """Create a set of test registrants with various attributes.""" + Partner = cls.env["res.partner"] + registrants = Partner.browse() + + # Create 20 test registrants with varied attributes + for i in range(20): + vals = { + "name": f"Test Registrant {i}", + "is_registrant": True, + "is_group": i >= 15, # Last 5 are groups + "area_id": cls.area_district.id if i < 10 else cls.area_region.id, + } + registrants |= Partner.create(vals) + + return registrants + + def create_scope(self, scope_type, **kwargs): + """Helper to create aggregation scopes.""" + vals = { + "name": f"Test {scope_type} Scope", + "scope_type": scope_type, + } + vals.update(kwargs) + return self.env["spp.aggregation.scope"].create(vals) + + def create_access_rule(self, access_level, **kwargs): + """Helper to create access rules.""" + vals = { + "name": f"Test {access_level} Rule", + "access_level": access_level, + "group_id": self.env.ref("base.group_user").id, + } + vals.update(kwargs) + return self.env["spp.aggregation.access.rule"].create(vals) diff --git a/spp_aggregation/tests/run_integration_tests.sh b/spp_aggregation/tests/run_integration_tests.sh new file mode 100755 index 00000000..93319c8c --- /dev/null +++ b/spp_aggregation/tests/run_integration_tests.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +# +# Run integration tests for spp_aggregation with MIS demo data. +# +# This script installs both spp_aggregation and spp_mis_demo_v2 to enable +# comprehensive integration testing with realistic demo data. +# +# Usage: +# ./spp_aggregation/tests/run_integration_tests.sh +# +# Options: +# --unit-only Run unit tests only (skip demo data generation) +# --verbose Show detailed test output +# --help Show this help message + +set -e + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" + +# Default options +UNIT_ONLY=0 + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --unit-only) + UNIT_ONLY=1 + shift + ;; + --verbose) + # Verbose flag for future use (currently not implemented) + shift + ;; + --help) + echo "Run integration tests for spp_aggregation" + echo "" + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --unit-only Run unit tests only (skip demo data)" + echo " --verbose Show detailed test output" + echo " --help Show this help message" + echo "" + echo "Examples:" + echo " $0 # Run full integration tests with demo data" + echo " $0 --unit-only # Run unit tests only (faster)" + echo " $0 --verbose # Show detailed output" + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Run '$0 --help' for usage information" + exit 1 + ;; + esac +done + +# Change to repo root +cd "$REPO_ROOT" + +# Run tests +if [ $UNIT_ONLY -eq 1 ]; then + echo "==========================================" + echo "Running UNIT tests for spp_aggregation" + echo "==========================================" + echo "" + echo "Integration tests will be skipped (spp_mis_demo_v2 not installed)" + echo "" + + ./scripts/test_single_module.sh spp_aggregation + +else + echo "==========================================" + echo "Running INTEGRATION tests for spp_aggregation" + echo "==========================================" + echo "" + echo "This will:" + echo " - Install spp_aggregation + spp_mis_demo_v2" + echo " - Generate ~50 household groups with members" + echo " - Run 100+ tests including integration scenarios" + echo " - Test k-anonymity, performance, privacy protection" + echo "" + echo "Expected duration: 3-5 minutes" + echo "" + read -p "Continue? (y/N) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Cancelled." + exit 0 + fi + + # Run with both modules + ./scripts/test_single_module.sh spp_aggregation,spp_mis_demo_v2 +fi + +# Show summary +echo "" +echo "==========================================" +echo "Test run complete!" +echo "==========================================" +echo "" + +if [ $UNIT_ONLY -eq 0 ]; then + echo "Integration tests were executed with demo data." + echo "" + echo "Test coverage:" + echo " ✓ Area-based aggregation (hierarchical)" + echo " ✓ Multi-dimensional breakdowns (2D, 3D)" + echo " ✓ K-anonymity suppression" + echo " ✓ Cache behavior" + echo " ✓ Privacy protection (differencing attacks)" + echo " ✓ Spatial aggregation (GPS coordinates)" + echo " ✓ Performance testing" +else + echo "Unit tests completed." + echo "" + echo "To run full integration tests with demo data:" + echo " $0" +fi + +echo "" diff --git a/spp_aggregation/tests/test_access_rule_area_restrictions.py b/spp_aggregation/tests/test_access_rule_area_restrictions.py new file mode 100644 index 00000000..1df4c4e3 --- /dev/null +++ b/spp_aggregation/tests/test_access_rule_area_restrictions.py @@ -0,0 +1,321 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+"""Tests for area-based access restrictions on aggregation access rules.""" + +from odoo.exceptions import AccessError, ValidationError +from odoo.tests.common import TransactionCase + + +class TestAccessRuleAreaRestrictions(TransactionCase): + """Test area-based access restrictions for explicit scopes.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + + # Create test areas + cls.area_north = cls.env["spp.area"].create( + { + "draft_name": "North District", + "code": "NORTH-001", + } + ) + cls.area_south = cls.env["spp.area"].create( + { + "draft_name": "South District", + "code": "SOUTH-001", + } + ) + cls.area_east = cls.env["spp.area"].create( + { + "draft_name": "East District", + "code": "EAST-001", + } + ) + + # Create test registrants in different areas + cls.registrant_north_1 = cls.env["res.partner"].create( + { + "name": "North Registrant 1", + "is_registrant": True, + "is_group": False, + "area_id": cls.area_north.id, + } + ) + cls.registrant_north_2 = cls.env["res.partner"].create( + { + "name": "North Registrant 2", + "is_registrant": True, + "is_group": False, + "area_id": cls.area_north.id, + } + ) + cls.registrant_south_1 = cls.env["res.partner"].create( + { + "name": "South Registrant 1", + "is_registrant": True, + "is_group": False, + "area_id": cls.area_south.id, + } + ) + cls.registrant_south_2 = cls.env["res.partner"].create( + { + "name": "South Registrant 2", + "is_registrant": True, + "is_group": False, + "area_id": cls.area_south.id, + } + ) + cls.registrant_east = cls.env["res.partner"].create( + { + "name": "East Registrant", + "is_registrant": True, + "is_group": False, + "area_id": cls.area_east.id, + } + ) + + # Create test user + cls.restricted_user = cls.env["res.users"].create( + { + "name": "Restricted Area User", + "login": "restricted_user", + "email": "restricted@test.com", + } + ) + + # Create unrestricted user + cls.unrestricted_user = cls.env["res.users"].create( + { + "name": "Unrestricted User", + "login": "unrestricted_user", + "email": "unrestricted@test.com", + } + ) + + def test_access_rule_with_area_restrictions(self): + """Test that access rules can have area restrictions.""" + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + self.assertEqual(len(rule.allowed_area_ids), 1) + self.assertIn(self.area_north, rule.allowed_area_ids) + + def test_explicit_scope_rejected_when_outside_allowed_areas(self): + """Test that explicit scopes are rejected when registrants are outside allowed areas.""" + # Create rule restricting to North district only + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Try to query registrants from South district (should fail) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_south_1.id, self.registrant_south_2.id], + } + + with self.assertRaises(ValidationError) as context: + rule.check_scope_allowed(scope) + + self.assertIn("outside your allowed areas", str(context.exception)) + + def test_explicit_scope_allowed_when_within_allowed_areas(self): + """Test that explicit scopes are allowed when all registrants are within allowed areas.""" + # Create 
rule restricting to North district only + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Query registrants from North district (should succeed) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_north_1.id, self.registrant_north_2.id], + } + + # Should not raise + self.assertTrue(rule.check_scope_allowed(scope)) + + def test_explicit_scope_rejected_when_mixed_areas(self): + """Test that explicit scopes are rejected when some registrants are outside allowed areas.""" + # Create rule restricting to North district only + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Try to query registrants from both North and South (should fail) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_north_1.id, self.registrant_south_1.id], + } + + with self.assertRaises(ValidationError) as context: + rule.check_scope_allowed(scope) + + self.assertIn("outside your allowed areas", str(context.exception)) + + def test_explicit_scope_allowed_when_no_area_restrictions(self): + """Test that explicit scopes are allowed when user has no area restrictions.""" + # Create rule without area restrictions + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "Unrestricted Rule", + "user_id": self.unrestricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + } + ) + + # Query registrants from any area (should succeed) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [ + self.registrant_north_1.id, + self.registrant_south_1.id, + self.registrant_east.id, + ], + } + + # Should not raise + self.assertTrue(rule.check_scope_allowed(scope)) + + def test_explicit_scope_allowed_with_multiple_allowed_areas(self): + """Test that explicit scopes work correctly with multiple allowed areas.""" + # Create rule allowing North and South districts + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "North and South Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id, self.area_south.id])], + } + ) + + # Query registrants from North and South (should succeed) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [ + self.registrant_north_1.id, + self.registrant_south_1.id, + ], + } + + # Should not raise + self.assertTrue(rule.check_scope_allowed(scope)) + + # Query registrants including East (should fail) + scope_with_east = { + "scope_type": "explicit", + "explicit_partner_ids": [ + self.registrant_north_1.id, + self.registrant_east.id, + ], + } + + with self.assertRaises(ValidationError): + rule.check_scope_allowed(scope_with_east) + + def test_area_only_scope_type_rejects_explicit_scopes(self): + """Test that area_only scope type restriction rejects explicit scopes even within allowed areas.""" + # Create rule with area_only scope type restriction + rule = self.env["spp.aggregation.access.rule"].create( + { + "name": "Area Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allowed_scope_types": "area_only", + 
"allow_inline_scopes": True, # Allow inline scopes, but only area-based ones + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Try to use explicit scope even with allowed area registrants (should fail) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_north_1.id], + } + + with self.assertRaises(ValidationError) as context: + rule.check_scope_allowed(scope) + + self.assertIn("Only area-based scopes are allowed", str(context.exception)) + + def test_aggregation_service_enforces_area_restrictions_on_gis_explicit_scopes(self): + """Test that AggregationService enforces area restrictions on GIS-generated explicit scopes.""" + # Create rule restricting to North district only + self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Switch to restricted user + restricted_env = self.env(user=self.restricted_user) + aggregation_service = restricted_env["spp.aggregation.service"] + + # Try to compute aggregation for South registrants (simulating GIS query) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_south_1.id, self.registrant_south_2.id], + } + + with self.assertRaises(AccessError) as context: + aggregation_service.compute_aggregation(scope=scope, statistics=["count"]) + + self.assertIn("outside your allowed areas", str(context.exception)) + + def test_aggregation_service_allows_area_restricted_explicit_scopes(self): + """Test that AggregationService allows explicit scopes within allowed areas.""" + # Create rule restricting to North district only + self.env["spp.aggregation.access.rule"].create( + { + "name": "North District Only Rule", + "user_id": self.restricted_user.id, + "access_level": "aggregate", + "allow_inline_scopes": True, + "allowed_area_ids": [(6, 0, [self.area_north.id])], + } + ) + + # Switch to restricted user + restricted_env = self.env(user=self.restricted_user) + aggregation_service = restricted_env["spp.aggregation.service"] + + # Compute aggregation for North registrants (should succeed) + scope = { + "scope_type": "explicit", + "explicit_partner_ids": [self.registrant_north_1.id, self.registrant_north_2.id], + } + + result = aggregation_service.compute_aggregation(scope=scope, statistics=["count"]) + + self.assertEqual(result["total_count"], 2) + self.assertIn("count", result["statistics"]) diff --git a/spp_aggregation/tests/test_aggregation_scope.py b/spp_aggregation/tests/test_aggregation_scope.py new file mode 100644 index 00000000..c7ede8d0 --- /dev/null +++ b/spp_aggregation/tests/test_aggregation_scope.py @@ -0,0 +1,119 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+from odoo.exceptions import ValidationError + +from .common import AggregationTestCase + + +class TestAggregationScope(AggregationTestCase): + """Tests for spp.aggregation.scope model.""" + + def test_create_cel_scope(self): + """Test creating a CEL expression scope.""" + scope = self.create_scope( + "cel", + cel_expression="r.is_group == false", + cel_profile="registry_individuals", + ) + self.assertEqual(scope.scope_type, "cel") + self.assertEqual(scope.cel_expression, "r.is_group == false") + + def test_create_area_scope(self): + """Test creating an area scope.""" + scope = self.create_scope( + "area", + area_id=self.area_district.id, + include_child_areas=True, + ) + self.assertEqual(scope.scope_type, "area") + self.assertEqual(scope.area_id, self.area_district) + + def test_create_explicit_scope(self): + """Test creating an explicit IDs scope.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + self.assertEqual(scope.scope_type, "explicit") + self.assertEqual(len(scope.explicit_partner_ids), 5) + + def test_cel_scope_requires_expression(self): + """CEL scope type must have a CEL expression.""" + with self.assertRaises(ValidationError): + self.create_scope("cel") + + def test_area_scope_requires_area(self): + """Area scope type must have an area.""" + with self.assertRaises(ValidationError): + self.create_scope("area") + + def test_explicit_scope_requires_ids(self): + """Explicit scope type must have partner IDs.""" + with self.assertRaises(ValidationError): + self.create_scope("explicit") + + def test_spatial_polygon_requires_geojson(self): + """Spatial polygon scope must have GeoJSON.""" + with self.assertRaises(ValidationError): + self.create_scope("spatial_polygon") + + def test_spatial_polygon_validates_geojson(self): + """Spatial polygon scope validates GeoJSON format.""" + with self.assertRaises(ValidationError): + self.create_scope("spatial_polygon", geometry_geojson="not valid json") + + with self.assertRaises(ValidationError): + self.create_scope( + "spatial_polygon", + geometry_geojson='{"type": "Point", "coordinates": [0, 0]}', + ) + + def test_spatial_buffer_requires_params(self): + """Spatial buffer scope requires center and radius.""" + with self.assertRaises(ValidationError): + self.create_scope("spatial_buffer") + + with self.assertRaises(ValidationError): + self.create_scope( + "spatial_buffer", + buffer_center_latitude=0, + buffer_center_longitude=0, + ) + + def test_spatial_buffer_validates_coords(self): + """Spatial buffer validates coordinate ranges.""" + with self.assertRaises(ValidationError): + self.create_scope( + "spatial_buffer", + buffer_center_latitude=100, # Invalid + buffer_center_longitude=0, + buffer_radius_km=10, + ) + + def test_resolve_explicit_scope(self): + """Test resolving explicit scope to IDs.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + ids = scope.resolve_registrant_ids() + self.assertEqual(len(ids), 5) + self.assertEqual(set(ids), set(self.registrants[:5].ids)) + + def test_resolve_area_scope(self): + """Test resolving area scope to IDs.""" + scope = self.create_scope( + "area", + area_id=self.area_district.id, + include_child_areas=False, + ) + ids = scope.resolve_registrant_ids() + # Should find registrants in district + self.assertGreater(len(ids), 0) + + def test_registrant_count_computed(self): + """Test that registrant count is computed.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 
0, self.registrants[:5].ids)], + ) + self.assertEqual(scope.registrant_count, 5) diff --git a/spp_aggregation/tests/test_aggregation_service.py b/spp_aggregation/tests/test_aggregation_service.py new file mode 100644 index 00000000..f05fb1e7 --- /dev/null +++ b/spp_aggregation/tests/test_aggregation_service.py @@ -0,0 +1,367 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from odoo.exceptions import AccessError, ValidationError + +from .common import AggregationTestCase + + +class TestAggregationService(AggregationTestCase): + """Tests for spp.aggregation.service main entry point.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.aggregation.service"] + + def test_compute_aggregation_basic(self): + """Test basic aggregation with explicit scope.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:10].ids)], + ) + result = self.service.compute_aggregation(scope) + + self.assertEqual(result["total_count"], 10) + self.assertIn("computed_at", result) + self.assertIn("access_level", result) + + def test_compute_aggregation_with_scope_id(self): + """Test aggregation using scope ID instead of record.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + result = self.service.compute_aggregation(scope.id) + + self.assertEqual(result["total_count"], 5) + + def test_compute_aggregation_with_inline_scope(self): + """Test aggregation using inline scope definition.""" + result = self.service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:3].ids, + } + ) + + self.assertEqual(result["total_count"], 3) + + def test_compute_aggregation_with_statistics(self): + """Test aggregation with statistics requested.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:10].ids)], + ) + result = self.service.compute_aggregation(scope, statistics=["count"]) + + self.assertIn("statistics", result) + self.assertIn("count", result["statistics"]) + self.assertEqual(result["statistics"]["count"]["value"], 10) + + def test_compute_aggregation_with_group_by(self): + """Test aggregation with group_by breakdown.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants.ids)], + ) + result = self.service.compute_aggregation( + scope, + group_by=["registrant_type"], + ) + + self.assertIn("breakdown", result) + # Should have cells for groups and individuals + self.assertGreater(len(result["breakdown"]), 0) + + def test_compute_aggregation_max_dimensions(self): + """Test that too many dimensions raises error.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + + with self.assertRaises(ValidationError): + self.service.compute_aggregation( + scope, + group_by=["dim1", "dim2", "dim3", "dim4"], # 4 > max 3 + ) + + def test_compute_aggregation_unknown_dimension(self): + """Test that unknown dimension raises error.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + + with self.assertRaises(ValidationError): + self.service.compute_aggregation( + scope, + group_by=["nonexistent_dimension"], + ) + + def test_compute_for_area(self): + """Test convenience method for area-based aggregation.""" + result = self.service.compute_for_area(self.area_district.id) + + self.assertIn("total_count", result) + 
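+        # The shared fixtures place 10 of the 20 registrants directly in the
+        # district, so the area-based count must be positive.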
self.assertGreater(result["total_count"], 0) + + def test_compute_for_expression(self): + """Test convenience method for CEL expression aggregation.""" + result = self.service.compute_for_expression( + cel_expression="r.is_group == false", + profile="registry_individuals", + ) + + self.assertIn("total_count", result) + + def test_compute_fairness(self): + """Test fairness computation through service.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:10].ids)], + ) + result = self.service.compute_fairness(scope) + + self.assertIn("equity_score", result) + self.assertIn("has_disparity", result) + self.assertIn("overall_coverage", result) + + def test_compute_distribution(self): + """Test distribution computation through service.""" + amounts = [100, 200, 300, 400, 500] + result = self.service.compute_distribution(amounts) + + self.assertEqual(result["count"], 5) + self.assertEqual(result["total"], 1500) + self.assertIn("gini_coefficient", result) + + def test_privacy_enforced_on_result(self): + """Test that privacy protection is applied to results.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + result = self.service.compute_aggregation(scope) + + # For aggregate access, should not have individual IDs + if result["access_level"] == "aggregate": + self.assertNotIn("registrant_ids", result) + self.assertNotIn("partner_ids", result) + + def test_statistic_suppression_uses_stricter_threshold(self): + """Test top-level statistic suppression uses max(user_k, stat_k).""" + if "spp.statistic" not in self.env: + self.skipTest("spp_statistic module not installed") + + variable = self.env["spp.cel.variable"].create( + { + "name": "agg_suppression_test_var", + "cel_accessor": "agg_suppression_test_var", + "source_type": "computed", + "cel_expression": "true", + "value_type": "number", + "state": "active", + } + ) + self.env["spp.statistic"].create( + { + "name": "agg_suppression_test_stat", + "label": "Aggregation Suppression Test", + "variable_id": variable.id, + "minimum_count": 2, # lower than user rule + "is_published_api": True, + } + ) + self.env["spp.aggregation.access.rule"].create( + { + "name": "Agg Suppression Rule k10", + "access_level": "aggregate", + "user_id": self.env.user.id, + "minimum_k_anonymity": 10, + "allow_inline_scopes": True, + } + ) + + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + result = self.service.compute_aggregation( + scope, + statistics=["agg_suppression_test_stat"], + context="api", + ) + + stat = result["statistics"]["agg_suppression_test_stat"] + self.assertTrue(stat["suppressed"]) + self.assertEqual(stat["value"], "<10") + + +class TestAggregationServiceAccessControl(AggregationTestCase): + """Tests for access control in aggregation service.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + # Create a test user for access control tests + cls.test_user = cls.env["res.users"].create( + { + "name": "Test Aggregation User", + "login": "test_agg_user", + "email": "test_agg@example.com", + "group_ids": [(4, cls.env.ref("base.group_user").id)], + } + ) + + def test_access_rule_aggregate_only(self): + """Test that aggregate-only users cannot see IDs.""" + # Create user-specific rule + self.env["spp.aggregation.access.rule"].create( + { + "name": "Test Aggregate Rule", + "access_level": "aggregate", + "user_id": self.test_user.id, + } + ) + + scope = self.create_scope( + "explicit", + 
explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + + # Run as test user + service = self.env["spp.aggregation.service"].with_user(self.test_user) + result = service.compute_aggregation(scope) + + self.assertEqual(result["access_level"], "aggregate") + self.assertNotIn("registrant_ids", result) + + def test_access_rule_restricts_inline_scopes(self): + """Test that inline scopes can be restricted.""" + # Create user-specific rule + self.env["spp.aggregation.access.rule"].create( + { + "name": "Test Restricted Rule", + "access_level": "aggregate", + "user_id": self.test_user.id, + "allow_inline_scopes": False, + "allowed_scope_types": "predefined", + } + ) + + # Run as test user + service = self.env["spp.aggregation.service"].with_user(self.test_user) + + # Inline scope should be blocked + with self.assertRaises(AccessError): + service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:3].ids, + } + ) + + def test_access_rule_restricts_dimensions(self): + """Test that dimensions can be restricted.""" + # Create dimension first + dimension = self.env["spp.demographic.dimension"].search( + [ + ("name", "=", "registrant_type"), + ], + limit=1, + ) + + if dimension: + # Create user-specific rule with dimension restriction + self.env["spp.aggregation.access.rule"].create( + { + "name": "Test Dimension Rule", + "access_level": "aggregate", + "user_id": self.test_user.id, + "max_group_by_dimensions": 1, + } + ) + + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + + # Run as test user + service = self.env["spp.aggregation.service"].with_user(self.test_user) + + # Request with 2 dimensions when max is 1 + with self.assertRaises(ValidationError): + service.compute_aggregation( + scope, + group_by=["registrant_type", "area"], + ) + + +class TestAggregationServicePublicUser(AggregationTestCase): + """Tests for aggregation service running as public user (uid:3). + + The GIS API runs as base.public_user which has no Odoo model permissions. + The aggregation service must work for this user because internal config/data + reads use sudo() while access level is determined from the calling user. 
+    """
+ """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.public_user = cls.env.ref("base.public_user") + + def test_compute_aggregation_explicit_scope_as_public_user(self): + """Test that public user can compute aggregation with explicit scope.""" + service = self.env["spp.aggregation.service"].with_user(self.public_user) + result = service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:5].ids, + } + ) + + self.assertEqual(result["total_count"], 5) + self.assertIn("access_level", result) + self.assertIn("computed_at", result) + + def test_statistics_computation_as_public_user(self): + """Test that statistics are computed for public user.""" + service = self.env["spp.aggregation.service"].with_user(self.public_user) + result = service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:10].ids, + }, + statistics=["count"], + ) + + self.assertIn("statistics", result) + self.assertIn("count", result["statistics"]) + self.assertEqual(result["statistics"]["count"]["value"], 10) + + def test_access_level_defaults_to_aggregate_for_public_user(self): + """Test that public user gets aggregate access level by default.""" + service = self.env["spp.aggregation.service"].with_user(self.public_user) + result = service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:3].ids, + } + ) + + self.assertEqual(result["access_level"], "aggregate") + + def test_no_registrant_ids_in_result_for_public_user(self): + """Test that registrant IDs are not exposed to public user.""" + service = self.env["spp.aggregation.service"].with_user(self.public_user) + result = service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:5].ids, + } + ) + + # Privacy enforcement should strip individual IDs for aggregate access + self.assertNotIn("registrant_ids", result) + self.assertNotIn("partner_ids", result) diff --git a/spp_aggregation/tests/test_cache_service.py b/spp_aggregation/tests/test_cache_service.py new file mode 100644 index 00000000..475db6ef --- /dev/null +++ b/spp_aggregation/tests/test_cache_service.py @@ -0,0 +1,256 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
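+"""Tests for the aggregation cache: key generation, per-scope-type TTLs,
+store/retrieve round-trips, expiry, and invalidation."""
+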
+import json +from datetime import timedelta +from unittest.mock import patch + +from odoo import fields + +from .common import AggregationTestCase + + +class TestCacheService(AggregationTestCase): + """Test cache service for aggregation results.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.cache_service = cls.env["spp.aggregation.cache"] + + # Create a test scope + cls.test_scope = cls.env["spp.aggregation.scope"].create( + { + "name": "Test Cache Scope", + "scope_type": "area", + "area_id": cls.area_region.id, + "include_child_areas": True, + } + ) + + def test_cache_key_generation(self): + """Test that cache keys are generated consistently.""" + statistics = ["count", "eligible_count"] + group_by = ["gender", "disability_status"] + + # Generate key twice - should be same + key1 = self.cache_service._generate_cache_key(self.test_scope, statistics, group_by) + key2 = self.cache_service._generate_cache_key(self.test_scope, statistics, group_by) + + self.assertEqual(key1, key2, "Cache keys should be consistent") + self.assertTrue(len(key1) == 64, "Cache key should be SHA256 hash (64 chars)") + + # Different statistics should produce different key + key3 = self.cache_service._generate_cache_key(self.test_scope, ["count"], group_by) + self.assertNotEqual(key1, key3, "Different statistics should produce different keys") + + # Different group_by should produce different key + key4 = self.cache_service._generate_cache_key(self.test_scope, statistics, ["gender"]) + self.assertNotEqual(key1, key4, "Different group_by should produce different keys") + + def test_ttl_configuration(self): + """Test TTL is correct for each scope type.""" + # Area scope - 1 hour + ttl = self.cache_service._get_ttl_for_scope_type("area") + self.assertEqual(ttl, 3600, "Area scope should have 1 hour TTL") + + # CEL scope - 15 minutes + ttl = self.cache_service._get_ttl_for_scope_type("cel") + self.assertEqual(ttl, 900, "CEL scope should have 15 minute TTL") + + # Spatial polygon - no cache + ttl = self.cache_service._get_ttl_for_scope_type("spatial_polygon") + self.assertEqual(ttl, 0, "Spatial polygon should have no cache") + + # Spatial buffer - no cache + ttl = self.cache_service._get_ttl_for_scope_type("spatial_buffer") + self.assertEqual(ttl, 0, "Spatial buffer should have no cache") + + # Area tag - 1 hour + ttl = self.cache_service._get_ttl_for_scope_type("area_tag") + self.assertEqual(ttl, 3600, "Area tag scope should have 1 hour TTL") + + # Explicit - 30 minutes + ttl = self.cache_service._get_ttl_for_scope_type("explicit") + self.assertEqual(ttl, 1800, "Explicit scope should have 30 minute TTL") + + def test_store_and_retrieve_cache(self): + """Test storing and retrieving cached results.""" + statistics = ["count"] + group_by = ["gender"] + result = { + "total_count": 100, + "statistics": {"count": {"value": 100, "suppressed": False}}, + "breakdown": { + "male": {"count": 60, "statistics": {}}, + "female": {"count": 40, "statistics": {}}, + }, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + # Store result + stored = self.cache_service.store_result(self.test_scope, statistics, group_by, result) + self.assertTrue(stored, "Result should be stored") + + # Retrieve result + cached = self.cache_service.get_cached_result(self.test_scope, statistics, group_by) + self.assertIsNotNone(cached, "Should retrieve cached result") + self.assertTrue(cached["from_cache"], "Result should be marked as from cache") + 
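The TTL values asserted in test_ttl_configuration above amount to a per-scope-type policy table. One minimal way to back those assertions, assuming a simple module-level mapping (names are illustrative; the real logic lives in spp_aggregation/models/service_cache.py):

    # TTLs in seconds; 0 disables caching for a scope type.
    CACHE_TTL_SECONDS = {
        "area": 3600,            # area hierarchies change rarely: 1 hour
        "area_tag": 3600,        # tag membership is similarly stable: 1 hour
        "explicit": 1800,        # fixed ID lists: 30 minutes
        "cel": 900,              # expression results drift faster: 15 minutes
        "spatial_polygon": 0,    # geometry queries are never cached
        "spatial_buffer": 0,
    }

    def get_ttl_for_scope_type(scope_type):
        return CACHE_TTL_SECONDS.get(scope_type, 0)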
self.assertEqual(cached["total_count"], 100, "Cached data should match") + + def test_cache_disabled_for_spatial_queries(self): + """Test that spatial queries are not cached.""" + # Create spatial polygon scope + spatial_scope = self.env["spp.aggregation.scope"].create( + { + "name": "Test Spatial Scope", + "scope_type": "spatial_polygon", + "geometry_geojson": json.dumps( + { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], + } + ), + } + ) + + result = { + "total_count": 50, + "statistics": {}, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + # Try to store - should return False (caching disabled) + stored = self.cache_service.store_result(spatial_scope, [], [], result) + self.assertFalse(stored, "Spatial queries should not be cached") + + # Try to retrieve - should return None + cached = self.cache_service.get_cached_result(spatial_scope, [], []) + self.assertIsNone(cached, "Should not retrieve cache for spatial queries") + + def test_cache_expiration(self): + """Test that expired cache entries are not returned.""" + statistics = ["count"] + group_by = [] + result = { + "total_count": 100, + "statistics": {"count": {"value": 100, "suppressed": False}}, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + # Store result + self.cache_service.store_result(self.test_scope, statistics, group_by, result) + + # Verify it's cached + cached = self.cache_service.get_cached_result(self.test_scope, statistics, group_by) + self.assertIsNotNone(cached, "Should retrieve fresh cache") + + # Mock time to be 2 hours later (past TTL for area scope) + future_time = fields.Datetime.now() + timedelta(hours=2) + with patch("odoo.addons.spp_aggregation.models.service_cache.fields.Datetime.now") as mock_now: + mock_now.return_value = future_time + + # Try to retrieve - should be expired + cached = self.cache_service.get_cached_result(self.test_scope, statistics, group_by) + self.assertIsNone(cached, "Expired cache should not be returned") + + def test_invalidate_scope(self): + """Test invalidating all cache for a scope. + + Note: Currently this invalidates all entries of the same scope type + (conservative approach). 
+ """ + # Store multiple results for the same scope type (area) + result = { + "total_count": 100, + "statistics": {}, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + self.cache_service.store_result(self.test_scope, ["count"], [], result) + self.cache_service.store_result(self.test_scope, ["count"], ["gender"], result) + self.cache_service.store_result(self.test_scope, ["eligible_count"], [], result) + + # Verify all are cached + self.assertIsNotNone(self.cache_service.get_cached_result(self.test_scope, ["count"], [])) + self.assertIsNotNone(self.cache_service.get_cached_result(self.test_scope, ["count"], ["gender"])) + self.assertIsNotNone(self.cache_service.get_cached_result(self.test_scope, ["eligible_count"], [])) + + # Invalidate scope (invalidates all entries of scope type) + count = self.cache_service.invalidate_scope(self.test_scope) + self.assertGreaterEqual(count, 3, "Should invalidate at least the 3 cache entries we created") + + # Verify all are gone + self.assertIsNone(self.cache_service.get_cached_result(self.test_scope, ["count"], [])) + self.assertIsNone(self.cache_service.get_cached_result(self.test_scope, ["count"], ["gender"])) + self.assertIsNone(self.cache_service.get_cached_result(self.test_scope, ["eligible_count"], [])) + + def test_invalidate_all(self): + """Test invalidating all cache entries.""" + # Create multiple scopes and cache results + scope1 = self.test_scope + scope2 = self.env["spp.aggregation.scope"].create( + { + "name": "Test Scope 2", + "scope_type": "area", + "area_id": self.area_district.id, + "include_child_areas": False, + } + ) + + result = { + "total_count": 100, + "statistics": {}, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + self.cache_service.store_result(scope1, ["count"], [], result) + self.cache_service.store_result(scope2, ["count"], [], result) + + # Verify both are cached + self.assertIsNotNone(self.cache_service.get_cached_result(scope1, ["count"], [])) + self.assertIsNotNone(self.cache_service.get_cached_result(scope2, ["count"], [])) + + # Invalidate all + count = self.cache_service.invalidate_all() + self.assertEqual(count, 2, "Should invalidate all cache entries") + + # Verify both are gone + self.assertIsNone(self.cache_service.get_cached_result(scope1, ["count"], [])) + self.assertIsNone(self.cache_service.get_cached_result(scope2, ["count"], [])) + + def test_cleanup_expired(self): + """Test cleanup of expired cache entries.""" + statistics = ["count"] + group_by = [] + result = { + "total_count": 100, + "statistics": {}, + "from_cache": False, + "computed_at": fields.Datetime.now().isoformat(), + "access_level": "aggregate", + } + + # Store result + self.cache_service.store_result(self.test_scope, statistics, group_by, result) + + # Manually update computed_at to be 2 hours ago + entry = self.env["spp.aggregation.cache.entry"].search([], limit=1) + old_time = fields.Datetime.now() - timedelta(hours=2) + entry.write({"computed_at": old_time}) + + # Run cleanup + removed = self.cache_service.cleanup_expired() + self.assertEqual(removed, 1, "Should remove 1 expired entry") + + # Verify entry is gone + cached = self.cache_service.get_cached_result(self.test_scope, statistics, group_by) + self.assertIsNone(cached, "Expired cache should be cleaned up") diff --git a/spp_aggregation/tests/test_distribution_service.py b/spp_aggregation/tests/test_distribution_service.py new file mode 100644 index 
00000000..ad7721cd --- /dev/null +++ b/spp_aggregation/tests/test_distribution_service.py @@ -0,0 +1,111 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from odoo.tests.common import TransactionCase + + +class TestDistributionService(TransactionCase): + """Tests for spp.metrics.distribution service.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.metrics.distribution"] + + def test_empty_amounts(self): + """Test distribution with empty list.""" + result = self.service.compute_distribution([]) + self.assertEqual(result["count"], 0) + self.assertEqual(result["total"], 0) + self.assertEqual(result["gini_coefficient"], 0) + self.assertEqual(result["lorenz_deciles"], []) + + def test_single_amount(self): + """Test distribution with single value.""" + result = self.service.compute_distribution([100]) + self.assertEqual(result["count"], 1) + self.assertEqual(result["total"], 100) + self.assertEqual(result["mean"], 100) + self.assertEqual(result["median"], 100) + + def test_uniform_distribution(self): + """Test uniform distribution has Gini close to 0.""" + # All equal values = perfect equality + amounts = [100] * 100 + result = self.service.compute_distribution(amounts) + self.assertEqual(result["count"], 100) + self.assertEqual(result["total"], 10000) + self.assertEqual(result["mean"], 100) + self.assertEqual(result["gini_coefficient"], 0.0) + + def test_skewed_distribution(self): + """Test skewed distribution has higher Gini.""" + # One person has everything = high inequality + amounts = [0] * 99 + [10000] + result = self.service.compute_distribution(amounts) + self.assertEqual(result["count"], 100) + self.assertGreater(result["gini_coefficient"], 0.9) + + def test_percentiles(self): + """Test percentile calculation.""" + amounts = list(range(1, 101)) # 1 to 100 + result = self.service.compute_distribution(amounts) + + self.assertAlmostEqual(result["percentiles"]["p50"], 50.5, places=1) + self.assertAlmostEqual(result["percentiles"]["p25"], 25.75, places=1) + self.assertAlmostEqual(result["percentiles"]["p75"], 75.25, places=1) + + def test_median_odd_count(self): + """Test median with odd number of values.""" + amounts = [1, 2, 3, 4, 5] + result = self.service.compute_distribution(amounts) + self.assertEqual(result["median"], 3) + + def test_median_even_count(self): + """Test median with even number of values.""" + amounts = [1, 2, 3, 4] + result = self.service.compute_distribution(amounts) + self.assertEqual(result["median"], 2.5) + + def test_standard_deviation(self): + """Test standard deviation calculation.""" + amounts = [2, 4, 4, 4, 5, 5, 7, 9] + result = self.service.compute_distribution(amounts) + # Mean = 5, variance = 4, std = 2 + self.assertAlmostEqual(result["standard_deviation"], 2, places=5) + + def test_lorenz_deciles(self): + """Test Lorenz curve decile points.""" + amounts = list(range(1, 11)) # 1 to 10 + result = self.service.compute_distribution(amounts) + + lorenz = result["lorenz_deciles"] + self.assertEqual(len(lorenz), 10) + + # 10% of population (1 person) has 1/55 of total + self.assertEqual(lorenz[0]["population_share"], 10) + + # 100% of population has 100% of income + self.assertEqual(lorenz[9]["population_share"], 100) + self.assertEqual(lorenz[9]["income_share"], 100) + + def test_gini_clamped(self): + """Test Gini coefficient is clamped between 0 and 1.""" + # Normal case + amounts = [10, 20, 30, 40] + result = self.service.compute_distribution(amounts) + 
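For reference, the Gini behavior these tests pin down (exactly 0 for a uniform distribution, above 0.9 when a single holder owns everything, clamped to [0, 1]) follows from the standard closed form over sorted values. A standalone sketch that mirrors the tested behavior, not necessarily the service's internals:

    def gini_coefficient(amounts):
        # Gini via the sorted-values closed form; 0 for empty or zero totals.
        values = sorted(amounts)
        n = len(values)
        total = sum(values)
        if n == 0 or total == 0:
            return 0.0
        # G = 2 * sum(i * x_i) / (n * total) - (n + 1) / n, with i = 1..n
        weighted = sum(i * x for i, x in enumerate(values, start=1))
        gini = (2.0 * weighted) / (n * total) - (n + 1.0) / n
        return min(max(gini, 0.0), 1.0)  # clamp, as test_gini_clamped expects

    assert gini_coefficient([100] * 100) == 0.0          # perfect equality
    assert gini_coefficient([0] * 99 + [10000]) > 0.9    # near-total inequality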
self.assertGreaterEqual(result["gini_coefficient"], 0) + self.assertLessEqual(result["gini_coefficient"], 1) + + def test_zero_total(self): + """Test distribution with all zero values.""" + amounts = [0, 0, 0, 0] + result = self.service.compute_distribution(amounts) + self.assertEqual(result["gini_coefficient"], 0) + self.assertEqual(result["lorenz_deciles"], []) + + def test_negative_values(self): + """Test distribution handles negative values.""" + amounts = [-10, 0, 10, 20] + result = self.service.compute_distribution(amounts) + self.assertEqual(result["count"], 4) + self.assertEqual(result["minimum"], -10) + self.assertEqual(result["maximum"], 20) diff --git a/spp_aggregation/tests/test_fairness_service.py b/spp_aggregation/tests/test_fairness_service.py new file mode 100644 index 00000000..d310e3ee --- /dev/null +++ b/spp_aggregation/tests/test_fairness_service.py @@ -0,0 +1,85 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from .common import AggregationTestCase + + +class TestFairnessService(AggregationTestCase): + """Tests for spp.metrics.fairness service.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.metrics.fairness"] + + def test_empty_registrants(self): + """Test fairness with no registrants.""" + result = self.service.compute_fairness([]) + self.assertEqual(result["equity_score"], 100.0) + self.assertFalse(result["has_disparity"]) + self.assertEqual(result["total_beneficiaries"], 0) + + def test_compute_fairness_basic(self): + """Test basic fairness computation.""" + registrant_ids = self.registrants[:10].ids + result = self.service.compute_fairness(registrant_ids) + + self.assertIn("equity_score", result) + self.assertIn("has_disparity", result) + self.assertIn("overall_coverage", result) + self.assertIn("total_beneficiaries", result) + self.assertIn("total_population", result) + self.assertEqual(result["total_beneficiaries"], 10) + + def test_disparity_ratio_equal_coverage(self): + """Test disparity ratio when group coverage equals overall.""" + # When group_coverage == overall_coverage, ratio should be 1.0 + ratio = self.service._compute_disparity_ratio(0.5, 0.5) + self.assertEqual(ratio, 1.0) + + def test_disparity_ratio_under_coverage(self): + """Test disparity ratio when group is under-covered.""" + # Group at 25% coverage when overall is 50% = 0.5 ratio + ratio = self.service._compute_disparity_ratio(0.25, 0.5) + self.assertEqual(ratio, 0.5) + + def test_disparity_ratio_zero_overall(self): + """Test disparity ratio when overall coverage is zero.""" + ratio = self.service._compute_disparity_ratio(0.5, 0) + self.assertEqual(ratio, 0.0) + + def test_disparity_status_proportional(self): + """Test status for proportional coverage.""" + status = self.service._get_disparity_status(0.9) + self.assertEqual(status, "proportional") + + def test_disparity_status_low_coverage(self): + """Test status for low coverage.""" + status = self.service._get_disparity_status(0.75) + self.assertEqual(status, "low_coverage") + + def test_disparity_status_under_represented(self): + """Test status for under-representation.""" + status = self.service._get_disparity_status(0.6) + self.assertEqual(status, "under_represented") + + def test_fairness_with_specific_dimensions(self): + """Test fairness analysis with specific dimensions.""" + # Get the registrant_type dimension + registrant_ids = self.registrants.ids + result = self.service.compute_fairness( + registrant_ids, + dimensions=["registrant_type"], + ) + + # 
Should have analysis for registrant_type + self.assertIn("attributes", result) + + def test_equity_score_penalties(self): + """Test that equity score decreases with disparity.""" + # Create a scenario with known disparity + # This is a basic test - actual disparity depends on data + registrant_ids = self.registrants[:5].ids + result = self.service.compute_fairness(registrant_ids) + + # Equity score should be between 0 and 100 + self.assertGreaterEqual(result["equity_score"], 0) + self.assertLessEqual(result["equity_score"], 100) diff --git a/spp_aggregation/tests/test_integration_demo.py b/spp_aggregation/tests/test_integration_demo.py new file mode 100644 index 00000000..8c5feb17 --- /dev/null +++ b/spp_aggregation/tests/test_integration_demo.py @@ -0,0 +1,800 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +""" +Integration tests using MIS Demo V2 data. + +These tests use the realistic demo data generator from spp_mis_demo_v2 to test +aggregation service functionality with real-world data patterns including: +- Hierarchical administrative areas (Philippines) +- Realistic demographic distributions +- Multiple programs with enrollments +- GPS coordinates for spatial queries +""" + +import logging +import unittest + +from odoo.tests.common import TransactionCase, tagged + +_logger = logging.getLogger(__name__) + + +@tagged("post_install", "-at_install") +class TestAggregationIntegrationDemo(TransactionCase): + """Integration tests for aggregation service using MIS demo data.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + + # Check if spp_mis_demo_v2 is available + if "spp.mis.demo.generator" not in cls.env: + # Module not available - skip all tests in this class + raise unittest.SkipTest("spp_mis_demo_v2 module not installed - integration tests skipped") + + # Generate MIS demo data with controlled volume + # Use smaller volume for faster tests but enough for statistical significance + _logger.info("Generating MIS demo data for integration tests...") + cls.demo_wizard = cls.env["spp.mis.demo.generator"].create( + { + "name": "Integration Test Demo", + "demo_mode": "testing", + "create_demo_programs": True, + "enroll_demo_stories": True, + "generate_random_groups": True, + "random_groups_count": 50, # Enough for multi-dimensional breakdowns + "members_per_group_min": 2, + "members_per_group_max": 6, + "generate_volume": True, + "volume_enrollments": 30, + "load_geographic_data": True, + "country_code": "phl", + "create_cycles": True, + "cycles_per_program": 2, + "create_change_requests": False, # Skip CRs for faster tests + "generate_grm_demo": False, # Skip GRM for faster tests + "generate_case_demo": False, # Skip cases for faster tests + "generate_claim169_demo": False, # Skip QR credentials for faster tests + "generate_simulation_demo": False, # Skip simulation for faster tests + } + ) + + # Run demo generation + cls.demo_wizard.action_generate() + _logger.info("MIS demo data generation complete") + + # Get references to key models + cls.service = cls.env["spp.aggregation.service"] + cls.scope_model = cls.env["spp.aggregation.scope"] + cls.area_model = cls.env["spp.area"] + cls.partner_model = cls.env["res.partner"] + cls.dimension_model = cls.env["spp.demographic.dimension"] + + # Get demo data references + cls.all_registrants = cls.partner_model.search([("is_registrant", "=", True)]) + cls.all_areas = cls.area_model.search([]) + + # Get specific area levels (Philippines hierarchy) + cls.country_areas = 
cls.area_model.search([("level", "=", 0)]) + cls.region_areas = cls.area_model.search([("level", "=", 1)]) + cls.province_areas = cls.area_model.search([("level", "=", 2)]) + cls.municipality_areas = cls.area_model.search([("level", "=", 3)]) + + # Get dimension records for testing + cls.dim_gender = cls.dimension_model.search([("name", "=", "gender")], limit=1) + cls.dim_disability = cls.dimension_model.search([("name", "=", "disability_status")], limit=1) + cls.dim_area = cls.dimension_model.search([("name", "=", "area")], limit=1) + cls.dim_age_group = cls.dimension_model.search([("name", "=", "age_group")], limit=1) + + _logger.info( + "Test setup complete: %d registrants, %d areas (%d regions, %d provinces, %d municipalities)", + len(cls.all_registrants), + len(cls.all_areas), + len(cls.region_areas), + len(cls.province_areas), + len(cls.municipality_areas), + ) + + def test_area_based_aggregation_country_level(self): + """Test aggregation at country level with hierarchical rollup.""" + if not self.country_areas: + self.skipTest("No country-level areas found in demo data") + + country = self.country_areas[0] + + # Create scope for entire country + scope = self.scope_model.create( + { + "name": f"Test Country: {country.draft_name}", + "scope_type": "area", + "area_id": country.id, + "include_child_areas": True, + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertGreater(result["total_count"], 0, "Should have registrants in country") + self.assertEqual(result["access_level"], "individual", "Admin should have individual access") + self.assertIn("computed_at", result) + self.assertFalse(result["from_cache"], "First query should not be cached") + + # Verify all registrants from demo are included + country_registrants = self.partner_model.search( + [ + ("is_registrant", "=", True), + ("area_id", "child_of", country.id), + ] + ) + self.assertEqual( + result["total_count"], + len(country_registrants), + "Country scope should include all descendant areas", + ) + + def test_area_based_aggregation_region_level(self): + """Test aggregation at region level (first administrative division).""" + if not self.region_areas: + self.skipTest("No region-level areas found in demo data") + + region = self.region_areas[0] + + # Create scope for single region + scope = self.scope_model.create( + { + "name": f"Test Region: {region.draft_name}", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertGreater(result["total_count"], 0, "Should have registrants in region") + + # Verify count matches actual registrants in this region's hierarchy + region_registrants = self.partner_model.search( + [ + ("is_registrant", "=", True), + ("area_id", "child_of", region.id), + ] + ) + self.assertEqual(result["total_count"], len(region_registrants)) + + def test_multi_dimensional_breakdown_gender_x_area(self): + """Test 2D breakdown: gender × area.""" + if not self.region_areas: + self.skipTest("No regions found in demo data") + + # Pick a region with registrants + region = self.region_areas[0] + + scope = self.scope_model.create( + { + "name": "Test 2D: Gender × Area", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # Compute with 2D breakdown + result = self.service.compute_aggregation( + scope, + group_by=["gender", "area"], + ) + + # Assertions + self.assertIn("breakdown", result, 
"Should have breakdown cells") + self.assertGreater(len(result["breakdown"]), 0, "Should have breakdown cells") + + # Check cell structure + first_cell_key = list(result["breakdown"].keys())[0] + first_cell = result["breakdown"][first_cell_key] + self.assertIn("count", first_cell) + self.assertIn("dimensions", first_cell) + self.assertEqual(len(first_cell["dimensions"]), 2, "Should have 2 dimensions") + + # Verify total count matches sum of cells + total_from_cells = sum(cell["count"] for cell in result["breakdown"].values()) + self.assertEqual( + result["total_count"], + total_from_cells, + "Total count should equal sum of breakdown cells", + ) + + def test_multi_dimensional_breakdown_gender_x_disability_x_area(self): + """Test 3D breakdown: gender × disability × area (max dimensions).""" + if not self.province_areas: + self.skipTest("No provinces found in demo data") + + # Pick a province with registrants + province = self.province_areas[0] + + scope = self.scope_model.create( + { + "name": "Test 3D: Gender × Disability × Area", + "scope_type": "area", + "area_id": province.id, + "include_child_areas": True, + } + ) + + # Compute with 3D breakdown (max allowed dimensions) + result = self.service.compute_aggregation( + scope, + group_by=["gender", "disability_status", "area"], + ) + + # Assertions + self.assertIn("breakdown", result) + self.assertGreater(len(result["breakdown"]), 0) + + # Check cell structure has 3 dimensions + first_cell = list(result["breakdown"].values())[0] + self.assertEqual(len(first_cell["dimensions"]), 3) + + # Verify dimension order matches request + dim_names = [d["name"] for d in first_cell["dimensions"]] + self.assertEqual(dim_names, ["gender", "disability_status", "area"]) + + def test_k_anonymity_suppression_with_realistic_data(self): + """Test k-anonymity suppression with real demographic distributions.""" + # Create a restricted user with aggregate-only access + restricted_user = self.env["res.users"].create( + { + "name": "Test Researcher", + "login": "test_researcher", + "groups_id": [(6, 0, [self.ref("base.group_user")])], + } + ) + + # Create access rule for aggregate-only access with k=5 + self.env["spp.aggregation.access.rule"].create( + { + "name": "Researcher Aggregate Access", + "access_level": "aggregate", + "k_threshold": 5, + "group_id": self.ref("base.group_user"), + } + ) + + # Pick a small area that likely has cells with count < 5 + if self.municipality_areas: + small_area = self.municipality_areas[0] + elif self.province_areas: + small_area = self.province_areas[0] + else: + self.skipTest("No suitable areas for k-anonymity testing") + + scope = self.scope_model.create( + { + "name": "Test K-Anonymity", + "scope_type": "area", + "area_id": small_area.id, + "include_child_areas": False, # Don't include children for smaller cells + } + ) + + # Compute with 2D breakdown as restricted user + result = self.service.with_user(restricted_user).compute_aggregation( + scope, + group_by=["gender", "disability_status"], + ) + + # Assertions + self.assertEqual(result["access_level"], "aggregate") + + # Check that small cells are suppressed + suppressed_cells = 0 + visible_cells = 0 + for cell in result["breakdown"].values(): + if cell.get("suppressed"): + suppressed_cells += 1 + # Suppressed cells should not expose count + self.assertNotIn("count", cell) + else: + visible_cells += 1 + # Visible cells must meet k-threshold + self.assertGreaterEqual(cell["count"], 5) + + _logger.info( + "K-anonymity test: %d visible cells, %d suppressed cells", + 
visible_cells, + suppressed_cells, + ) + + # Should have some suppression with realistic data + self.assertGreater( + suppressed_cells, + 0, + "Should have suppressed some small cells with k=5", + ) + + def test_cache_behavior_repeated_queries(self): + """Test that repeated queries use cache correctly.""" + if not self.region_areas: + self.skipTest("No regions found for cache testing") + + region = self.region_areas[0] + + scope = self.scope_model.create( + { + "name": "Test Cache", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # First query - should not be cached + result1 = self.service.compute_aggregation(scope) + self.assertFalse(result1["from_cache"]) + computed_at_1 = result1["computed_at"] + + # Second query - should be cached + result2 = self.service.compute_aggregation(scope) + self.assertTrue(result2["from_cache"]) + self.assertEqual(result2["computed_at"], computed_at_1) + self.assertEqual(result2["total_count"], result1["total_count"]) + + # Query with use_cache=False - should recompute + result3 = self.service.compute_aggregation(scope, use_cache=False) + self.assertFalse(result3["from_cache"]) + self.assertNotEqual(result3["computed_at"], computed_at_1) + self.assertEqual(result3["total_count"], result1["total_count"]) + + def test_cache_invalidation_different_breakdowns(self): + """Test that cache properly differentiates breakdown dimensions.""" + if not self.region_areas: + self.skipTest("No regions found for cache testing") + + region = self.region_areas[0] + + scope = self.scope_model.create( + { + "name": "Test Cache Breakdown", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # Query with gender breakdown + result1 = self.service.compute_aggregation(scope, group_by=["gender"]) + self.assertFalse(result1["from_cache"]) + + # Query with disability breakdown - should NOT use cache + result2 = self.service.compute_aggregation(scope, group_by=["disability_status"]) + self.assertFalse(result2["from_cache"]) + + # Repeat gender query - SHOULD use cache + result3 = self.service.compute_aggregation(scope, group_by=["gender"]) + self.assertTrue(result3["from_cache"]) + + def test_performance_larger_dataset(self): + """Test performance with larger dataset (all demo registrants).""" + # Use entire country for maximum dataset + if self.country_areas: + area = self.country_areas[0] + elif self.all_areas: + area = self.all_areas[0] + else: + self.skipTest("No areas available for performance testing") + + scope = self.scope_model.create( + { + "name": "Test Performance - Large Dataset", + "scope_type": "area", + "area_id": area.id, + "include_child_areas": True, + } + ) + + # Test aggregation with 2D breakdown + import time + + start = time.time() + result = self.service.compute_aggregation( + scope, + group_by=["gender", "age_group"], + ) + duration = time.time() - start + + _logger.info( + "Performance test: aggregated %d registrants with 2D breakdown in %.2fs", + result["total_count"], + duration, + ) + + # Assertions + self.assertGreater(result["total_count"], 50, "Should have substantial dataset") + self.assertLess(duration, 10.0, "Should complete within 10 seconds") + self.assertIn("breakdown", result) + + def test_privacy_differencing_attack_prevention(self): + """Test that differencing attacks are prevented through complementary suppression.""" + # Create restricted user + restricted_user = self.env["res.users"].create( + { + "name": "Test Attacker", + "login": "test_attacker", + "groups_id": 
[(6, 0, [self.ref("base.group_user")])], + } + ) + + # Create strict access rule with k=10 + self.env["spp.aggregation.access.rule"].create( + { + "name": "Strict Aggregate Access", + "access_level": "aggregate", + "k_threshold": 10, + "group_id": self.ref("base.group_user"), + } + ) + + if not self.province_areas: + self.skipTest("No provinces available for privacy testing") + + province = self.province_areas[0] + + # Attacker tries to isolate small groups by complementary queries + scope_all = self.scope_model.create( + { + "name": "Privacy Test - All", + "scope_type": "area", + "area_id": province.id, + "include_child_areas": True, + } + ) + + # Query 1: All registrants by gender + result_all = self.service.with_user(restricted_user).compute_aggregation( + scope_all, + group_by=["gender"], + ) + + # Count suppressed cells + suppressed_count = sum(1 for cell in result_all["breakdown"].values() if cell.get("suppressed")) + + # With complementary suppression, if one cell is suppressed, + # its complement should also be suppressed to prevent differencing + if suppressed_count > 0: + # If any cell is suppressed, there should be multiple suppressions + # (complementary suppression) + visible_count = sum(1 for cell in result_all["breakdown"].values() if not cell.get("suppressed")) + + _logger.info( + "Privacy test: %d visible cells, %d suppressed cells", + visible_count, + suppressed_count, + ) + + # With 2 genders and small counts, if one is suppressed, + # the other should be too (complementary suppression) + if len(result_all["breakdown"]) == 2: + self.assertEqual( + suppressed_count, + 2, + "Both cells should be suppressed for complementary protection", + ) + + def test_spatial_aggregation_with_gps_coordinates(self): + """Test spatial aggregation using GPS coordinates from demo data.""" + # Check if any registrants have GPS coordinates + registrants_with_gps = self.partner_model.search( + [ + ("is_registrant", "=", True), + ("geo_point", "!=", False), + ] + ) + + if not registrants_with_gps: + self.skipTest("No registrants with GPS coordinates in demo data") + + # Get GPS point from first registrant + sample_registrant = registrants_with_gps[0] + if not sample_registrant.geo_point: + self.skipTest("Sample registrant has no valid GPS point") + + # Create a polygon around the point (approximate 10km radius) + # Using WKT format for PostGIS + try: + from shapely.geometry import Point + except ImportError: + self.skipTest("shapely library not available for spatial tests") + + # Parse geo_point (format: "POINT(lon lat)") + point_wkt = sample_registrant.geo_point + lon, lat = (float(x) for x in point_wkt.replace("POINT(", "").replace(")", "").split()) + + # Create approximate 10km buffer (0.1 degrees ~= 11km at equator) + center = Point(lon, lat) + buffer_geom = center.buffer(0.1) + polygon_wkt = buffer_geom.wkt + + # Create spatial scope + scope = self.scope_model.create( + { + "name": "Test Spatial Aggregation", + "scope_type": "spatial", + "spatial_filter_geom": polygon_wkt, + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertGreater(result["total_count"], 0, "Should find registrants in spatial scope") + + # Verify sample registrant is included + scope_registrant_ids = self.env["spp.aggregation.scope.resolver"].resolve_scope(scope) + self.assertIn( + sample_registrant.id, + scope_registrant_ids, + "Sample registrant should be within spatial scope", + ) + + def test_breakdown_with_area_hierarchy(self): + """Test breakdown by area 
shows proper hierarchy levels.""" + if not self.region_areas: + self.skipTest("No regions found for hierarchy testing") + + region = self.region_areas[0] + + scope = self.scope_model.create( + { + "name": "Test Area Hierarchy", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # Break down by area only + result = self.service.compute_aggregation(scope, group_by=["area"]) + + # Assertions + self.assertIn("breakdown", result) + + # Each cell should have area dimension with proper metadata + area_ids_in_breakdown = set() + for cell in result["breakdown"].values(): + area_dim = next(d for d in cell["dimensions"] if d["name"] == "area") + self.assertIn("value", area_dim) + self.assertIn("label", area_dim) + + # Extract area ID from value + area_id = area_dim["value"] + if area_id: + area_ids_in_breakdown.add(int(area_id)) + + # All areas in breakdown should be descendants of region + for area_id in area_ids_in_breakdown: + area = self.area_model.browse(area_id) + self.assertTrue( + area.id == region.id or region.id in area.parent_path_ids, + f"Area {area.draft_name} should be descendant of {region.draft_name}", + ) + + def test_age_group_dimension_with_realistic_data(self): + """Test age group dimension with realistic birth dates from demo data.""" + if not self.region_areas: + self.skipTest("No regions found") + + region = self.region_areas[0] + + scope = self.scope_model.create( + { + "name": "Test Age Groups", + "scope_type": "area", + "area_id": region.id, + "include_child_areas": True, + } + ) + + # Break down by age group + result = self.service.compute_aggregation(scope, group_by=["age_group"]) + + # Assertions + self.assertIn("breakdown", result) + + # Should have multiple age groups + age_groups = set() + for cell in result["breakdown"].values(): + age_dim = next(d for d in cell["dimensions"] if d["name"] == "age_group") + age_groups.add(age_dim["value"]) + + _logger.info("Age groups found in demo data: %s", age_groups) + + # Demo data should include varied ages + self.assertGreaterEqual( + len(age_groups), + 2, + "Should have at least 2 age groups in realistic demo data", + ) + + # Expected age groups: child, adult, elderly, unknown + expected_groups = {"child", "adult", "elderly", "unknown"} + self.assertTrue( + age_groups.issubset(expected_groups), + f"Age groups {age_groups} should be subset of {expected_groups}", + ) + + def test_complementary_suppression_across_dimensions(self): + """Test complementary suppression works across multiple dimensions.""" + # Create restricted user with k=8 + restricted_user = self.env["res.users"].create( + { + "name": "Test Complement User", + "login": "test_complement", + "groups_id": [(6, 0, [self.ref("base.group_user")])], + } + ) + + self.env["spp.aggregation.access.rule"].create( + { + "name": "Complement Test Access", + "access_level": "aggregate", + "k_threshold": 8, + "group_id": self.ref("base.group_user"), + } + ) + + if not self.municipality_areas: + self.skipTest("No municipalities for complementary suppression testing") + + # Pick smallest area to maximize chance of suppression + municipality = self.municipality_areas[0] + + scope = self.scope_model.create( + { + "name": "Test Complementary Suppression", + "scope_type": "area", + "area_id": municipality.id, + "include_child_areas": False, + } + ) + + # 2D breakdown with gender × disability + result = self.service.with_user(restricted_user).compute_aggregation( + scope, + group_by=["gender", "disability_status"], + ) + + # Analyze suppression 
pattern + cells_by_gender = {} + for _cell_key, cell in result["breakdown"].items(): + gender_val = cell["dimensions"][0]["value"] + if gender_val not in cells_by_gender: + cells_by_gender[gender_val] = [] + cells_by_gender[gender_val].append(cell) + + # For each gender, if one disability status is suppressed, + # its complement should also be suppressed + for gender, cells in cells_by_gender.items(): + suppressed = [c for c in cells if c.get("suppressed")] + visible = [c for c in cells if not c.get("suppressed")] + + if len(suppressed) > 0 and len(suppressed) < len(cells): + # Partial suppression detected - this could allow differencing + # Log warning but don't fail (depends on counts) + _logger.warning( + "Partial suppression for gender %s: %d suppressed, %d visible", + gender, + len(suppressed), + len(visible), + ) + + def test_explicit_scope_with_demo_registrants(self): + """Test explicit scope using specific demo story registrants.""" + # Find demo story personas (if they exist) + story_registrants = self.partner_model.search( + [ + ("is_registrant", "=", True), + ("name", "ilike", "Santos"), + ], + limit=5, + ) + + if not story_registrants: + # Use any registrants + story_registrants = self.all_registrants[:5] + + if not story_registrants: + self.skipTest("No registrants available for explicit scope testing") + + scope = self.scope_model.create( + { + "name": "Test Explicit Scope - Story Registrants", + "scope_type": "explicit", + "explicit_partner_ids": [(6, 0, story_registrants.ids)], + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertEqual(result["total_count"], len(story_registrants)) + + # Test with breakdown + result_breakdown = self.service.compute_aggregation( + scope, + group_by=["gender"], + ) + self.assertIn("breakdown", result_breakdown) + + # Sum of cells should equal total + total_from_cells = sum(cell["count"] for cell in result_breakdown["breakdown"].values()) + self.assertEqual(total_from_cells, len(story_registrants)) + + def test_empty_scope_handling(self): + """Test handling of scope with no registrants.""" + # Create scope with non-existent area or empty criteria + scope = self.scope_model.create( + { + "name": "Test Empty Scope", + "scope_type": "explicit", + "explicit_partner_ids": [(6, 0, [])], + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertEqual(result["total_count"], 0) + self.assertEqual(result["breakdown"], {}) + + def test_program_enrollment_correlation(self): + """Test aggregation correlates with program enrollments from demo data.""" + # Find registrants enrolled in programs + enrolled_registrants = self.partner_model.search( + [ + ("is_registrant", "=", True), + ("program_membership_ids", "!=", False), + ] + ) + + if not enrolled_registrants: + self.skipTest("No enrolled registrants in demo data") + + # Create explicit scope with enrolled registrants + scope = self.scope_model.create( + { + "name": "Test Enrolled Registrants", + "scope_type": "explicit", + "explicit_partner_ids": [(6, 0, enrolled_registrants.ids)], + } + ) + + # Compute aggregation + result = self.service.compute_aggregation(scope) + + # Assertions + self.assertEqual(result["total_count"], len(enrolled_registrants)) + + # Test breakdown to verify data quality + result_breakdown = self.service.compute_aggregation( + scope, + group_by=["gender", "age_group"], + ) + + # Should have multiple cells with varied demographics + self.assertGreater( + 
len(result_breakdown["breakdown"]), + 1, + "Enrolled registrants should have varied demographics", + ) + + _logger.info( + "Program enrollment test: %d enrolled registrants with %d demographic cells", + result["total_count"], + len(result_breakdown["breakdown"]), + ) diff --git a/spp_aggregation/tests/test_privacy_enforcement.py b/spp_aggregation/tests/test_privacy_enforcement.py new file mode 100644 index 00000000..7966a6db --- /dev/null +++ b/spp_aggregation/tests/test_privacy_enforcement.py @@ -0,0 +1,241 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from odoo.tests.common import TransactionCase + + +class TestPrivacyEnforcement(TransactionCase): + """Tests for spp.metrics.privacy service.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.metrics.privacy"] + + def test_enforce_empty_result(self): + """Test enforcement on empty result.""" + result = {"total_count": 0, "statistics": {}} + enforced = self.service.enforce(result) + self.assertEqual(enforced["total_count"], 0) + + def test_enforce_strips_ids_for_aggregate(self): + """Test that IDs are stripped for aggregate access.""" + result = { + "total_count": 100, + "registrant_ids": [1, 2, 3, 4, 5], + "partner_ids": [1, 2, 3, 4, 5], + } + enforced = self.service.enforce(result, access_level="aggregate") + + self.assertNotIn("registrant_ids", enforced) + self.assertNotIn("partner_ids", enforced) + + def test_enforce_keeps_ids_for_individual(self): + """Test that IDs are kept for individual access.""" + result = { + "total_count": 100, + "registrant_ids": [1, 2, 3, 4, 5], + } + enforced = self.service.enforce(result, access_level="individual") + + self.assertIn("registrant_ids", enforced) + + def test_k_anonymity_suppresses_small_cells(self): + """Test that cells below k-threshold are suppressed.""" + result = { + "total_count": 100, + "breakdown": { + "male|urban": {"count": 50, "statistics": {}}, + "male|rural": {"count": 3, "statistics": {}}, # Below k=5 + "female|urban": {"count": 47, "statistics": {}}, + }, + } + enforced = self.service.enforce(result, k_threshold=5) + + # male|rural should be suppressed + self.assertTrue(enforced["breakdown"]["male|rural"]["suppressed"]) + self.assertEqual(enforced["breakdown"]["male|rural"]["count"], "<5") + + def test_complementary_suppression(self): + """Test that sibling cells are suppressed to prevent derivation.""" + result = { + "total_count": 100, + "breakdown": { + "male": {"count": 97, "statistics": {}}, + "female": {"count": 3, "statistics": {}}, # Below k=5 + }, + } + enforced = self.service.enforce(result, k_threshold=5) + + # Both should be suppressed to prevent derivation + self.assertTrue(enforced["breakdown"]["female"]["suppressed"]) + self.assertTrue(enforced["breakdown"]["male"]["suppressed"]) + + def test_no_suppression_above_threshold(self): + """Test that cells above threshold are not suppressed.""" + result = { + "total_count": 100, + "breakdown": { + "male": {"count": 60, "statistics": {}}, + "female": {"count": 40, "statistics": {}}, + }, + } + enforced = self.service.enforce(result, k_threshold=5) + + self.assertFalse(enforced["breakdown"]["male"].get("suppressed", False)) + self.assertFalse(enforced["breakdown"]["female"].get("suppressed", False)) + + def test_is_count_suppressed(self): + """Test count suppression check.""" + self.assertTrue(self.service.is_count_suppressed(3, k_threshold=5)) + self.assertTrue(self.service.is_count_suppressed(4, k_threshold=5)) + 
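The suppression predicate and display modes exercised in these tests admit a compact statement. A standalone sketch consistent with the assertions (assumed equivalents of the service methods, not the module's actual code):

    def is_count_suppressed(count, k_threshold=5):
        # A cell is suppressed when its count falls below the k-threshold.
        return count < k_threshold

    def format_suppressed_count(count, k_threshold=5, display_mode="less_than"):
        # Render a count for display, masking values below the threshold.
        if not is_count_suppressed(count, k_threshold):
            return str(count)
        if display_mode == "less_than":
            return f"<{k_threshold}"
        if display_mode == "asterisk":
            return "*"
        return None  # "null" mode: omit the value entirely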
self.assertFalse(self.service.is_count_suppressed(5, k_threshold=5)) + self.assertFalse(self.service.is_count_suppressed(10, k_threshold=5)) + + def test_format_suppressed_count_less_than(self): + """Test formatting suppressed count with less_than mode.""" + result = self.service.format_suppressed_count(3, k_threshold=5, display_mode="less_than") + self.assertEqual(result, "<5") + + def test_format_suppressed_count_asterisk(self): + """Test formatting suppressed count with asterisk mode.""" + result = self.service.format_suppressed_count(3, k_threshold=5, display_mode="asterisk") + self.assertEqual(result, "*") + + def test_format_suppressed_count_null(self): + """Test formatting suppressed count with null mode.""" + result = self.service.format_suppressed_count(3, k_threshold=5, display_mode="null") + self.assertIsNone(result) + + def test_format_non_suppressed_count(self): + """Test formatting non-suppressed count.""" + result = self.service.format_suppressed_count(10, k_threshold=5) + self.assertEqual(result, "10") + + def test_find_siblings(self): + """Test finding sibling cells.""" + breakdown = { + "male|urban": {}, + "male|rural": {}, + "female|urban": {}, + "female|rural": {}, + } + siblings = self.service._find_siblings("male|urban", breakdown) + + # Siblings differ by exactly one dimension + self.assertIn("female|urban", siblings) # Same location, different gender + self.assertIn("male|rural", siblings) # Same gender, different location + self.assertNotIn("female|rural", siblings) # Different by 2 + + def test_get_smallest_sibling(self): + """Test getting smallest sibling cell.""" + breakdown = { + "male": {"count": 100}, + "female": {"count": 50}, + "other": {"count": 10}, + } + smallest = self.service._get_smallest_sibling(["male", "female", "other"], breakdown) + self.assertEqual(smallest, "other") + + def test_default_k_threshold(self): + """Test default k-anonymity threshold.""" + result = {"total_count": 10, "breakdown": {"a": {"count": 4}}} + enforced = self.service.enforce(result) # No k_threshold specified + + # Should use default (5), so count of 4 should be suppressed + self.assertTrue(enforced["breakdown"]["a"]["suppressed"]) + + +class TestPrivacyKAnonymityAttacks(TransactionCase): + """Tests for k-anonymity differencing attack prevention.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.metrics.privacy"] + + def test_differencing_attack_prevention_single_sibling(self): + """ + Test prevention of differencing attack when only one sibling exists. + + Scenario: + - Area A total = 1000 (known from parent) + - Area A, Male = 995 + - Area A, Female = 5 (below k) + + Without complementary suppression: + - Female is suppressed, shows "<5" + - Attacker derives: Female = 1000 - 995 = 5 (exact count!) + + With complementary suppression: + - Both Male and Female are suppressed + - No derivation possible + """ + result = { + "total_count": 1000, + "breakdown": { + "male": {"count": 995, "statistics": {}}, + "female": {"count": 5, "statistics": {}}, + }, + } + + # Use k=6 so female (5) is below threshold + enforced = self.service.enforce(result, k_threshold=6) + + # Both must be suppressed + self.assertTrue(enforced["breakdown"]["female"]["suppressed"]) + self.assertTrue(enforced["breakdown"]["male"]["suppressed"]) + + def test_differencing_attack_multiple_siblings(self): + """ + Test that smallest sibling is suppressed with multiple siblings. 
+ + Scenario: + - Gender: male=500, female=497, other=3 + - other is below k, must suppress + - Need to suppress at least one sibling to prevent derivation from total + """ + result = { + "total_count": 1000, + "breakdown": { + "male": {"count": 500, "statistics": {}}, + "female": {"count": 497, "statistics": {}}, + "other": {"count": 3, "statistics": {}}, + }, + } + + enforced = self.service.enforce(result, k_threshold=5) + + # other must be suppressed + self.assertTrue(enforced["breakdown"]["other"]["suppressed"]) + + # At least one sibling should also be suppressed + siblings_suppressed = sum(1 for k in ["male", "female"] if enforced["breakdown"][k].get("suppressed", False)) + self.assertGreaterEqual(siblings_suppressed, 1) + + def test_multi_dimensional_differencing(self): + """ + Test differencing attack with multiple dimensions. + + Scenario: + - gender|location breakdown + - If male|urban=3 is suppressed but we know: + - total male = 100 + - male|rural = 97 + - Attacker derives: male|urban = 100 - 97 = 3 + """ + result = { + "total_count": 200, + "breakdown": { + "male|urban": {"count": 3, "statistics": {}}, + "male|rural": {"count": 97, "statistics": {}}, + "female|urban": {"count": 50, "statistics": {}}, + "female|rural": {"count": 50, "statistics": {}}, + }, + } + + enforced = self.service.enforce(result, k_threshold=5) + + # male|urban should be suppressed + self.assertTrue(enforced["breakdown"]["male|urban"]["suppressed"]) + + # male|rural should also be suppressed (only sibling in male category) + self.assertTrue(enforced["breakdown"]["male|rural"]["suppressed"]) diff --git a/spp_aggregation/tests/test_scope_builder.py b/spp_aggregation/tests/test_scope_builder.py new file mode 100644 index 00000000..92fbde1a --- /dev/null +++ b/spp_aggregation/tests/test_scope_builder.py @@ -0,0 +1,125 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
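The builder helpers exercised in this file, imported from spp_aggregation.services, are thin functions that emit the same inline scope dicts the resolver and aggregation service accept elsewhere. A sketch matching the behavior the tests assert (signatures inferred from the tests, not the package's actual implementation):

    def build_explicit_scope(partner_ids):
        # Inline scope over a fixed set of partner IDs; any iterable is accepted.
        return {
            "scope_type": "explicit",
            "explicit_partner_ids": list(partner_ids),
        }

    def build_area_scope(area_id, include_children=True):
        # Inline scope over one area, optionally covering its child areas.
        return {
            "scope_type": "area",
            "area_id": area_id,
            "include_child_areas": include_children,
        }

    def build_cel_scope(expression, profile="registry_individuals"):
        # Inline scope selecting registrants via a CEL expression.
        return {
            "scope_type": "cel",
            "cel_expression": expression,
            "cel_profile": profile,
        }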
+"""Tests for shared scope builder utilities.""" + +from odoo.addons.spp_aggregation.services import ( + build_area_scope, + build_cel_scope, + build_explicit_scope, +) + +from .common import AggregationTestCase + + +class TestScopeBuilder(AggregationTestCase): + """Tests for scope builder utility functions.""" + + def test_build_explicit_scope_with_list(self): + """Test building explicit scope with list of partner IDs.""" + partner_ids = [1, 2, 3, 4, 5] + scope = build_explicit_scope(partner_ids) + + self.assertEqual(scope["scope_type"], "explicit") + self.assertEqual(scope["explicit_partner_ids"], [1, 2, 3, 4, 5]) + self.assertIsInstance(scope["explicit_partner_ids"], list) + + def test_build_explicit_scope_with_set(self): + """Test building explicit scope with set of partner IDs.""" + partner_ids = {5, 4, 3, 2, 1} + scope = build_explicit_scope(partner_ids) + + self.assertEqual(scope["scope_type"], "explicit") + self.assertIsInstance(scope["explicit_partner_ids"], list) + # Convert both to sets for comparison since set order is not guaranteed + self.assertEqual(set(scope["explicit_partner_ids"]), {1, 2, 3, 4, 5}) + + def test_build_explicit_scope_with_empty_list(self): + """Test building explicit scope with empty list.""" + scope = build_explicit_scope([]) + + self.assertEqual(scope["scope_type"], "explicit") + self.assertEqual(scope["explicit_partner_ids"], []) + + def test_build_area_scope_with_children(self): + """Test building area scope with child areas included.""" + scope = build_area_scope(area_id=123, include_children=True) + + self.assertEqual(scope["scope_type"], "area") + self.assertEqual(scope["area_id"], 123) + self.assertTrue(scope["include_child_areas"]) + + def test_build_area_scope_without_children(self): + """Test building area scope without child areas.""" + scope = build_area_scope(area_id=456, include_children=False) + + self.assertEqual(scope["scope_type"], "area") + self.assertEqual(scope["area_id"], 456) + self.assertFalse(scope["include_child_areas"]) + + def test_build_area_scope_default_includes_children(self): + """Test building area scope defaults to including children.""" + scope = build_area_scope(area_id=789) + + self.assertEqual(scope["scope_type"], "area") + self.assertEqual(scope["area_id"], 789) + self.assertTrue(scope["include_child_areas"]) + + def test_build_cel_scope_with_default_profile(self): + """Test building CEL scope with default profile.""" + scope = build_cel_scope("partner.age > 18") + + self.assertEqual(scope["scope_type"], "cel") + self.assertEqual(scope["cel_expression"], "partner.age > 18") + self.assertEqual(scope["cel_profile"], "registry_individuals") + + def test_build_cel_scope_with_custom_profile(self): + """Test building CEL scope with custom profile.""" + scope = build_cel_scope("partner.is_group == true", profile="registry_groups") + + self.assertEqual(scope["scope_type"], "cel") + self.assertEqual(scope["cel_expression"], "partner.is_group == true") + self.assertEqual(scope["cel_profile"], "registry_groups") + + def test_explicit_scope_resolves_correctly(self): + """Test that explicit scope built by utility resolves correctly.""" + # Create some test registrants + test_registrants = self.registrants[:5] + scope = build_explicit_scope(test_registrants.ids) + + # Resolve the scope using the resolver + resolver = self.env["spp.aggregation.scope.resolver"] + resolved_ids = resolver.resolve(scope) + + self.assertEqual(set(resolved_ids), set(test_registrants.ids)) + + def test_area_scope_resolves_correctly(self): + """Test that 
area scope built by utility resolves correctly.""" + scope = build_area_scope(area_id=self.area_district.id, include_children=False) + + # Resolve the scope using the resolver + resolver = self.env["spp.aggregation.scope.resolver"] + resolved_ids = resolver.resolve(scope) + + # Check all returned IDs are registrants in the district + partners = self.env["res.partner"].browse(resolved_ids) + for partner in partners: + self.assertEqual(partner.area_id, self.area_district) + + def test_scope_compatible_with_aggregation_service(self): + """Test that scopes built by utilities work with aggregation service.""" + # Create explicit scope for first 10 registrants + test_registrants = self.registrants[:10] + scope = build_explicit_scope(test_registrants.ids) + + # Compute aggregation using the scope + aggregation_service = self.env["spp.aggregation.service"] + result = aggregation_service.compute_aggregation( + scope=scope, + statistics=["count"], + context="test", + ) + + # Verify result structure + self.assertIn("total_count", result) + self.assertEqual(result["total_count"], 10) + self.assertIn("statistics", result) + self.assertIn("count", result["statistics"]) diff --git a/spp_aggregation/tests/test_scope_resolver.py b/spp_aggregation/tests/test_scope_resolver.py new file mode 100644 index 00000000..d489a8e9 --- /dev/null +++ b/spp_aggregation/tests/test_scope_resolver.py @@ -0,0 +1,193 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from .common import AggregationTestCase + + +class TestScopeResolver(AggregationTestCase): + """Tests for spp.aggregation.scope.resolver service.""" + + def test_resolve_explicit_scope(self): + """Test resolving explicit partner IDs.""" + scope = self.create_scope( + "explicit", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve(scope) + self.assertEqual(set(ids), set(self.registrants[:5].ids)) + + def test_resolve_inline_explicit_scope(self): + """Test resolving inline explicit scope definition.""" + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:3].ids, + } + ) + self.assertEqual(set(ids), set(self.registrants[:3].ids)) + + def test_resolve_area_scope(self): + """Test resolving area scope.""" + scope = self.create_scope( + "area", + area_id=self.area_district.id, + include_child_areas=False, + ) + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve(scope) + + # Check all returned IDs are registrants in the district + partners = self.env["res.partner"].browse(ids) + for partner in partners: + self.assertEqual(partner.area_id, self.area_district) + + def test_resolve_area_scope_with_children(self): + """Test resolving area scope including child areas.""" + # First add some registrants to parent area + self.env["res.partner"].create( + { + "name": "Regional Registrant", + "is_registrant": True, + "area_id": self.area_region.id, + } + ) + + scope = self.create_scope( + "area", + area_id=self.area_region.id, + include_child_areas=True, + ) + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve(scope) + + # Should include registrants from both region and district + partners = self.env["res.partner"].browse(ids) + area_ids = partners.mapped("area_id.id") + self.assertIn(self.area_region.id, area_ids) + self.assertIn(self.area_district.id, area_ids) + + def 
test_resolve_area_tag_scope(self): + """Test resolving area tag scope.""" + scope = self.create_scope( + "area_tag", + area_tag_ids=[(6, 0, [self.tag_urban.id])], + ) + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve(scope) + + # Should find registrants in urban-tagged areas + self.assertGreater(len(ids), 0) + + def test_resolve_multiple_scopes_union(self): + """Test resolving multiple scopes with union.""" + scope1 = self.create_scope( + "explicit", + name="Scope 1", + explicit_partner_ids=[(6, 0, self.registrants[:3].ids)], + ) + scope2 = self.create_scope( + "explicit", + name="Scope 2", + explicit_partner_ids=[(6, 0, self.registrants[2:5].ids)], + ) + + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve_multiple([scope1, scope2]) + + # Union: 0,1,2 + 2,3,4 = 0,1,2,3,4 + self.assertEqual(set(ids), set(self.registrants[:5].ids)) + + def test_resolve_multiple_scopes_intersect(self): + """Test resolving multiple scopes with intersection.""" + scope1 = self.create_scope( + "explicit", + name="Scope 1", + explicit_partner_ids=[(6, 0, self.registrants[:5].ids)], + ) + scope2 = self.create_scope( + "explicit", + name="Scope 2", + explicit_partner_ids=[(6, 0, self.registrants[3:8].ids)], + ) + + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve_intersect([scope1, scope2]) + + # Intersection: 0-4 ∩ 3-7 = 3,4 + self.assertEqual(set(ids), set(self.registrants[3:5].ids)) + + def test_resolve_empty_scope_returns_empty(self): + """Test that resolving empty scopes returns empty list.""" + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve_multiple([]) + self.assertEqual(ids, []) + + def test_resolve_spatial_polygon_without_bridge(self): + """Test spatial polygon returns empty without bridge module.""" + scope = self.create_scope( + "spatial_polygon", + geometry_geojson='{"type": "Polygon", "coordinates": [[[0,0],[1,0],[1,1],[0,1],[0,0]]]}', + ) + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve(scope) + # Without PostGIS bridge, returns empty + self.assertEqual(ids, []) + + def test_resolve_inline_area_scope(self): + """Test resolving inline area scope definition.""" + resolver = self.env["spp.aggregation.scope.resolver"] + ids = resolver.resolve( + { + "scope_type": "area", + "area_id": self.area_district.id, + "include_child_areas": False, + } + ) + self.assertGreater(len(ids), 0) + + +class TestScopeResolverPublicUser(AggregationTestCase): + """Tests for scope resolver running as public user (uid:3). + + The scope resolver must work for unprivileged callers because it uses + sudo() internally for model reads (res.partner, spp.area). 
+ """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.public_user = cls.env.ref("base.public_user") + + def test_resolve_explicit_scope_as_public_user(self): + """Test that explicit scope resolution works as public user.""" + resolver = self.env["spp.aggregation.scope.resolver"].with_user(self.public_user) + ids = resolver.resolve( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants[:5].ids, + } + ) + self.assertEqual(set(ids), set(self.registrants[:5].ids)) + + def test_resolve_area_scope_as_public_user(self): + """Test that area scope resolution works as public user.""" + resolver = self.env["spp.aggregation.scope.resolver"].with_user(self.public_user) + ids = resolver.resolve( + { + "scope_type": "area", + "area_id": self.area_district.id, + "include_child_areas": False, + } + ) + self.assertGreater(len(ids), 0) + + def test_resolve_area_tag_scope_as_public_user(self): + """Test that area tag scope resolution works as public user.""" + resolver = self.env["spp.aggregation.scope.resolver"].with_user(self.public_user) + ids = resolver.resolve( + { + "scope_type": "area_tag", + "area_tag_ids": [self.tag_urban.id], + } + ) + self.assertGreater(len(ids), 0) diff --git a/spp_aggregation/tests/test_statistic_registry.py b/spp_aggregation/tests/test_statistic_registry.py new file mode 100644 index 00000000..e87a98ea --- /dev/null +++ b/spp_aggregation/tests/test_statistic_registry.py @@ -0,0 +1,432 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +from odoo.tests.common import TransactionCase + + +class TestStatisticRegistry(TransactionCase): + """Tests for spp.aggregation.statistic.registry.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.stat_registry = cls.env["spp.aggregation.statistic.registry"] + + # Create test registrants + cls.registrants = cls.env["res.partner"] + for i in range(10): + cls.registrants |= cls.env["res.partner"].create( + { + "name": f"Test Registrant {i}", + "is_registrant": True, + } + ) + + def test_compute_count_builtin(self): + """Test that count builtin returns correct count.""" + result = self.stat_registry.compute("count", self.registrants.ids) + self.assertEqual(result, 10) + + def test_compute_count_builtin_empty(self): + """Test that count returns 0 for empty list.""" + result = self.stat_registry.compute("count", []) + self.assertEqual(result, 0) + + def test_compute_gini_builtin(self): + """Test that gini builtin returns None (placeholder).""" + result = self.stat_registry.compute("gini", self.registrants.ids) + self.assertIsNone(result) + + def test_compute_gini_coefficient_builtin(self): + """Test that gini_coefficient is an alias for gini.""" + result = self.stat_registry.compute("gini_coefficient", self.registrants.ids) + self.assertIsNone(result) + + def test_compute_unknown_statistic(self): + """Test that unknown statistic returns None with warning.""" + with self.assertLogs("odoo.addons.spp_aggregation.models.statistic_registry", level="WARNING") as log: + result = self.stat_registry.compute("nonexistent_stat", self.registrants.ids) + self.assertIsNone(result) + self.assertTrue(any("Unknown statistic: nonexistent_stat" in msg for msg in log.output)) + + def test_list_available_includes_builtins(self): + """Test that list_available includes builtin statistics.""" + available = self.stat_registry.list_available() + + # Should include count and gini + stat_names = [s["name"] for s in available] + self.assertIn("count", stat_names) + self.assertIn("gini", 
stat_names) + self.assertIn("gini_coefficient", stat_names) + + # Check structure + count_stat = next(s for s in available if s["name"] == "count") + self.assertEqual(count_stat["label"], "Total Count") + self.assertEqual(count_stat["source"], "builtin") + + +class TestStatisticRegistryIntegration(TransactionCase): + """Integration tests for statistic registry with spp.statistic and spp.cel.variable.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.stat_registry = cls.env["spp.aggregation.statistic.registry"] + + # Create test registrants + cls.registrants = cls.env["res.partner"] + for i in range(5): + cls.registrants |= cls.env["res.partner"].create( + { + "name": f"Test Registrant {i}", + "is_registrant": True, + } + ) + + def test_compute_from_cel_variable(self): + """Test computing statistic from spp.cel.variable.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # Create a simple CEL variable + self.env["spp.cel.variable"].create( + { + "name": "test_registry_var", + "cel_accessor": "test_registry_var", + "source_type": "computed", + "cel_expression": "r.is_registrant == true", + "value_type": "number", + "state": "active", + } + ) + + # Compute via registry + result = self.stat_registry.compute("test_registry_var", self.registrants.ids) + + # Should count registrants matching the expression + self.assertIsNotNone(result) + self.assertGreater(result, 0) + + def test_compute_from_statistic_model(self): + """Test computing statistic from spp.statistic via variable.""" + if "spp.statistic" not in self.env: + self.skipTest("spp_statistic module not installed") + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # Create CEL variable + variable = self.env["spp.cel.variable"].create( + { + "name": "test_registry_stat_var", + "cel_accessor": "test_registry_stat_var", + "source_type": "computed", + "cel_expression": "r.is_registrant == true", + "value_type": "number", + "state": "active", + } + ) + + # Create statistic that uses the variable + self.env["spp.statistic"].create( + { + "name": "test_registry_stat", + "label": "Test Registry Statistic", + "variable_id": variable.id, + "is_published_api": True, + } + ) + + # Compute via registry using statistic name + result = self.stat_registry.compute("test_registry_stat", self.registrants.ids) + + # Should compute via the variable + self.assertIsNotNone(result) + self.assertGreater(result, 0) + + def test_list_available_includes_statistics(self): + """Test that list_available includes spp.statistic records.""" + if "spp.statistic" not in self.env: + self.skipTest("spp_statistic module not installed") + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # Create a statistic + variable = self.env["spp.cel.variable"].create( + { + "name": "test_list_var", + "cel_accessor": "test_list_var", + "source_type": "computed", + "cel_expression": "true", + "value_type": "number", + "state": "active", + } + ) + self.env["spp.statistic"].create( + { + "name": "test_list_stat", + "label": "Test List Statistic", + "variable_id": variable.id, + "is_published_api": True, + "active": True, + } + ) + + available = self.stat_registry.list_available() + + # Should include the statistic + stat_names = [s["name"] for s in available] + self.assertIn("test_list_stat", stat_names) + + # Check structure + test_stat = next(s for s in available if s["name"] == "test_list_stat") + self.assertEqual(test_stat["label"], "Test 
List Statistic") + self.assertEqual(test_stat["source"], "statistic") + + def test_list_available_includes_variables(self): + """Test that list_available can find CEL variables.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + available = self.stat_registry.list_available() + + # Should at least include builtins + self.assertGreater(len(available), 0) + + # Check structure is correct for all items + for item in available: + self.assertIn("name", item) + self.assertIn("source", item) + self.assertIn(item["source"], ["builtin", "statistic", "variable"]) + + def test_compute_from_variable_empty_registrants(self): + """Test that computing with empty registrant list returns 0.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + self.env["spp.cel.variable"].create( + { + "name": "test_empty_var", + "cel_accessor": "test_empty_var", + "source_type": "computed", + "cel_expression": "true", + "value_type": "number", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_empty_var", []) + self.assertEqual(result, 0) + + def test_compute_from_variable_handles_group_profile(self): + """Test that registry uses correct CEL profile for group variables.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # Create groups + groups = self.env["res.partner"] + for i in range(3): + groups |= self.env["res.partner"].create( + { + "name": f"Test Group {i}", + "is_registrant": True, + "is_group": True, + } + ) + + # Create a group-targeted variable + self.env["spp.cel.variable"].create( + { + "name": "test_group_var", + "cel_accessor": "test_group_var", + "source_type": "computed", + "cel_expression": "r.is_group == true", + "value_type": "number", + "applies_to": "group", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_group_var", groups.ids) + + # Should compute successfully + self.assertIsNotNone(result) + self.assertGreaterEqual(result, 0) + + +class TestStatisticRegistryMemberAggregate(TransactionCase): + """Tests for member aggregate sum computation. + + Verifies that aggregate CEL variables (e.g., members.count(m, true)) + return the SUM of per-group values, not the count of groups. 
+ """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.stat_registry = cls.env["spp.aggregation.statistic.registry"] + + if "spp.cel.variable" not in cls.env: + return + + # Create 3 groups with different member counts + cls.groups = cls.env["res.partner"] + member_counts = [2, 3, 4] # Total = 9 members + for i, count in enumerate(member_counts): + group = cls.env["res.partner"].create( + { + "name": f"Aggregate Test Group {i}", + "is_registrant": True, + "is_group": True, + } + ) + for j in range(count): + individual = cls.env["res.partner"].create( + { + "name": f"Member {i}-{j}", + "is_registrant": True, + "is_group": False, + } + ) + cls.env["spp.group.membership"].create( + { + "group": group.id, + "individual": individual.id, + } + ) + cls.groups |= group + + def test_aggregate_count_returns_member_sum_not_group_count(self): + """Test that members.count(m, true) returns total members, not group count.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + self.env["spp.cel.variable"].create( + { + "name": "test_agg_total_members", + "cel_accessor": "test_agg_total_members", + "source_type": "aggregate", + "aggregate_type": "count", + "aggregate_target": "members", + "aggregate_filter": "true", + "value_type": "number", + "applies_to": "group", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_agg_total_members", self.groups.ids) + + # Should be 2+3+4=9 (sum of members), NOT 3 (count of groups) + self.assertEqual(result, 9) + + def test_aggregate_count_with_filter(self): + """Test that filtered member aggregates also return correct sums.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # All members match 'true' filter, so result should be same as total + self.env["spp.cel.variable"].create( + { + "name": "test_agg_filtered", + "cel_accessor": "test_agg_filtered", + "source_type": "aggregate", + "aggregate_type": "count", + "aggregate_target": "members", + "aggregate_filter": "true", + "value_type": "number", + "applies_to": "group", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_agg_filtered", self.groups.ids) + self.assertEqual(result, 9) + + def test_aggregate_count_empty_registrants(self): + """Test aggregate with empty registrant list returns 0.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + self.env["spp.cel.variable"].create( + { + "name": "test_agg_empty", + "cel_accessor": "test_agg_empty", + "source_type": "aggregate", + "aggregate_type": "count", + "aggregate_target": "members", + "aggregate_filter": "true", + "value_type": "number", + "applies_to": "group", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_agg_empty", []) + self.assertEqual(result, 0) + + def test_computed_variable_still_returns_count(self): + """Test that non-aggregate (computed) variables still return group count.""" + if "spp.cel.variable" not in self.env: + self.skipTest("spp_cel module not installed") + + # source_type=computed → compile_expression count path + self.env["spp.cel.variable"].create( + { + "name": "test_computed_count", + "cel_accessor": "test_computed_count", + "source_type": "computed", + "cel_expression": "true", + "value_type": "number", + "applies_to": "group", + "state": "active", + } + ) + + result = self.stat_registry.compute("test_computed_count", self.groups.ids) + # Should be 3 (count of groups matching 'true'), not member sum + 
self.assertEqual(result, 3) + + +class TestStatisticRegistryViaService(TransactionCase): + """Tests that verify the service correctly uses the registry.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.service = cls.env["spp.aggregation.service"] + cls.registrants = cls.env["res.partner"] + for i in range(5): + cls.registrants |= cls.env["res.partner"].create( + { + "name": f"Test Registrant {i}", + "is_registrant": True, + } + ) + + def test_service_uses_registry_for_count(self): + """Test that service delegates to registry for count statistic.""" + result = self.service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants.ids, + }, + statistics=["count"], + ) + + self.assertIn("statistics", result) + self.assertIn("count", result["statistics"]) + self.assertEqual(result["statistics"]["count"]["value"], 5) + self.assertFalse(result["statistics"]["count"]["suppressed"]) + + def test_service_uses_registry_for_unknown_statistic(self): + """Test that service handles unknown statistics via registry.""" + result = self.service.compute_aggregation( + { + "scope_type": "explicit", + "explicit_partner_ids": self.registrants.ids, + }, + statistics=["nonexistent"], + ) + + self.assertIn("statistics", result) + self.assertIn("nonexistent", result["statistics"]) + # Should have None value due to unknown statistic + self.assertIsNone(result["statistics"]["nonexistent"]["value"]) diff --git a/spp_aggregation/views/aggregation_access_views.xml b/spp_aggregation/views/aggregation_access_views.xml new file mode 100644 index 00000000..9ca48d05 --- /dev/null +++ b/spp_aggregation/views/aggregation_access_views.xml @@ -0,0 +1,146 @@ + + + + + spp.aggregation.access.rule.form + spp.aggregation.access.rule + +
+<!-- form view arch for spp.aggregation.access.rule (markup lost) -->
+<!-- list view: spp.aggregation.access.rule.list on spp.aggregation.access.rule (arch lost) -->
+<!-- search view: spp.aggregation.access.rule.search on spp.aggregation.access.rule (arch lost) -->
+<!-- window action "Access Rules" on spp.aggregation.access.rule, view_mode list,form; help text: -->
+<!-- "Create access rules for aggregation queries" -->
+<!-- "Control who can access aggregation data and what level of detail they can see (aggregates only vs individual records)." -->
diff --git a/spp_aggregation/views/aggregation_scope_views.xml b/spp_aggregation/views/aggregation_scope_views.xml new file mode 100644 index 00000000..307b641e --- /dev/null +++ b/spp_aggregation/views/aggregation_scope_views.xml @@ -0,0 +1,230 @@
+<!-- form view: spp.aggregation.scope.form on spp.aggregation.scope (arch lost) -->
+<!-- list view: spp.aggregation.scope.list on spp.aggregation.scope (arch lost) -->
+<!-- search view: spp.aggregation.scope.search on spp.aggregation.scope (arch lost) -->
+<!-- window action "Aggregation Scopes" on spp.aggregation.scope, view_mode list,form; help text: -->
+<!-- "Create your first aggregation scope" -->
+<!-- "Scopes define what data to aggregate. Use CEL expressions, administrative areas, spatial queries, or explicit registrant lists." -->
diff --git a/spp_aggregation/views/menu.xml b/spp_aggregation/views/menu.xml new file mode 100644 index 00000000..6ba2d80c --- /dev/null +++ b/spp_aggregation/views/menu.xml @@ -0,0 +1,46 @@
+<!-- menu items and window-action bindings for the Aggregation module (markup lost) -->
diff --git a/spp_metrics_services/README.md b/spp_metrics_services/README.md new file mode 100644 index 00000000..cfd475bb --- /dev/null +++ b/spp_metrics_services/README.md @@ -0,0 +1,354 @@
+# OpenSPP Metrics Services
+
+Shared computation and caching services for metrics across OpenSPP modules.
+
+## Overview
+
+`spp_metrics_services` provides the core computation engine for all metrics in OpenSPP,
+including population statistics, simulation outcomes, fairness analysis, and privacy
+protection. These services are used by GIS, dashboards, simulations, and APIs.
+
+## Architecture
+
+```
+spp.aggregation.service (Main Entry Point)
+    │
+    ├── spp.metrics.breakdown (Multi-dimensional grouping)
+    │   └── spp.metrics.dimension.cache (Performance optimization)
+    ├── spp.metrics.fairness (Equity analysis)
+    ├── spp.metrics.distribution (Statistical distributions)
+    └── spp.metrics.privacy (K-anonymity enforcement)
+```
+
+## Services
+
+### spp.aggregation.service
+
+**Main entry point** for all aggregation computations.
+
+**Key Method:**
+
+```python
+compute_aggregation(scope, statistics=None, group_by=None, context=None)
+```
+
+**Arguments:**
+
+- `scope` - Dict defining the population (scope_type + params)
+- `statistics` - List of metric names to compute (or None for defaults)
+- `group_by` - List of dimension names for breakdown (max 3)
+- `context` - Context string ('gis', 'api', 'dashboard', etc.)
+
+**Returns:**
+
+```python
+{
+    'total_count': 123,
+    'statistics': {
+        'total_registrants': {'value': 123, 'suppressed': False},
+        'coverage_rate': {'value': 45.2, 'suppressed': False},
+    },
+    'breakdown': {
+        'dimensions': ['gender', 'age_group'],
+        'groups': [
+            {
+                'dimension_values': {'gender': 'M', 'age_group': '18-24'},
+                'count': 15,
+                'statistics': {...},
+            },
+            ...
+        ],
+    },
+}
+```
+
+**Example:**
+
+```python
+service = env['spp.aggregation.service']
+
+scope = {
+    'scope_type': 'area',
+    'area_id': 42,
+}
+
+result = service.compute_aggregation(
+    scope=scope,
+    statistics=['total_registrants', 'coverage_rate'],
+    group_by=['gender', 'disability'],
+    context='gis',
+)
+```
+
+### spp.metrics.breakdown
+
+Computes multi-dimensional breakdowns with caching.
+
+**Key Method:**
+
+```python
+compute_breakdown(registrant_ids, group_by, statistics=None, context=None)
+```
+
+**Features:**
+
+- Supports up to 3 simultaneous dimensions
+- Automatic caching via `spp.metrics.dimension.cache`
+- Privacy enforcement on small groups
+
+**Example:**
+
+```python
+breakdown_service = env['spp.metrics.breakdown']
+
+result = breakdown_service.compute_breakdown(
+    registrant_ids=[1, 2, 3, 4, 5],
+    group_by=['gender', 'age_group'],
+    statistics=['total_registrants'],
+    context='gis',
+)
+```
+
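+The per-group return shape is documented on the service itself: results are keyed by
+pipe-separated dimension values. A sketch of what the call above might return, with
+counts and labels purely illustrative:
+
+```python
+{
+    "2|adult": {
+        "count": 3,
+        "statistics": {},
+        "labels": {
+            "gender": {"value": "2", "display": "Female"},
+            "age_group": {"value": "adult", "display": "Adult (18-59)"},
+        },
+    },
+    # ... one entry per observed dimension combination
+}
+```
+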
+### spp.metrics.fairness
+
+Computes fairness/equity metrics across demographic groups.
+
+**Key Method:**
+
+```python
+compute_fairness(registrant_ids, base_domain=None, dimensions=None)
+```
+
+**Returns:**
+
+- Equity score (0-100, higher is more equitable)
+- Disparity detection (boolean)
+- Per-dimension analysis with worst ratio
+
+**Metrics Computed:**
+
+- Representation ratio (actual / expected)
+- Max disparity across all groups
+- Coverage by dimension
+
+**Example:**
+
+```python
+fairness_service = env['spp.metrics.fairness']
+
+result = fairness_service.compute_fairness(
+    registrant_ids=[1, 2, 3],
+    base_domain=[('active', '=', True)],
+    dimensions=['gender', 'disability'],
+)
+
+# Result (abridged):
+# {
+#     'equity_score': 75.0,
+#     'has_disparity': True,
+#     'attributes': {
+#         'gender': {
+#             'worst_ratio': 0.72,
+#             'groups': [...],
+#         },
+#         'disability': {...},
+#     },
+# }
+```
+
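+How the score is derived, per the thresholds hard-coded in the service (0.80 disparity,
+0.70 warning). The disparity-ratio helper itself is not shown here, so treating it as
+group coverage divided by overall coverage is an assumption:
+
+```python
+# Assumed: ratio = group_coverage / overall_coverage
+overall = 0.10                      # 10% of the population are beneficiaries
+ratios = {
+    "gender": 0.072 / overall,      # 0.72 -> mild disparity
+    "disability": 0.060 / overall,  # 0.60 -> severe disparity
+}
+
+equity = 100.0
+for worst in ratios.values():
+    if worst < 0.70:    # DISPARITY_WARNING_THRESHOLD: -20 points
+        equity -= 20
+    elif worst < 0.80:  # DISPARITY_THRESHOLD: -5 points
+        equity -= 5
+
+assert equity == 75.0   # matches the example result above
+```
+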
+### spp.metrics.distribution
+
+Computes distribution statistics for numerical values.
+
+**Key Method:**
+
+```python
+compute_distribution(amounts)
+```
+
+**Returns:**
+
+- Descriptive statistics (mean, median, min, max, standard deviation)
+- Percentiles (p10, p25, p50, p75, p90)
+- Gini coefficient (inequality measure)
+- Lorenz curve deciles (inequality visualization)
+
+**Example:**
+
+```python
+distribution_service = env['spp.metrics.distribution']
+
+amounts = [100, 200, 150, 300, 250]
+stats = distribution_service.compute_distribution(amounts)
+
+# Result (abridged):
+# {
+#     'count': 5,
+#     'mean': 200.0,
+#     'median': 200.0,
+#     'gini_coefficient': 0.2,
+#     'percentiles': {'p50': 200.0, ...},
+#     'lorenz_deciles': [{'population_share': 10, 'income_share': 10.0}, ...],
+# }
+```
+
+### spp.metrics.privacy
+
+Enforces k-anonymity privacy protection on aggregation results.
+
+**Key Methods:**
+
+```python
+enforce(result, k_threshold=None, access_level="aggregate")
+validate_access_level(user=None)
+get_k_threshold(user=None, context=None)
+```
+
+**Features:**
+
+- K-anonymity with complementary suppression
+- Access level enforcement (aggregate vs individual)
+- Protection against differencing attacks
+- Configurable thresholds per context
+
+**Example:**
+
+```python
+privacy_service = env['spp.metrics.privacy']
+
+result = {
+    'total_count': 3,  # Below threshold
+    'statistics': {'total_registrants': {'value': 3}},
+}
+
+protected = privacy_service.enforce(result, k_threshold=10)
+
+# Result:
+# {
+#     'total_count': 0,  # Suppressed
+#     'statistics': {'total_registrants': {'value': 0, 'suppressed': True}},
+#     'suppressed': True,
+#     'suppression_reason': 'Group size below k-anonymity threshold',
+# }
+```
+
+### spp.metrics.dimension.cache
+
+Performance cache for dimension evaluations.
+
+**Key Methods:**
+
+```python
+evaluate_dimension_batch(dimension, registrant_ids)
+clear_dimension_cache(dimension_id=None)
+warm_cache(dimensions, registrant_ids)
+```
+
+**Features:**
+
+- 5-10x faster repeated evaluations
+- Automatic invalidation on dimension changes
+- Cache key: (dimension_id, write_date, registrant_ids hash)
+- Automatic cleanup of stale entries
+
+**Cache Strategy:**
+
+```python
+# First call: Evaluates CEL expression
+result = breakdown.compute_breakdown(registrant_ids, group_by)
+
+# Subsequent calls: Uses cached results
+result = breakdown.compute_breakdown(registrant_ids, group_by)  # Fast!
+
+# After dimension.write(): Cache invalidated automatically
+dimension.write({'cel_expression': 'new_expr'})
+result = breakdown.compute_breakdown(registrant_ids, group_by)  # Re-evaluates
+```
+
+## Dependencies
+
+- `base` - Odoo core
+- `spp_metrics_core` - Base metric models
+- `spp_cel_domain` - CEL expression support
+- `spp_area` - Administrative areas
+- `spp_registry` - Registrant/partner data
+
+## Used By
+
+- `spp_aggregation` - Delegates to these services
+- `spp_statistic` - Statistics computation
+- `spp_simulation` - Simulation metrics
+- `spp_api_v2_gis` - GIS statistics API
+- `spp_api_v2_simulation` - Simulation API
+
+## Migration from spp_aggregation
+
+These services were extracted from `spp_aggregation` to enable reuse across modules.
+Model names remain unchanged for backward compatibility.
+
+**No code changes required** - Existing code continues to work:
+
+```python
+# Still works
+fairness = env['spp.metrics.fairness']
+distribution = env['spp.metrics.distribution']
+privacy = env['spp.metrics.privacy']
+breakdown = env['spp.metrics.breakdown']
+```
+
+See [Migration Guide](../../docs/migration/statistics-refactoring.md) for details.
+
+## Performance Considerations
+
+### Caching
+
+Dimension cache eliminates redundant CEL evaluations:
+
+- **First call**: Evaluates CEL expression (~500ms for 10k registrants)
+- **Cached calls**: Retrieves cached results (~50ms)
+- **Invalidation**: Automatic on dimension.write()
+
+### Batch Processing
+
+For large populations (>10,000 registrants):
+
+```python
+# Use explicit scope to avoid repeated CEL evaluation
+scope = {
+    'scope_type': 'explicit',
+    'explicit_partner_ids': large_list_of_ids,
+}
+
+result = service.compute_aggregation(scope=scope, ...)
+```
+
+### Privacy Overhead
+
+K-anonymity enforcement adds minimal overhead:
+
+- Group size check: O(1)
+- Suppression logic: O(n) where n = number of groups
+- Typical overhead: <1% of total computation time
+
+## Testing
+
+Run tests:
+
+```bash
+./scripts/test_single_module.sh spp_metrics_services
+```
+
+Key test scenarios:
+
+- Service computation accuracy
+- Cache hit/miss rates
+- Privacy enforcement
+- Multi-dimensional breakdowns
+- Large population handling
+
+## Architecture Documentation
+
+See [Statistics System Architecture](../../docs/architecture/statistics-systems.md) for
+the complete system design.
+
+## License
+
+LGPL-3
diff --git a/spp_metrics_services/__init__.py b/spp_metrics_services/__init__.py new file mode 100644 index 00000000..c4ccea79 --- /dev/null +++ b/spp_metrics_services/__init__.py @@ -0,0 +1,3 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import models diff --git a/spp_metrics_services/__manifest__.py b/spp_metrics_services/__manifest__.py new file mode 100644 index 00000000..6c12a338 --- /dev/null +++ b/spp_metrics_services/__manifest__.py @@ -0,0 +1,30 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details.
+{ + "name": "OpenSPP Metrics Services", + "summary": "Shared services for fairness, distribution, breakdown, and privacy", + "category": "OpenSPP", + "version": "19.0.2.0.0", + "sequence": 1, + "author": "OpenSPP.org", + "website": "https://github.com/OpenSPP/OpenSPP2", + "license": "LGPL-3", + "development_status": "Alpha", + "maintainers": ["jeremi"], + "depends": [ + "base", + "spp_cel_domain", + "spp_area", + "spp_registry", + ], + "data": [ + "security/ir.model.access.csv", + "data/demographic_dimensions.xml", + "views/demographic_dimension_views.xml", + ], + "assets": {}, + "demo": [], + "images": [], + "application": False, + "installable": True, + "auto_install": False, +} diff --git a/spp_metrics_services/data/demographic_dimensions.xml b/spp_metrics_services/data/demographic_dimensions.xml new file mode 100644 index 00000000..862bb3f5 --- /dev/null +++ b/spp_metrics_services/data/demographic_dimensions.xml @@ -0,0 +1,78 @@ + + + + + gender + Gender + Gender identity from ISO 5218 vocabulary + 10 + field + gender_id + all + {"0": "Not Known", "1": "Male", "2": "Female", "9": "Not Applicable"} + + + + + disability_status + Disability Status + Whether the registrant has a recorded disability + 20 + expression + r.disability_id != null ? "pwd" : "non_pwd" + + individuals + {"pwd": "Persons with Disability", "non_pwd": "Without Disability"} + + + + + area + Administrative Area + Administrative area of the registrant + 30 + field + area_id + all + + + + + registrant_type + Registrant Type + Whether individual or group/household + 40 + field + is_group + all + {"true": "Group/Household", "false": "Individual"} + + + + + age_group + Age Group + Age group based on birth date + 50 + expression + + individuals + {"child": "Child (0-17)", "adult": "Adult (18-59)", "elderly": "Elderly (60+)", "unknown": "Unknown"} + + diff --git a/spp_metrics_services/models/__init__.py b/spp_metrics_services/models/__init__.py new file mode 100644 index 00000000..847bae6c --- /dev/null +++ b/spp_metrics_services/models/__init__.py @@ -0,0 +1,8 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import demographic_dimension +from . import dimension_cache +from . import fairness_service +from . import distribution_service +from . import privacy_service +from . import breakdown_service diff --git a/spp_metrics_services/models/breakdown_service.py b/spp_metrics_services/models/breakdown_service.py new file mode 100644 index 00000000..5f9b978e --- /dev/null +++ b/spp_metrics_services/models/breakdown_service.py @@ -0,0 +1,97 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import logging + +from odoo import api, models + +_logger = logging.getLogger(__name__) + + +class BreakdownService(models.AbstractModel): + """ + Service for computing breakdowns by demographic dimensions. + + Categorizes registrants by one or more dimensions and provides + counts and statistics per dimension combination. + """ + + _name = "spp.metrics.breakdown" + _description = "Breakdown Computation Service" + + @api.model + def compute_breakdown(self, registrant_ids, group_by, statistics=None, context=None): + """ + Compute breakdown by dimensions with caching. + + Uses dimension cache for 5-10x performance improvement. 
+ + :param registrant_ids: List of partner IDs + :param group_by: List of dimension names + :param statistics: List of statistic names (optional) + :param context: Context string (optional) + :returns: Breakdown dictionary keyed by pipe-separated dimension values + :rtype: dict + + Returns: + { + "dimension1|dimension2|...": { + "count": int, + "statistics": {}, + "labels": { + "dimension_name": { + "value": str, + "display": str, + } + } + } + } + """ + if not registrant_ids or not group_by: + return {} + + # Get dimension records (use sudo - they're configuration data) + dimension_model = self.env["spp.demographic.dimension"].sudo() # nosemgrep: odoo-sudo-without-context + dimensions = [dimension_model.get_by_name(name) for name in group_by] + dimensions = [d for d in dimensions if d] # Filter out None + + if not dimensions: + return {} + + # Get cache service + cache_service = self.env["spp.metrics.dimension.cache"] + + # Get cached evaluations for all dimensions + dimension_evaluations = {} + for dimension in dimensions: + dimension_evaluations[dimension.name] = cache_service.evaluate_dimension_batch(dimension, registrant_ids) + + # Build breakdown using cached evaluations + breakdown = {} + for partner_id in registrant_ids: + # Build the breakdown key from cached evaluations + key_parts = [] + for dimension in dimensions: + value = dimension_evaluations[dimension.name].get(partner_id, "unknown") + key_parts.append(str(value)) + + key = "|".join(key_parts) + + if key not in breakdown: + breakdown[key] = { + "count": 0, + "statistics": {}, + "labels": {}, + } + # Store labels for each dimension + for dim, value in zip(dimensions, key_parts, strict=False): + breakdown[key]["labels"][dim.name] = { + "value": value, + "display": dim.get_label_for_value(value), + } + + breakdown[key]["count"] += 1 + + # Optionally compute statistics per cell (expensive) + # For now, just return counts + # TODO: Add per-cell statistics if needed + + return breakdown diff --git a/spp_metrics_services/models/demographic_dimension.py b/spp_metrics_services/models/demographic_dimension.py new file mode 100644 index 00000000..95fad6ef --- /dev/null +++ b/spp_metrics_services/models/demographic_dimension.py @@ -0,0 +1,279 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import json +import logging + +from odoo import _, api, fields, models +from odoo.exceptions import ValidationError + +_logger = logging.getLogger(__name__) + + +class DemographicDimension(models.Model): + """ + Configurable demographic dimensions for group_by breakdowns. 
+ + Examples: + - gender: Field-based lookup on gender_id.code + - disability_status: CEL expression r.disability_id != null + - age_group: CEL expression age_bucket(r.birthdate) + - area: Field-based lookup on area_id + """ + + _name = "spp.demographic.dimension" + _description = "Demographic Dimension" + _order = "sequence, name" + + name = fields.Char( + required=True, + index=True, + help="Technical name for this dimension (e.g., 'gender', 'disability_status').", + ) + label = fields.Char( + string="Label", + required=True, + translate=True, + help="Human-readable label (e.g., 'Gender', 'Disability Status').", + ) + description = fields.Text( + help="Optional description of what this dimension represents.", + ) + sequence = fields.Integer( + default=10, + help="Display order in UI.", + ) + active = fields.Boolean( + default=True, + index=True, + ) + + dimension_type = fields.Selection( + selection=[ + ("field", "Model Field"), + ("expression", "CEL Expression"), + ], + required=True, + default="field", + help="How to evaluate this dimension for a registrant.", + ) + + # ------------------------------------------------------------------------- + # Field-based dimensions + # ------------------------------------------------------------------------- + field_path = fields.Char( + string="Field Path", + help=( + "Dot-notation path to the field value (e.g., 'gender_id.code', 'area_id.id'). " + "For direct fields, just use the field name (e.g., 'age')." + ), + ) + + # ------------------------------------------------------------------------- + # CEL-based dimensions + # ------------------------------------------------------------------------- + cel_expression = fields.Text( + string="CEL Expression", + help=( + "CEL expression that returns a category value for each registrant. " + "Use 'r' for the registrant record. " + "Example: age_bucket(r.birthdate) or r.disability_id != null ? 'pwd' : 'non_pwd'" + ), + ) + + # ------------------------------------------------------------------------- + # Value configuration + # ------------------------------------------------------------------------- + value_labels_json = fields.Json( + string="Value Labels", + help=('JSON mapping of raw values to display labels. 
Example: {"M": "Male", "F": "Female", "O": "Other"}'), + ) + default_value = fields.Char( + string="Default Value", + default="unknown", + help="Value to use when the dimension cannot be evaluated (null/missing).", + ) + + # ------------------------------------------------------------------------- + # Applicability + # ------------------------------------------------------------------------- + applies_to = fields.Selection( + selection=[ + ("all", "All Registrants"), + ("individuals", "Individuals Only"), + ("groups", "Groups Only"), + ], + default="all", + help="Which registrant types this dimension applies to.", + ) + + # ------------------------------------------------------------------------- + # Validation + # ------------------------------------------------------------------------- + _name_unique = models.Constraint( + "unique(name)", + "Dimension name must be unique.", + ) + + @api.constrains("dimension_type", "field_path") + def _check_field_path(self): + """Validate field path is provided for field-based dimensions.""" + for dim in self: + if dim.dimension_type == "field" and not dim.field_path: + raise ValidationError(_("Field path is required for field-based dimensions.")) + + @api.constrains("dimension_type", "cel_expression") + def _check_cel_expression(self): + """Validate CEL expression is provided for expression-based dimensions.""" + for dim in self: + if dim.dimension_type == "expression" and not dim.cel_expression: + raise ValidationError(_("CEL expression is required for expression-based dimensions.")) + + # ------------------------------------------------------------------------- + # Public API + # ------------------------------------------------------------------------- + def evaluate_for_record(self, record): + """ + Evaluate this dimension for a single registrant record. 
+ + :param record: res.partner record + :returns: The dimension value (string) + :rtype: str + """ + self.ensure_one() + + # Check applicability + if self.applies_to == "individuals" and record.is_group: + return self.default_value or "n/a" + if self.applies_to == "groups" and not record.is_group: + return self.default_value or "n/a" + + try: + if self.dimension_type == "field": + return self._evaluate_field(record) + else: + return self._evaluate_expression(record) + except (ValueError, AttributeError, TypeError, KeyError) as e: + dimension_name = self.name + record_id = record.id + _logger.warning("Error evaluating dimension %s for record %s: %s", dimension_name, record_id, e) + return self.default_value or "error" + + def _evaluate_field(self, record): + """Evaluate a field-based dimension.""" + value = record + for part in self.field_path.split("."): + if value is None: + break + if hasattr(value, part): + value = getattr(value, part) + else: + value = None + break + + if value is None: + return self.default_value or "unknown" + + # Convert to string key + if hasattr(value, "id"): + # Many2one - use code or display_name for meaningful keys + if hasattr(value, "code") and value.code: + key = str(value.code) + else: + key = value.display_name or str(value.id) + elif isinstance(value, bool): + key = str(value).lower() + else: + key = str(value) if value else self.default_value + + return key + + def _evaluate_expression(self, record): + """Evaluate a CEL expression-based dimension.""" + try: + cel_service = self.env["spp.cel.service"].sudo() # nosemgrep: odoo-sudo-without-context + except KeyError: + dimension_name = self.name + _logger.warning("CEL service not available for dimension %s", dimension_name) + return self.default_value or "error" + + context = {"r": record, "me": record} + result = cel_service.evaluate_expression(self.cel_expression, context) + + if result is None: + return self.default_value or "unknown" + + # Convert bool to string category + if isinstance(result, bool): + return "true" if result else "false" + + return str(result) + + def get_label_for_value(self, value): + """ + Get the display label for a dimension value. + + :param value: The raw dimension value + :returns: Display label + :rtype: str + """ + self.ensure_one() + if not self.value_labels_json: + return value + + # Handle case where value_labels_json is a string (JSON not yet parsed) + labels = self.value_labels_json + if isinstance(labels, str): + try: + labels = json.loads(labels) + except (json.JSONDecodeError, TypeError): + return value + + # Convert value to string for lookup (keys are strings in JSON) + str_value = str(value) if value is not None else "null" + if str_value in labels: + return labels[str_value] + return value + + @api.model + def get_by_name(self, name): + """ + Get a dimension by its technical name. + + :param name: Technical name of the dimension + :returns: Dimension record or empty recordset + :rtype: spp.demographic.dimension + """ + return self.search([("name", "=", name), ("active", "=", True)], limit=1) + + @api.model + def get_active_dimensions(self, applies_to=None): + """ + Get all active dimensions, optionally filtered by applicability. 
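+ + Example: + + dims = self.env["spp.demographic.dimension"].get_active_dimensions(applies_to="individuals") + # matches applies_to in ("all", "individuals"), ordered by sequence, name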
+ + :param applies_to: Filter by 'individuals', 'groups', or None for all + :returns: Recordset of dimensions + :rtype: spp.demographic.dimension + """ + domain = [("active", "=", True)] + if applies_to: + domain.append("|") + domain.append(("applies_to", "=", "all")) + domain.append(("applies_to", "=", applies_to)) + return self.search(domain, order="sequence, name") + + # ------------------------------------------------------------------------- + # Cache Invalidation + # ------------------------------------------------------------------------- + def write(self, vals): + """Clear cache when dimension configuration changes.""" + result = super().write(vals) + cache_service = self.env["spp.metrics.dimension.cache"] + for record in self: + cache_service.clear_dimension_cache(record.id) + return result + + def unlink(self): + """Clear cache when dimension is deleted.""" + cache_service = self.env["spp.metrics.dimension.cache"] + for record in self: + cache_service.clear_dimension_cache(record.id) + return super().unlink() diff --git a/spp_metrics_services/models/dimension_cache.py b/spp_metrics_services/models/dimension_cache.py new file mode 100644 index 00000000..8460b41c --- /dev/null +++ b/spp_metrics_services/models/dimension_cache.py @@ -0,0 +1,136 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import hashlib +import logging + +from odoo import api, models, tools + +_logger = logging.getLogger(__name__) + + +class DimensionCacheService(models.AbstractModel): + """ + Service for caching dimension evaluation results. + + Dramatically improves breakdown performance by caching: + - Field-based dimension evaluations (gender, disability, etc.) + - CEL expression evaluations (age groups, custom categories) + + Cache key: (dimension_id, dimension_write_date, hash(registrant_ids)) + Cache TTL: 1 hour (default Odoo ormcache) + Invalidation: On dimension write/unlink + """ + + _name = "spp.metrics.dimension.cache" + _description = "Dimension Evaluation Cache" + + @api.model + def _make_registrant_key(self, registrant_ids): + """ + Create a compact hash key from registrant IDs. + + Using a hash instead of frozenset avoids unbounded cache key growth + with large registrant populations (e.g., 100k+ records). + + :param registrant_ids: List or frozenset of registrant IDs + :returns: MD5 hash of sorted IDs + :rtype: str + """ + if isinstance(registrant_ids, frozenset): + registrant_ids = list(registrant_ids) + sorted_ids = sorted(registrant_ids) + id_str = ",".join(str(i) for i in sorted_ids) + return hashlib.md5(id_str.encode(), usedforsecurity=False).hexdigest() # nosec B324 + + @api.model + @tools.ormcache("dimension_id", "dimension_write_date", "registrant_ids_key") + def _compute_evaluations(self, dimension_id, dimension_write_date, registrant_ids_key, registrant_ids_list): + """ + Compute and cache dimension evaluations for a set of registrants. + + This method is cached via @ormcache. The cache key includes write_date + to auto-invalidate when dimension changes. 
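+ + The registrant key is order-insensitive, so equivalent populations share a cache entry: + + assert self._make_registrant_key([3, 1, 2]) == self._make_registrant_key([1, 2, 3])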
+ + :param dimension_id: ID of spp.demographic.dimension + :param dimension_write_date: dimension.write_date (for cache invalidation) + :param registrant_ids_key: Hash of registrant IDs (compact cache key) + :param registrant_ids_list: Actual list of registrant IDs (not cached) + :returns: Dictionary mapping registrant_id → category value + :rtype: dict + """ + dimension = self.env["spp.demographic.dimension"].sudo().browse(dimension_id) # nosemgrep: odoo-sudo-without-context # noqa: E501 # fmt: skip + if not dimension.exists(): + return {} + + result = {} + partner_model = self.env["res.partner"].sudo() # nosemgrep: odoo-sudo-without-context, odoo-sudo-on-sensitive-models # noqa: E501 # fmt: skip + partners = partner_model.browse(registrant_ids_list) + + for partner in partners: + value = dimension.evaluate_for_record(partner) + result[partner.id] = value + + return result + + @api.model + def evaluate_dimension_batch(self, dimension, registrant_ids): + """ + Evaluate dimension for a batch of registrants with caching. + + Uses @ormcache decorator for automatic caching and invalidation. + + :param dimension: spp.demographic.dimension record + :param registrant_ids: List of partner IDs + :returns: Dictionary mapping partner_id → category value + :rtype: dict + """ + if not dimension or not registrant_ids: + return {} + + # Create compact hash key from registrant IDs + registrant_ids_key = self._make_registrant_key(registrant_ids) + + # Get write_date for cache key + write_date = dimension.write_date.isoformat() if dimension.write_date else "none" + + # Call cached method - this will use cache if available + # Pass both hash key (for cache) and actual list (for computation) + result = self._compute_evaluations(dimension.id, write_date, registrant_ids_key, list(registrant_ids)) + + if result: + dimension_name = dimension.name + _logger.debug("Evaluated dimension %s for %d registrants", dimension_name, len(registrant_ids)) + + return result + + @api.model + def clear_dimension_cache(self, dimension_id=None): + """ + Clear dimension evaluation cache. + + Invalidates this model's cache, which is more targeted than clearing + the entire registry but still clears all ormcache entries for this model. + + :param dimension_id: Specific dimension to clear, or None for all + """ + if dimension_id: + _logger.info("Clearing cache for dimension ID %s", dimension_id) + else: + _logger.info("Clearing all dimension caches") + + # Invalidate this model's cache + # More targeted than self.env.registry.clear_cache() but still broad + # within this model. Granular per-dimension clearing would require + # restructuring the cache key to make dimension_id the primary key. + self.invalidate_model() + + @api.model + def warm_cache(self, dimensions, registrant_ids): + """ + Pre-warm cache for common dimension sets. + + :param dimensions: List of spp.demographic.dimension records + :param registrant_ids: List of partner IDs + """ + _logger.info("Warming cache for %d dimensions, %d registrants", len(dimensions), len(registrant_ids)) + for dimension in dimensions: + self.evaluate_dimension_batch(dimension, registrant_ids) diff --git a/spp_metrics_services/models/distribution_service.py b/spp_metrics_services/models/distribution_service.py new file mode 100644 index 00000000..37b58939 --- /dev/null +++ b/spp_metrics_services/models/distribution_service.py @@ -0,0 +1,179 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+import logging +import math + +from odoo import api, models + +_logger = logging.getLogger(__name__) + + +class DistributionService(models.AbstractModel): + """ + Service for computing distribution statistics. + + Provides Gini coefficient, Lorenz curve, percentiles, and other + inequality metrics from a list of numerical values. + """ + + _name = "spp.metrics.distribution" + _description = "Distribution Computation Service" + + @api.model + def compute_distribution(self, amounts): + """ + Compute distribution statistics from a list of amounts. + + :param amounts: List of numerical values (e.g., benefit amounts) + :returns: Dictionary with distribution statistics + :rtype: dict + + Returns: + { + "count": int, + "total": float, + "minimum": float, + "maximum": float, + "mean": float, + "median": float, + "standard_deviation": float, + "percentiles": {"p10": float, "p25": float, "p50": float, "p75": float, "p90": float}, + "gini_coefficient": float, # 0.0 (perfect equality) to 1.0 (perfect inequality) + "lorenz_deciles": [{"population_share": int, "income_share": float}, ...] + } + """ + if not amounts: + return self._empty_distribution() + + sorted_amounts = sorted(amounts) + count = len(sorted_amounts) + total = sum(sorted_amounts) + minimum = sorted_amounts[0] + maximum = sorted_amounts[-1] + mean = total / count if count else 0 + + # Median + mid = count // 2 + if count % 2 == 0: + median = (sorted_amounts[mid - 1] + sorted_amounts[mid]) / 2 + else: + median = sorted_amounts[mid] + + # Standard deviation + variance = sum((x - mean) ** 2 for x in sorted_amounts) / count if count else 0 + standard_deviation = math.sqrt(variance) + + # Percentiles + percentiles = { + "p10": self._percentile(sorted_amounts, 10), + "p25": self._percentile(sorted_amounts, 25), + "p50": self._percentile(sorted_amounts, 50), + "p75": self._percentile(sorted_amounts, 75), + "p90": self._percentile(sorted_amounts, 90), + } + + # Gini coefficient + gini = self._compute_gini(sorted_amounts) + + # Lorenz curve deciles + lorenz_deciles = self._compute_lorenz_deciles(sorted_amounts, total) + + return { + "count": count, + "total": total, + "minimum": minimum, + "maximum": maximum, + "mean": mean, + "median": median, + "standard_deviation": standard_deviation, + "percentiles": percentiles, + "gini_coefficient": gini, + "lorenz_deciles": lorenz_deciles, + } + + @api.model + def _empty_distribution(self): + """Return empty distribution statistics.""" + return { + "count": 0, + "total": 0, + "minimum": 0, + "maximum": 0, + "mean": 0, + "median": 0, + "standard_deviation": 0, + "percentiles": {"p10": 0, "p25": 0, "p50": 0, "p75": 0, "p90": 0}, + "gini_coefficient": 0, + "lorenz_deciles": [], + } + + @api.model + def _percentile(self, sorted_values, percent): + """ + Compute the percentile value using linear interpolation. + + :param sorted_values: List of sorted numerical values + :param percent: Percentile to compute (0-100) + :returns: The percentile value + :rtype: float + """ + if not sorted_values: + return 0 + count = len(sorted_values) + rank = percent / 100.0 * (count - 1) + lower = int(rank) + upper = min(lower + 1, count - 1) + fraction = rank - lower + return sorted_values[lower] + fraction * (sorted_values[upper] - sorted_values[lower]) + + @api.model + def _compute_gini(self, sorted_amounts): + """ + Compute the Gini coefficient from sorted amounts. + + Uses the formula: G = (2 * sum(i * y_i) - (n + 1) * sum(y_i)) / (n * sum(y_i)) + where y_i are sorted in ascending order. 
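+ + Worked example: for [100, 150, 200, 250, 300] (n=5, total=1000), + sum(i * y_i) = 1*100 + 2*150 + 3*200 + 4*250 + 5*300 = 3500, + so G = (2*3500 - 6*1000) / (5*1000) = 1000 / 5000 = 0.2.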
+ + :param sorted_amounts: List of amounts sorted in ascending order + :returns: Gini coefficient (0.0 to 1.0) + :rtype: float + """ + n = len(sorted_amounts) + if n == 0: + return 0.0 + total = sum(sorted_amounts) + if total == 0: + return 0.0 + + weighted_sum = sum((i + 1) * y for i, y in enumerate(sorted_amounts)) + gini = (2.0 * weighted_sum - (n + 1) * total) / (n * total) + return max(0.0, min(1.0, gini)) + + @api.model + def _compute_lorenz_deciles(self, sorted_amounts, total): + """ + Compute Lorenz curve points at each decile (10%, 20%, ..., 100%). + + :param sorted_amounts: List of amounts sorted in ascending order + :param total: Sum of all amounts + :returns: List of decile points with population_share and income_share + :rtype: list[dict] + """ + if not sorted_amounts or total == 0: + return [] + + n = len(sorted_amounts) + deciles = [] + cumulative = 0.0 + + for decile in range(1, 11): + cutoff_index = int(n * decile / 10) - 1 + cutoff_index = max(0, min(cutoff_index, n - 1)) + cumulative = sum(sorted_amounts[: cutoff_index + 1]) + deciles.append( + { + "population_share": decile * 10, + "income_share": round(cumulative / total * 100, 2), + } + ) + + return deciles diff --git a/spp_metrics_services/models/fairness_service.py b/spp_metrics_services/models/fairness_service.py new file mode 100644 index 00000000..3c60b8e3 --- /dev/null +++ b/spp_metrics_services/models/fairness_service.py @@ -0,0 +1,534 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +import logging + +from odoo import api, models + +_logger = logging.getLogger(__name__) + +# Thresholds for disparity detection +DISPARITY_THRESHOLD = 0.80 +DISPARITY_WARNING_THRESHOLD = 0.70 + + +class FairnessService(models.AbstractModel): + """ + Service for computing fairness/parity metrics across demographic groups. + + Analyzes whether targeting reaches different demographic groups proportionally. + Uses configurable DemographicDimension records for analysis. + """ + + _name = "spp.metrics.fairness" + _description = "Fairness Analysis Service" + + @api.model + def compute_fairness(self, registrant_ids, base_domain=None, dimensions=None): + """ + Compute fairness metrics for a set of registrants. 
+ + Analyzes disparity across demographic dimensions: + - Gender + - Disability status + - Age group + - And other configured dimensions + + :param registrant_ids: List of partner IDs (beneficiaries) + :param base_domain: Domain for the population baseline + :param dimensions: List of dimension names to analyze (or None for all) + :returns: Dictionary with equity score and per-attribute breakdowns + :rtype: dict + """ + if not registrant_ids: + return self._empty_fairness() + + beneficiary_set = set(registrant_ids) + total_beneficiaries = len(beneficiary_set) + + # Get population baseline + partner_model = self.env["res.partner"] + population_domain = base_domain or [("is_registrant", "=", True)] + total_population = partner_model.search_count(population_domain) + + if total_population == 0: + return self._empty_fairness() + + overall_coverage = total_beneficiaries / total_population + + # Get dimensions to analyze + dimension_records = self._get_dimensions(dimensions) + + attributes = {} + equity_score = 100.0 + has_disparity = False + + # Analyze each demographic dimension + for dimension in dimension_records: + attr_result = self._analyze_dimension( + dimension, + beneficiary_set, + population_domain, + overall_coverage, + partner_model, + ) + if attr_result: + attributes[dimension.name] = attr_result + worst_ratio = attr_result.get("worst_ratio", 1.0) + + if worst_ratio < DISPARITY_WARNING_THRESHOLD: + equity_score -= 20 + has_disparity = True + elif worst_ratio < DISPARITY_THRESHOLD: + equity_score -= 5 + has_disparity = True + + equity_score = max(0.0, equity_score) + + return { + "equity_score": equity_score, + "has_disparity": has_disparity, + "overall_coverage": overall_coverage, + "total_beneficiaries": total_beneficiaries, + "total_population": total_population, + "attributes": attributes, + } + + @api.model + def _empty_fairness(self): + """Return empty fairness result.""" + return { + "equity_score": 100.0, + "has_disparity": False, + "overall_coverage": 0, + "total_beneficiaries": 0, + "total_population": 0, + "attributes": {}, + } + + def _get_dimensions(self, dimension_names=None): + """ + Get dimension records to analyze. + + Uses sudo() since dimensions are configuration data that should be + readable by all users who can compute aggregations. + + :param dimension_names: List of dimension names, or None for all active + :returns: Recordset of dimensions + """ + # Use sudo() - dimensions are configuration data, like menu items + dimension_model = self.env["spp.demographic.dimension"].sudo() # nosemgrep: odoo-sudo-without-context + if dimension_names: + return dimension_model.search( + [ + ("name", "in", dimension_names), + ("active", "=", True), + ] + ) + return dimension_model.search([("active", "=", True)]) + + def _analyze_dimension( + self, + dimension, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """ + Analyze a single demographic dimension for fairness. 
+ + :param dimension: spp.demographic.dimension record + :param beneficiary_set: Set of beneficiary partner IDs + :param base_domain: Population domain + :param overall_coverage: Overall coverage rate + :param partner_model: res.partner model + :returns: Dictionary with analysis results or None if not applicable + """ + if dimension.dimension_type == "field": + return self._analyze_field_dimension( + dimension, beneficiary_set, base_domain, overall_coverage, partner_model + ) + else: + return self._analyze_expression_dimension( + dimension, beneficiary_set, base_domain, overall_coverage, partner_model + ) + + def _analyze_field_dimension( + self, + dimension, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """Analyze a field-based dimension (e.g., gender_id.code).""" + field_path = dimension.field_path + if not field_path: + return None + + # Parse field path + parts = field_path.split(".") + base_field = parts[0] + + # Check if field exists + if base_field not in partner_model._fields: + dimension_name = dimension.name + _logger.warning("Field %s not found on res.partner for dimension %s", base_field, dimension_name) + return None + + field = partner_model._fields[base_field] + + # Handle Many2one fields + if field.type == "many2one": + return self._analyze_many2one_dimension( + dimension, base_field, beneficiary_set, base_domain, overall_coverage, partner_model + ) + + # Handle Selection fields + if field.type == "selection": + return self._analyze_selection_dimension( + dimension, base_field, beneficiary_set, base_domain, overall_coverage, partner_model + ) + + # Handle Boolean fields + if field.type == "boolean": + return self._analyze_boolean_dimension( + dimension, base_field, beneficiary_set, base_domain, overall_coverage, partner_model + ) + + dimension_name = dimension.name + _logger.warning("Unsupported field type %s for dimension %s", field.type, dimension_name) + return None + + def _analyze_many2one_dimension( + self, + dimension, + field_name, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """Analyze a Many2one field dimension. + + Uses read_group to count population and beneficiaries per group in two + database queries rather than loading all records into memory. 
+ """ + group_results = [] + worst_ratio = 1.0 + + # Population counts per group - one query + population_groups = partner_model.read_group(base_domain, [field_name], [field_name]) + # Beneficiary counts per group - one query + beneficiary_domain = base_domain + [("id", "in", list(beneficiary_set))] + beneficiary_groups = partner_model.read_group(beneficiary_domain, [field_name], [field_name]) + + # Build lookup dict from group value -> beneficiary count + # read_group returns the Many2one field as (id, display_name) tuple or False + beneficiary_counts = {} + for row in beneficiary_groups: + value = row[field_name] + value_id = value[0] if value else False + beneficiary_counts[value_id] = row[f"{field_name}_count"] + + for row in population_groups: + value = row[field_name] + if not value: + continue + value_id, display_name = value + group_total = row[f"{field_name}_count"] + + if group_total == 0: + continue + + group_beneficiaries = beneficiary_counts.get(value_id, 0) + group_coverage = group_beneficiaries / group_total + + disparity_ratio = self._compute_disparity_ratio(group_coverage, overall_coverage) + worst_ratio = min(worst_ratio, disparity_ratio) + + status = self._get_disparity_status(disparity_ratio) + # For Many2one fields, prefer display_name over dimension label mapping + # since label mappings may use codes rather than database IDs + label = display_name or dimension.get_label_for_value(str(value_id)) + + group_results.append( + { + "key": str(value_id), + "label": label, + "population": group_total, + "beneficiaries": group_beneficiaries, + "coverage": round(group_coverage * 100, 2), + "disparity_ratio": round(disparity_ratio, 4), + "status": status, + } + ) + + if not group_results: + return None + + return { + "attribute": dimension.name, + "display_name": dimension.label, + "groups": group_results, + "worst_ratio": worst_ratio, + "has_disparity": worst_ratio < DISPARITY_THRESHOLD, + } + + def _analyze_selection_dimension( + self, + dimension, + field_name, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """Analyze a Selection field dimension. + + Uses read_group to count population and beneficiaries per group in two + database queries rather than one search_count pair per selection value. 
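+
+        Illustrative read_group row (invented field and values; selection
+        cells come back as the raw selection key, or False when unset):
+            {"gender": "male", "gender_count": 97}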
+ """ + group_results = [] + worst_ratio = 1.0 + + field = partner_model._fields[field_name] + selection = field.selection + if callable(selection): + selection = selection(partner_model) + # Build label lookup from the field's selection values + label_by_key = dict(selection) + + # Population counts per group - one query + population_groups = partner_model.read_group(base_domain, [field_name], [field_name]) + # Beneficiary counts per group - one query + beneficiary_domain = base_domain + [("id", "in", list(beneficiary_set))] + beneficiary_groups = partner_model.read_group(beneficiary_domain, [field_name], [field_name]) + + # Build lookup dict from selection key -> beneficiary count + beneficiary_counts = {row[field_name]: row[f"{field_name}_count"] for row in beneficiary_groups} + + for row in population_groups: + key = row[field_name] + if key is False: + continue + group_total = row[f"{field_name}_count"] + + if group_total == 0: + continue + + group_beneficiaries = beneficiary_counts.get(key, 0) + group_coverage = group_beneficiaries / group_total + + disparity_ratio = self._compute_disparity_ratio(group_coverage, overall_coverage) + worst_ratio = min(worst_ratio, disparity_ratio) + + status = self._get_disparity_status(disparity_ratio) + display_label = dimension.get_label_for_value(key) or label_by_key.get(key, key) + + group_results.append( + { + "key": key, + "label": display_label, + "population": group_total, + "beneficiaries": group_beneficiaries, + "coverage": round(group_coverage * 100, 2), + "disparity_ratio": round(disparity_ratio, 4), + "status": status, + } + ) + + if not group_results: + return None + + return { + "attribute": dimension.name, + "display_name": dimension.label, + "groups": group_results, + "worst_ratio": worst_ratio, + "has_disparity": worst_ratio < DISPARITY_THRESHOLD, + } + + def _analyze_boolean_dimension( + self, + dimension, + field_name, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """Analyze a Boolean field dimension. + + Uses read_group to count population and beneficiaries per group in two + database queries rather than one search_count pair per boolean value. 
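+
+        Illustrative read_group rows (invented values):
+            {"is_group": True, "is_group_count": 12}
+            {"is_group": False, "is_group_count": 88}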
+ """ + group_results = [] + worst_ratio = 1.0 + + # Population counts per group - one query + population_groups = partner_model.read_group(base_domain, [field_name], [field_name]) + # Beneficiary counts per group - one query + beneficiary_domain = base_domain + [("id", "in", list(beneficiary_set))] + beneficiary_groups = partner_model.read_group(beneficiary_domain, [field_name], [field_name]) + + # Build lookup dict from boolean value -> beneficiary count + beneficiary_counts = {row[field_name]: row[f"{field_name}_count"] for row in beneficiary_groups} + + for row in population_groups: + value = row[field_name] + group_total = row[f"{field_name}_count"] + + if group_total == 0: + continue + + group_beneficiaries = beneficiary_counts.get(value, 0) + group_coverage = group_beneficiaries / group_total + + disparity_ratio = self._compute_disparity_ratio(group_coverage, overall_coverage) + worst_ratio = min(worst_ratio, disparity_ratio) + + status = self._get_disparity_status(disparity_ratio) + key = "true" if value else "false" + label = dimension.get_label_for_value(key) or ("Yes" if value else "No") + + group_results.append( + { + "key": key, + "label": label, + "population": group_total, + "beneficiaries": group_beneficiaries, + "coverage": round(group_coverage * 100, 2), + "disparity_ratio": round(disparity_ratio, 4), + "status": status, + } + ) + + if not group_results: + return None + + return { + "attribute": dimension.name, + "display_name": dimension.label, + "groups": group_results, + "worst_ratio": worst_ratio, + "has_disparity": worst_ratio < DISPARITY_THRESHOLD, + } + + def _analyze_expression_dimension( + self, + dimension, + beneficiary_set, + base_domain, + overall_coverage, + partner_model, + ): + """ + Analyze a CEL expression-based dimension. + + Evaluates the expression for each registrant to categorize them, + then computes fairness metrics per category. + + Note: CEL expressions must be evaluated per-record, so some iteration + is unavoidable. We optimize by batching the domain search. 
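+
+        Illustrative shape of the categories accumulator built below
+        (hypothetical category keys and counts):
+            {"youth": {"population": 120, "beneficiaries": 30},
+             "adult": {"population": 380, "beneficiaries": 95}}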
+        """
+        cel_service = self.env.get("spp.cel.service")
+        # env.get() returns None only when the model is not installed; an
+        # installed service is an *empty* recordset, which is falsy, so the
+        # existence check must compare against None
+        if cel_service is None:
+            dimension_name = dimension.name
+            _logger.warning("CEL service not available for dimension %s", dimension_name)
+            return None
+
+        # Categorize all registrants in population
+        # Prefetch all records at once to minimize queries
+        population = partner_model.search(base_domain)
+        categories = {}
+
+        # Check if cache service available for batch evaluation
+        cache_service = self.env.get("spp.metrics.dimension.cache")
+        if cache_service is not None:
+            # Use batch evaluation with caching
+            evaluations = cache_service.evaluate_dimension_batch(dimension, population.ids)
+            for partner_id in population.ids:
+                category = evaluations.get(partner_id, dimension.default_value or "unknown")
+                if category not in categories:
+                    categories[category] = {"population": 0, "beneficiaries": 0}
+                categories[category]["population"] += 1
+                if partner_id in beneficiary_set:
+                    categories[category]["beneficiaries"] += 1
+        else:
+            # Fallback: evaluate per-record (slower but works without cache)
+            for partner in population:
+                category = dimension.evaluate_for_record(partner)
+                if category not in categories:
+                    categories[category] = {"population": 0, "beneficiaries": 0}
+                categories[category]["population"] += 1
+                if partner.id in beneficiary_set:
+                    categories[category]["beneficiaries"] += 1
+
+        if not categories:
+            return None
+
+        group_results = []
+        worst_ratio = 1.0
+
+        for category_key, counts in categories.items():
+            population_count = counts["population"]
+            beneficiary_count = counts["beneficiaries"]
+
+            if population_count == 0:
+                continue
+
+            group_coverage = beneficiary_count / population_count
+            disparity_ratio = self._compute_disparity_ratio(group_coverage, overall_coverage)
+            worst_ratio = min(worst_ratio, disparity_ratio)
+
+            status = self._get_disparity_status(disparity_ratio)
+            label = dimension.get_label_for_value(category_key) or category_key
+
+            group_results.append(
+                {
+                    "key": category_key,
+                    "label": label,
+                    "population": population_count,
+                    "beneficiaries": beneficiary_count,
+                    "coverage": round(group_coverage * 100, 2),
+                    "disparity_ratio": round(disparity_ratio, 4),
+                    "status": status,
+                }
+            )
+
+        return {
+            "attribute": dimension.name,
+            "display_name": dimension.label,
+            "groups": group_results,
+            "worst_ratio": worst_ratio,
+            "has_disparity": worst_ratio < DISPARITY_THRESHOLD,
+        }
+
+    @api.model
+    def _compute_disparity_ratio(self, group_coverage, overall_coverage):
+        """
+        Compute the disparity ratio for a group.
+
+        :param group_coverage: Coverage rate for the group (0.0 to 1.0)
+        :param overall_coverage: Overall coverage rate (0.0 to 1.0)
+        :returns: Disparity ratio (group_coverage / overall_coverage)
+        :rtype: float
+        """
+        if overall_coverage == 0:
+            return 0.0
+        return group_coverage / overall_coverage
+
+    @api.model
+    def _get_disparity_status(self, disparity_ratio):
+        """
+        Get the disparity status label for a ratio.
+
+        :param disparity_ratio: The disparity ratio
+        :returns: Status string
+        :rtype: str
+        """
+        if disparity_ratio < DISPARITY_WARNING_THRESHOLD:
+            return "under_represented"
+        elif disparity_ratio < DISPARITY_THRESHOLD:
+            return "low_coverage"
+        return "proportional"
diff --git a/spp_metrics_services/models/privacy_service.py b/spp_metrics_services/models/privacy_service.py
new file mode 100644
index 00000000..341d5789
--- /dev/null
+++ b/spp_metrics_services/models/privacy_service.py
@@ -0,0 +1,398 @@
+# Part of OpenSPP. See LICENSE file for full copyright and licensing details.
+import logging
+
+from odoo import api, models
+
+_logger = logging.getLogger(__name__)
+
+
+class PrivacyEnforcerService(models.AbstractModel):
+    """
+    Service for enforcing privacy protections on aggregation results.
+
+    Implements k-anonymity with complementary suppression to prevent
+    differencing attacks. When a cell is suppressed due to low count,
+    at least one sibling cell is also suppressed to prevent derivation.
+
+    Example of differencing-attack prevention:
+    - Area A total = 1000
+    - Area A, Female = 5 → suppressed (below the k threshold)
+    - Area A, Male = 995 → published next to the total, it would let
+      Female = 5 be derived, so the Male sibling cell is suppressed too
+
+    Also handles access level enforcement (aggregate vs individual).
+    """
+
+    _name = "spp.metrics.privacy"
+    _description = "Privacy Enforcement Service"
+
+    DEFAULT_K_THRESHOLD = 5
+
+    @api.model
+    def enforce(self, result, k_threshold=None, access_level="aggregate"):
+        """
+        Apply privacy protections to aggregation results.
+
+        :param result: Dictionary with aggregation results
+        :param k_threshold: Minimum count before suppression (default: 5)
+        :param access_level: "aggregate" or "individual"
+        :returns: Privacy-protected result dictionary
+        :rtype: dict
+        """
+        if k_threshold is None:
+            k_threshold = self.DEFAULT_K_THRESHOLD
+
+        result = dict(result)  # Don't modify original
+
+        # Apply access level restrictions
+        if access_level == "aggregate":
+            result = self._strip_individual_ids(result)
+
+        # Apply k-anonymity to breakdowns
+        if "breakdown" in result:
+            result["breakdown"] = self._apply_k_anonymity(result["breakdown"], k_threshold)
+
+        return result
+
+    def _strip_individual_ids(self, result):
+        """
+        Remove any individual record IDs from results.
+
+        :param result: Result dictionary
+        :returns: Result with IDs removed
+        :rtype: dict
+        """
+        result = dict(result)
+
+        # Remove top-level IDs
+        for key in ("registrant_ids", "partner_ids", "ids"):
+            result.pop(key, None)
+
+        # Remove IDs from breakdown cells, copying the breakdown dict and each
+        # cell first: dict(result) is a shallow copy, so popping keys from the
+        # nested cell dicts in place would mutate the caller's data
+        if "breakdown" in result:
+            breakdown = {}
+            for cell_key, cell_data in result["breakdown"].items():
+                if isinstance(cell_data, dict):
+                    cell_data = dict(cell_data)
+                    for id_key in ("registrant_ids", "partner_ids", "ids"):
+                        cell_data.pop(id_key, None)
+                breakdown[cell_key] = cell_data
+            result["breakdown"] = breakdown
+
+        return result
+
+    def _apply_k_anonymity(self, breakdown, k_threshold):
+        """
+        Apply k-anonymity with complementary suppression.
+
+        Uses dimension-aware complementary suppression to prevent derivation attacks:
+        for each suppressed cell, we check each dimension and ensure at least one
+        sibling in each dimension slice is also suppressed. This prevents derivation
+        from ANY marginal total.
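+
+        Worked example (invented counts, k_threshold=5):
+            {"male|urban": 3, "female|urban": 40, "male|rural": 25}
+            "male|urban" is primary-suppressed (3 < 5); the "male" slice then
+            has a single unsuppressed cell left, so "male|rural" is
+            complementarily suppressed, and likewise "female|urban" for the
+            "urban" slice.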
+ + :param breakdown: Dictionary of breakdown cells + :param k_threshold: Minimum count threshold + :returns: Suppressed breakdown dictionary + :rtype: dict + """ + if not breakdown: + return breakdown + + breakdown = {k: dict(v) if isinstance(v, dict) else v for k, v in breakdown.items()} + + # Step 1: Mark cells below threshold (primary suppression) + suppressed_keys = set() + for key, cell in breakdown.items(): + if isinstance(cell, dict): + count = cell.get("count", 0) + if isinstance(count, int) and count < k_threshold: + suppressed_keys.add(key) + + # Step 2: Complementary suppression (dimension-aware) + # For each suppressed cell, check EACH dimension slice + for suppressed_key in list(suppressed_keys): + parts = suppressed_key.split("|") + num_dims = len(parts) + + if num_dims == 1: + # Single dimension - use simple sibling logic + siblings = self._find_siblings(suppressed_key, breakdown) + non_suppressed = [s for s in siblings if s not in suppressed_keys] + if len(non_suppressed) == 1: + suppressed_keys.add(non_suppressed[0]) + elif len(non_suppressed) > 1: + smallest = self._get_smallest_sibling(non_suppressed, breakdown) + if smallest: + suppressed_keys.add(smallest) + else: + # Multi-dimensional - check each dimension slice + for dim_idx in range(num_dims): + # Find siblings that share this dimension value + slice_siblings = self._find_dimension_siblings(suppressed_key, dim_idx, breakdown) + non_suppressed = [s for s in slice_siblings if s not in suppressed_keys] + + # If only one cell left in this slice, must suppress it + if len(non_suppressed) == 1: + suppressed_keys.add(non_suppressed[0]) + elif len(non_suppressed) > 1: + # Check if this slice needs any suppression yet + already_suppressed_in_slice = any( + s in suppressed_keys and s != suppressed_key for s in slice_siblings + ) + if not already_suppressed_in_slice: + smallest = self._get_smallest_sibling(non_suppressed, breakdown) + if smallest: + suppressed_keys.add(smallest) + + # Step 3: Apply suppression + for key in suppressed_keys: + if key in breakdown: + original_cell = breakdown[key] + if isinstance(original_cell, dict): + breakdown[key] = { + "count": f"<{k_threshold}", + "suppressed": True, + "statistics": {}, # No statistics for suppressed cells + "original_key": key, + } + # Preserve any non-sensitive metadata + for meta_key in ("label", "display_name"): + if meta_key in original_cell: + breakdown[key][meta_key] = original_cell[meta_key] + + return breakdown + + def _find_dimension_siblings(self, key, dim_idx, breakdown): + """ + Find sibling cells that share the same value at a specific dimension. + + For key "male|urban" and dim_idx=0, this finds cells like "male|rural" + (same first dimension "male", different other dimensions). 
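+
+        e.g., given hypothetical cells {"male|urban", "male|rural", "female|urban"}:
+            dim_idx=0 for "male|urban" -> ["male|rural"]
+            dim_idx=1 for "male|urban" -> ["female|urban"]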
+ + :param key: Cell key (pipe-separated dimensions) + :param dim_idx: Index of dimension that must match + :param breakdown: Breakdown dictionary + :returns: List of sibling keys (excludes the key itself) + :rtype: list[str] + """ + parts = key.split("|") + if dim_idx >= len(parts): + return [] + + target_value = parts[dim_idx] + siblings = [] + + for other_key in breakdown: + if other_key == key: + continue + other_parts = other_key.split("|") + # Must have same number of dimensions + if len(other_parts) != len(parts): + continue + # Must have same value at the target dimension + if other_parts[dim_idx] == target_value: + siblings.append(other_key) + + return siblings + + def _find_cells_in_slice(self, key, dim_idx, breakdown): + """ + Find all cells that share the same value at a specific dimension index. + + For example, for key "male|urban" and dim_idx=0, this finds all cells + starting with "male" (male|urban, male|rural, etc.). + + :param key: Cell key (pipe-separated dimensions) + :param dim_idx: Index of dimension to match + :param breakdown: Breakdown dictionary + :returns: List of keys in this dimension slice + :rtype: list[str] + """ + parts = key.split("|") + if dim_idx >= len(parts): + return [] + + target_value = parts[dim_idx] + slice_cells = [] + + for other_key in breakdown: + other_parts = other_key.split("|") + # Must have same number of dimensions and same value at dim_idx + if len(other_parts) == len(parts) and other_parts[dim_idx] == target_value: + slice_cells.append(other_key) + + return slice_cells + + def _find_siblings(self, key, breakdown): + """ + Find sibling cells that share all but one dimension. + + Siblings are cells that: + - Have the same number of dimension parts + - Differ by exactly one part + + :param key: Cell key (pipe-separated dimensions) + :param breakdown: Breakdown dictionary + :returns: List of sibling keys + :rtype: list[str] + """ + parts = key.split("|") + siblings = [] + + for other_key in breakdown: + if other_key == key: + continue + + other_parts = other_key.split("|") + + # Same number of dimensions + if len(other_parts) != len(parts): + continue + + # Differ by exactly one part + diff_count = sum(1 for a, b in zip(parts, other_parts, strict=False) if a != b) + if diff_count == 1: + siblings.append(other_key) + + return siblings + + def _get_smallest_sibling(self, siblings, breakdown): + """ + Get the sibling with the smallest count. + + :param siblings: List of sibling keys + :param breakdown: Breakdown dictionary + :returns: Key of smallest sibling, or None + :rtype: str or None + """ + smallest_key = None + smallest_count = float("inf") + + for sibling_key in siblings: + cell = breakdown.get(sibling_key) + if isinstance(cell, dict): + count = cell.get("count", 0) + if isinstance(count, int) and count < smallest_count: + smallest_count = count + smallest_key = sibling_key + + return smallest_key + + @api.model + def is_count_suppressed(self, count, k_threshold=None): + """ + Check if a count should be suppressed. + + :param count: The count value + :param k_threshold: Minimum threshold (default: 5) + :returns: True if count should be suppressed + :rtype: bool + """ + if k_threshold is None: + k_threshold = self.DEFAULT_K_THRESHOLD + + if not isinstance(count, int): + return False + + return count < k_threshold + + @api.model + def format_suppressed_count(self, count, k_threshold=None, display_mode="less_than"): + """ + Format a suppressed count for display. 
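+
+        e.g., count=3 with k_threshold=5 renders as None ("null"), "*"
+        ("asterisk"), or "<5" ("less_than"); count=7 renders as "7".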
+ + :param count: The count value + :param k_threshold: Minimum threshold + :param display_mode: "null", "asterisk", or "less_than" + :returns: Formatted display value + :rtype: str or None + """ + if k_threshold is None: + k_threshold = self.DEFAULT_K_THRESHOLD + + if not self.is_count_suppressed(count, k_threshold): + return str(count) + + if display_mode == "null": + return None + elif display_mode == "asterisk": + return "*" + else: # less_than + return f"<{k_threshold}" + + @api.model + def suppress_value(self, value, count, k_threshold=None, stat_config=None): + """ + Unified suppression with precedence: max(user_threshold, stat_threshold). + + Single source of truth for all suppression decisions. + + :param value: The computed value to potentially suppress + :param count: The underlying count for suppression check + :param k_threshold: User-level k-anonymity threshold (from access rules) + :param stat_config: Optional dict with per-statistic config: + - minimum_count: Statistic-level threshold + - suppression_display: How to display ('null', 'asterisk', 'less_than') + :returns: Tuple of (display_value, is_suppressed) + :rtype: tuple + """ + if value is None: + return value, False + + # Determine effective threshold with precedence + base_threshold = k_threshold or self.DEFAULT_K_THRESHOLD + effective_threshold = base_threshold + display_mode = "less_than" + + if stat_config: + stat_threshold = stat_config.get("minimum_count") or 0 + if stat_threshold: + effective_threshold = max(effective_threshold, stat_threshold) + display_mode = stat_config.get("suppression_display") or display_mode + + # Check suppression + if self.is_count_suppressed(count, effective_threshold): + formatted = self.format_suppressed_count(count, k_threshold=effective_threshold, display_mode=display_mode) + return formatted, True + + return value, False + + @api.model + def validate_access_level(self, user=None): + """ + Determine the access level for a user. + + :param user: res.users record (defaults to current user) + :returns: "aggregate" or "individual" + :rtype: str + """ + user = user or self.env.user + + # Check for access rule (use sudo for internal security check) + # Use defensive lookup - model may not be installed + access_rule_model = self.env.get("spp.aggregation.access.rule") + if access_rule_model is not None: + rule = access_rule_model.sudo().get_effective_rule_for_user(user) # nosemgrep: odoo-sudo-without-context + if rule: + return rule.access_level + + # Default to aggregate-only for safety + return "aggregate" + + @api.model + def get_k_threshold(self, user=None, context=None): + """ + Get the k-anonymity threshold for a user/context. 
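+
+        For example, a matching access rule with minimum_k_anonymity = 10
+        yields 10; with no rule installed or matching, the default of 5 applies.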
+ + :param user: res.users record (defaults to current user) + :param context: Optional context string (e.g., "api", "dashboard") + :returns: k threshold value + :rtype: int + """ + user = user or self.env.user + + # Check for access rule (use sudo for internal security check) + # Use defensive lookup - model may not be installed + access_rule_model = self.env.get("spp.aggregation.access.rule") + if access_rule_model is not None: + rule = access_rule_model.sudo().get_effective_rule_for_user(user) # nosemgrep: odoo-sudo-without-context + if rule: + return rule.minimum_k_anonymity + + # Default threshold + return self.DEFAULT_K_THRESHOLD diff --git a/spp_metrics_services/pyproject.toml b/spp_metrics_services/pyproject.toml new file mode 100644 index 00000000..4231d0cc --- /dev/null +++ b/spp_metrics_services/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["whool"] +build-backend = "whool.buildapi" diff --git a/spp_metrics_services/security/ir.model.access.csv b/spp_metrics_services/security/ir.model.access.csv new file mode 100644 index 00000000..92e14342 --- /dev/null +++ b/spp_metrics_services/security/ir.model.access.csv @@ -0,0 +1,3 @@ +id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink +access_spp_demographic_dimension_read,spp.demographic.dimension read,model_spp_demographic_dimension,base.group_user,1,0,0,0 +access_spp_demographic_dimension_write,spp.demographic.dimension write,model_spp_demographic_dimension,base.group_system,1,1,1,1 diff --git a/spp_metrics_services/tests/__init__.py b/spp_metrics_services/tests/__init__.py new file mode 100644 index 00000000..e8f6cd1e --- /dev/null +++ b/spp_metrics_services/tests/__init__.py @@ -0,0 +1,4 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from . import test_services +from . import test_dimension_cache diff --git a/spp_metrics_services/tests/test_dimension_cache.py b/spp_metrics_services/tests/test_dimension_cache.py new file mode 100644 index 00000000..e9a9151b --- /dev/null +++ b/spp_metrics_services/tests/test_dimension_cache.py @@ -0,0 +1,221 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. 
+import time
+
+from odoo.tests import TransactionCase, tagged
+
+
+@tagged("post_install", "-at_install")
+class TestDimensionCache(TransactionCase):
+    """Test dimension evaluation caching for performance."""
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.cache_service = cls.env["spp.metrics.dimension.cache"]
+        cls.dimension_model = cls.env["spp.demographic.dimension"]
+        cls.partner_model = cls.env["res.partner"]
+
+        # Create test dimension (field-based using is_group boolean field)
+        cls.registrant_type_dimension = cls.dimension_model.create(
+            {
+                "name": "test_registrant_type_cache",
+                "label": "Registrant Type (Cache Test)",
+                "dimension_type": "field",
+                "field_path": "is_group",
+            }
+        )
+
+        # Create test registrants in a single batch create call to keep
+        # test setup fast (100 records for performance testing)
+        cls.registrants = cls.partner_model.create(
+            [
+                {
+                    "name": f"Test Registrant {i}",
+                    "is_registrant": True,
+                    "is_group": i % 2 == 0,  # Alternate between groups and individuals
+                }
+                for i in range(100)
+            ]
+        )
+
+        cls.registrant_ids = cls.registrants.ids
+
+    def test_cache_basic_functionality(self):
+        """Test that cache stores and retrieves evaluations correctly."""
+        # First call should compute and cache
+        result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids)
+
+        self.assertEqual(len(result1), 100, "Should evaluate all 100 registrants")
+        self.assertIn(self.registrants[0].id, result1)
+
+        # Second call should hit cache
+        result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids)
+
+        # Results should be identical
+        self.assertEqual(result1, result2, "Cached result should match original")
+
+    def test_cache_performance(self):
+        """Test that caching improves, or at least does not hurt, performance."""
+        # Clear cache first
+        self.cache_service.clear_dimension_cache()
+
+        # First call (cache miss) - measure time
+        start = time.time()
+        result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids)
+        time_uncached = time.time() - start
+
+        # Second call (cache hit) - measure time
+        start = time.time()
+        result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids)
+        time_cached = time.time() - start
+
+        # Cache should be faster
+        # Note: With only 100 records, the speedup might be modest
+        # In production with 10k+ records, expect 5-10x speedup
+        self.assertTrue(
+            time_cached < time_uncached or time_cached < 0.1,
+            f"Cache should be fast: {time_cached:.4f}s vs {time_uncached:.4f}s",
+        )
+
+        # Results should be identical
+        self.assertEqual(result1, result2)
+
+    def test_cache_invalidation_on_write(self):
+        """Test that cache is invalidated when dimension is modified."""
+        # Populate cache
+        result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:10])
+
+        # Modify dimension
+        self.registrant_type_dimension.write({"label": "Registrant Type Modified"})
+
+        # Evaluate again - should compute fresh (cache cleared)
+        result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:10])
+
+        # Results should still be valid (same values, just re-computed)
+        self.assertEqual(len(result2), 10)
+        self.assertEqual(result1, result2, "Results should be same after re-computation")
+
+    def test_cache_invalidation_on_unlink(self):
+        """Test that cache is invalidated when dimension is deleted."""
+        #
Create temporary dimension + temp_dimension = self.dimension_model.create( + { + "name": "temp_dimension", + "label": "Temporary Dimension", + "dimension_type": "field", + "field_path": "name", + } + ) + + # Populate cache + result1 = self.cache_service.evaluate_dimension_batch(temp_dimension, self.registrant_ids[:5]) + self.assertEqual(len(result1), 5) + + # Delete dimension (should clear cache) + temp_dimension.unlink() + + # Cache should be cleared (we can't verify directly, but ensure no errors) + # Just verify the service is still functional + result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:5]) + self.assertEqual(len(result2), 5) + + def test_cache_with_different_registrant_sets(self): + """Test that cache handles different registrant sets correctly.""" + # Cache for first 50 registrants + result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:50]) + + # Cache for last 50 registrants + result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[50:]) + + # Both should be cached independently + self.assertEqual(len(result1), 50) + self.assertEqual(len(result2), 50) + + # Verify no overlap in keys + for key in result1: + self.assertNotIn(key, result2) + + def test_cache_warm_functionality(self): + """Test that cache warming works correctly.""" + # Clear cache first + self.cache_service.clear_dimension_cache() + + # Create multiple dimensions + name_dimension = self.dimension_model.create( + { + "name": "test_name_cache", + "label": "Name (Cache Test)", + "dimension_type": "field", + "field_path": "name", + } + ) + + dimensions = [self.registrant_type_dimension, name_dimension] + + # Warm cache + self.cache_service.warm_cache(dimensions, self.registrant_ids[:20]) + + # Verify cache is populated (by checking that subsequent calls are fast) + start = time.time() + result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:20]) + time_cached = time.time() - start + + self.assertEqual(len(result1), 20) + self.assertTrue(time_cached < 0.1, f"Warmed cache should be fast: {time_cached:.4f}s") + + def test_cache_clear_all(self): + """Test that clearing all caches works.""" + # Populate cache for multiple dimensions + result1 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:10]) + + # Clear all caches + self.cache_service.clear_dimension_cache() + + # Next call should re-compute (we can't verify cache miss directly, + # but ensure results are still correct) + result2 = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, self.registrant_ids[:10]) + + self.assertEqual(result1, result2, "Results should be same after cache clear") + + def test_cache_with_empty_registrants(self): + """Test that cache handles empty registrant lists gracefully.""" + result = self.cache_service.evaluate_dimension_batch(self.registrant_type_dimension, []) + + self.assertEqual(result, {}, "Empty registrant list should return empty dict") + + def test_cache_with_none_dimension(self): + """Test that cache handles None dimension gracefully.""" + result = self.cache_service.evaluate_dimension_batch(None, self.registrant_ids) + + self.assertEqual(result, {}, "None dimension should return empty dict") + + def test_breakdown_service_uses_cache(self): + """Test that breakdown service benefits from caching.""" + breakdown_service = 
self.env["spp.metrics.breakdown"] + + # Clear cache first + self.cache_service.clear_dimension_cache() + + # First breakdown (cache miss) + start = time.time() + result1 = breakdown_service.compute_breakdown(self.registrant_ids, ["test_registrant_type_cache"]) + time_uncached = time.time() - start + + # Second breakdown (cache hit) + start = time.time() + result2 = breakdown_service.compute_breakdown(self.registrant_ids, ["test_registrant_type_cache"]) + time_cached = time.time() - start + + # Results should be identical + self.assertEqual(result1, result2, "Breakdown results should be same with cache") + + # Cache should be faster + self.assertTrue( + time_cached <= time_uncached or time_cached < 0.1, + f"Cached breakdown should be fast: {time_cached:.4f}s vs {time_uncached:.4f}s", + ) + + # Verify breakdown structure + self.assertGreater(len(result1), 0, "Breakdown should have results") + for _key, value in result1.items(): + self.assertIn("count", value) + self.assertIn("labels", value) diff --git a/spp_metrics_services/tests/test_services.py b/spp_metrics_services/tests/test_services.py new file mode 100644 index 00000000..c44fb924 --- /dev/null +++ b/spp_metrics_services/tests/test_services.py @@ -0,0 +1,246 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. + +from odoo.tests import TransactionCase, tagged + + +@tagged("post_install", "-at_install") +class TestMetricsServices(TransactionCase): + """Test that all metrics services are accessible and functional.""" + + @classmethod + def setUpClass(cls): + super().setUpClass() + + # Create test registrants + cls.partner_model = cls.env["res.partner"] + cls.registrant1 = cls.partner_model.create( + { + "name": "Test Registrant 1", + "is_registrant": True, + "is_group": False, + } + ) + cls.registrant2 = cls.partner_model.create( + { + "name": "Test Registrant 2", + "is_registrant": True, + "is_group": False, + } + ) + cls.registrant3 = cls.partner_model.create( + { + "name": "Test Registrant 3", + "is_registrant": True, + "is_group": False, + } + ) + + cls.registrant_ids = [cls.registrant1.id, cls.registrant2.id, cls.registrant3.id] + + def test_fairness_service_exists(self): + """Test that fairness service is accessible.""" + fairness_service = self.env.get("spp.metrics.fairness") + self.assertIsNotNone(fairness_service, "Fairness service should be accessible") + + def test_distribution_service_exists(self): + """Test that distribution service is accessible.""" + distribution_service = self.env.get("spp.metrics.distribution") + self.assertIsNotNone(distribution_service, "Distribution service should be accessible") + + def test_privacy_service_exists(self): + """Test that privacy service is accessible.""" + privacy_service = self.env.get("spp.metrics.privacy") + self.assertIsNotNone(privacy_service, "Privacy service should be accessible") + + def test_breakdown_service_exists(self): + """Test that breakdown service is accessible.""" + breakdown_service = self.env.get("spp.metrics.breakdown") + self.assertIsNotNone(breakdown_service, "Breakdown service should be accessible") + + def test_fairness_service_compute(self): + """Test fairness computation works.""" + fairness_service = self.env["spp.metrics.fairness"] + result = fairness_service.compute_fairness( + self.registrant_ids, + base_domain=[("is_registrant", "=", True)], + ) + + self.assertIn("equity_score", result) + self.assertIn("has_disparity", result) + self.assertIn("total_beneficiaries", result) + self.assertEqual(result["total_beneficiaries"], 3) 
+
+    def test_distribution_service_compute(self):
+        """Test distribution computation works."""
+        distribution_service = self.env["spp.metrics.distribution"]
+        amounts = [100, 200, 300, 400, 500]
+        result = distribution_service.compute_distribution(amounts)
+
+        self.assertIn("count", result)
+        self.assertIn("mean", result)
+        self.assertIn("median", result)
+        self.assertIn("gini_coefficient", result)
+        self.assertEqual(result["count"], 5)
+        self.assertEqual(result["mean"], 300)
+
+    def test_privacy_service_enforce(self):
+        """Test privacy enforcement works."""
+        privacy_service = self.env["spp.metrics.privacy"]
+        test_result = {
+            "total_count": 10,
+            "breakdown": {
+                "male": {"count": 2},
+                "female": {"count": 8},
+            },
+        }
+
+        protected_result = privacy_service.enforce(test_result, k_threshold=5)
+
+        self.assertIn("breakdown", protected_result)
+        # Cell with count=2 should be suppressed
+        self.assertTrue(protected_result["breakdown"]["male"].get("suppressed", False))
+
+    def test_breakdown_service_compute(self):
+        """Test breakdown computation works."""
+        breakdown_service = self.env["spp.metrics.breakdown"]
+
+        # Only run when spp.demographic.dimension is installed.
+        # env.get() returns None for a missing model; an installed model is an
+        # empty recordset (falsy), so a plain truthiness check would never run
+        dimension_model = self.env.get("spp.demographic.dimension")
+        if dimension_model is not None:
+            # For now, test with empty group_by
+            result = breakdown_service.compute_breakdown(self.registrant_ids, [])
+            self.assertEqual(result, {}, "Empty group_by should return empty breakdown")
+
+    def test_empty_inputs(self):
+        """Test services handle empty inputs gracefully."""
+        fairness_service = self.env["spp.metrics.fairness"]
+        distribution_service = self.env["spp.metrics.distribution"]
+        breakdown_service = self.env["spp.metrics.breakdown"]
+
+        # Test fairness with empty registrants
+        fairness_result = fairness_service.compute_fairness([])
+        self.assertEqual(fairness_result["total_beneficiaries"], 0)
+        self.assertEqual(fairness_result["equity_score"], 100.0)
+
+        # Test distribution with empty amounts
+        dist_result = distribution_service.compute_distribution([])
+        self.assertEqual(dist_result["count"], 0)
+
+        # Test breakdown with empty registrants
+        breakdown_result = breakdown_service.compute_breakdown([], ["gender"])
+        self.assertEqual(breakdown_result, {})
+
+
+@tagged("post_install", "-at_install")
+class TestPrivacySuppressionUnified(TransactionCase):
+    """Test unified suppression logic in privacy service."""
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.privacy_service = cls.env["spp.metrics.privacy"]
+
+    def test_suppress_value_no_suppression_default_threshold(self):
+        """Test suppress_value with count above default threshold (5)."""
+        value = 100
+        count = 10
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count)
+
+        self.assertEqual(result_value, 100)
+        self.assertFalse(is_suppressed)
+
+    def test_suppress_value_with_suppression_default_threshold(self):
+        """Test suppress_value with count below default threshold (5)."""
+        value = 50
+        count = 3
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count)
+
+        self.assertEqual(result_value, "<5")
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_with_user_threshold(self):
+        """Test suppress_value with user-level k-anonymity threshold."""
+        value = 100
+        count = 8
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count, k_threshold=10)
+
+        self.assertEqual(result_value, "<10")
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_with_stat_config_raising_threshold(self):
+        """Test suppress_value with stat config raising threshold above user threshold."""
+        value = 100
+        count = 7
+        stat_config = {"minimum_count": 10, "suppression_display": "asterisk"}
+
+        result_value, is_suppressed = self.privacy_service.suppress_value(
+            value, count, k_threshold=5, stat_config=stat_config
+        )
+
+        # Effective threshold = max(5, 10) = 10, so count=7 is suppressed
+        self.assertEqual(result_value, "*")
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_precedence_max(self):
+        """Test precedence: effective_threshold = max(user, stat)."""
+        value = 100
+
+        # Case 1: stat threshold (10) higher than user threshold (5)
+        count_7 = 7
+        stat_config = {"minimum_count": 10, "suppression_display": "less_than"}
+        result_value, is_suppressed = self.privacy_service.suppress_value(
+            value, count_7, k_threshold=5, stat_config=stat_config
+        )
+        self.assertEqual(result_value, "<10")
+        self.assertTrue(is_suppressed)
+
+        # Case 2: user threshold (15) higher than stat threshold (10)
+        count_12 = 12
+        result_value2, is_suppressed2 = self.privacy_service.suppress_value(
+            value, count_12, k_threshold=15, stat_config=stat_config
+        )
+        self.assertEqual(result_value2, "<15")
+        self.assertTrue(is_suppressed2)
+
+    def test_suppress_value_display_mode_null(self):
+        """Test suppress_value with null display mode."""
+        value = 50
+        count = 3
+        stat_config = {"minimum_count": 5, "suppression_display": "null"}
+
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count, stat_config=stat_config)
+
+        self.assertIsNone(result_value)
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_display_mode_asterisk(self):
+        """Test suppress_value with asterisk display mode."""
+        value = 50
+        count = 3
+        stat_config = {"minimum_count": 5, "suppression_display": "asterisk"}
+
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count, stat_config=stat_config)
+
+        self.assertEqual(result_value, "*")
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_display_mode_less_than(self):
+        """Test suppress_value with less_than display mode."""
+        value = 50
+        count = 3
+        stat_config = {"minimum_count": 8, "suppression_display": "less_than"}
+
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count, stat_config=stat_config)
+
+        self.assertEqual(result_value, "<8")
+        self.assertTrue(is_suppressed)
+
+    def test_suppress_value_none_value_passes_through(self):
+        """Test suppress_value with None value passes through unsuppressed."""
+        value = None
+        count = 3
+
+        result_value, is_suppressed = self.privacy_service.suppress_value(value, count)
+
+        self.assertIsNone(result_value)
+        self.assertFalse(is_suppressed)
diff --git a/spp_metrics_services/views/demographic_dimension_views.xml b/spp_metrics_services/views/demographic_dimension_views.xml
new file mode 100644
index 00000000..99cde3a8
--- /dev/null
+++ b/spp_metrics_services/views/demographic_dimension_views.xml
@@ -0,0 +1,130 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<odoo>
+    <!-- Form view -->
+    <record id="view_spp_demographic_dimension_form" model="ir.ui.view">
+        <field name="name">spp.demographic.dimension.form</field>
+        <field name="model">spp.demographic.dimension</field>
+        <field name="arch" type="xml">
+            <form>
+                <sheet>
+                    <group>
+                        <group>
+                            <field name="name" />
+                            <field name="label" />
+                            <field name="dimension_type" />
+                            <field name="active" />
+                        </group>
+                        <group>
+                            <field name="field_path" />
+                            <field name="default_value" />
+                        </group>
+                    </group>
+                </sheet>
+            </form>
+        </field>
+    </record>
+
+    <!-- List view -->
+    <record id="view_spp_demographic_dimension_list" model="ir.ui.view">
+        <field name="name">spp.demographic.dimension.list</field>
+        <field name="model">spp.demographic.dimension</field>
+        <field name="arch" type="xml">
+            <list>
+                <field name="name" />
+                <field name="label" />
+                <field name="dimension_type" />
+                <field name="field_path" />
+            </list>
+        </field>
+    </record>
+
+    <!-- Search view -->
+    <record id="view_spp_demographic_dimension_search" model="ir.ui.view">
+        <field name="name">spp.demographic.dimension.search</field>
+        <field name="model">spp.demographic.dimension</field>
+        <field name="arch" type="xml">
+            <search>
+                <field name="name" />
+                <field name="label" />
+                <field name="dimension_type" />
+            </search>
+        </field>
+    </record>
+
+    <!-- Action -->
+    <record id="action_spp_demographic_dimension" model="ir.actions.act_window">
+        <field name="name">Demographic Dimensions</field>
+        <field name="res_model">spp.demographic.dimension</field>
+        <field name="view_mode">list,form</field>
+        <field name="help" type="html">
+            <p class="o_view_nocontent_smiling_face">
+                Configure demographic dimensions
+            </p>
+            <p>
+                Dimensions define how to categorize registrants for breakdowns
+                (e.g., by gender, disability status, area, age group).
+            </p>
+        </field>
+    </record>
+</odoo>