diff --git a/package.json b/package.json index 657455ebc..988c604a7 100644 --- a/package.json +++ b/package.json @@ -111,7 +111,7 @@ "@types/lodash.pickby": "^4.6.6", "@types/mime-types": "^2.1.3", "@types/multer": "^1.4.9", - "@types/node": "^18.11.4", + "@types/node": "^24.4.0", "@types/react-dom": "^17.0.9", "@types/styled-components": "^5.1.12", "@types/website-scraper": "^1.2.10", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 24eb4ec4a..95d0c36b2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -154,13 +154,13 @@ importers: version: 7.120.4(react@16.14.0) '@ts-rest/core': specifier: ^3.30.5 - version: 3.52.1(@types/node@18.19.130)(zod@3.22.4) + version: 3.52.1(@types/node@24.11.0)(zod@3.22.4) '@ts-rest/express': specifier: ^3.30.5 - version: 3.52.1(@ts-rest/core@3.52.1(@types/node@18.19.130)(zod@3.22.4))(express@4.22.1)(zod@3.22.4) + version: 3.52.1(@ts-rest/core@3.52.1(@types/node@24.11.0)(zod@3.22.4))(express@4.22.1)(zod@3.22.4) '@ts-rest/open-api': specifier: ^3.30.5 - version: 3.52.1(@ts-rest/core@3.52.1(@types/node@18.19.130)(zod@3.22.4))(zod@3.22.4) + version: 3.52.1(@ts-rest/core@3.52.1(@types/node@24.11.0)(zod@3.22.4))(zod@3.22.4) '@types/html-minifier': specifier: ^4.0.1 version: 4.0.6 @@ -186,8 +186,8 @@ importers: specifier: ^1.4.9 version: 1.4.13 '@types/node': - specifier: ^18.11.4 - version: 18.19.130 + specifier: ^24.4.0 + version: 24.11.0 '@types/react-dom': specifier: ^17.0.9 version: 17.0.26(@types/react@16.14.69) @@ -577,10 +577,10 @@ importers: version: 6.37.7(pg@8.18.0) sequelize-typescript: specifier: ^2.1.5 - version: 2.1.6(@types/node@18.19.130)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)) + version: 2.1.6(@types/node@24.11.0)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)) sequelize-typescript-generator: specifier: ^10.1.2 - version: 10.1.2(@types/node@18.19.130)(@types/validator@13.15.10)(pg@8.18.0)(reflect-metadata@0.1.14)(typescript@5.9.3) + version: 
10.1.2(@types/node@24.11.0)(@types/validator@13.15.10)(pg@8.18.0)(reflect-metadata@0.1.14)(typescript@5.9.3) sitemap: specifier: ^6.2.0 version: 6.4.0 @@ -604,7 +604,7 @@ importers: version: 2.1.1 ts-node: specifier: ^10.9.2 - version: 10.9.2(@types/node@18.19.130)(typescript@5.9.3) + version: 10.9.2(@types/node@24.11.0)(typescript@5.9.3) tsx: specifier: ^4.21.0 version: 4.21.0 @@ -628,7 +628,7 @@ importers: version: 1.0.9 vite-tsconfig-paths: specifier: ^5.1.4 - version: 5.1.4(typescript@5.9.3)(vite@7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)) + version: 5.1.4(typescript@5.9.3)(vite@7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)) xmlbuilder: specifier: ^13.0.2 version: 13.0.2 @@ -890,7 +890,7 @@ importers: version: 5.9.3 vitest: specifier: ^4.0.10 - version: 4.0.18(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) + version: 4.0.18(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) webpack: specifier: ^4.41.5 version: 4.47.0(webpack-cli@3.3.12) @@ -2159,7 +2159,6 @@ packages: '@esbuild/linux-arm64@0.27.3': resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==} engines: {node: '>=18'} - cpu: [arm64] os: [linux] '@esbuild/linux-arm@0.27.3': @@ -3934,8 +3933,8 @@ packages: '@types/node@16.18.126': resolution: {integrity: sha512-OTcgaiwfGFBKacvfwuHzzn1KLxH/er8mluiy8/uM3sGXHaRe73RrSIj01jow9t4kJEW633Ov+cOexXeiApTyAw==} - '@types/node@18.19.130': - resolution: {integrity: sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==} + '@types/node@24.11.0': + resolution: {integrity: sha512-fPxQqz4VTgPI/IQ+lj9r0h+fDR66bzoeMGHp8ASee+32OSGIkeASsoZuJixsQoVef1QJbeubcPBxKk22QVoWdw==} '@types/node@9.6.61': resolution: {integrity: sha512-/aKAdg5c8n468cYLy2eQrcR5k6chlbNwZNGUj3TboyPa2hcO2QAJcfymlqPzMiRj8B6nYKXjzQz36minFE0RwQ==} @@ -10980,8 +10979,8 @@ packages: 
undefsafe@2.0.5: resolution: {integrity: sha512-WxONCrssBM8TSPRqN5EmsjVrsv4A8X12J4ArBiiayv3DyyG3ZlIg6yysuuSYdZsVz3TKcTg2fd//Ujd4CHV1iA==} - undici-types@5.26.5: - resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} + undici-types@7.16.0: + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} unfetch@4.2.0: resolution: {integrity: sha512-F9p7yYCn6cIW9El1zi0HI6vqpeIvBsr3dSuRO6Xuppb1u5rXpCPmMvLSyECLhybr9isec8Ohl0hPekMVrEinDA==} @@ -14052,7 +14051,7 @@ snapshots: '@grpc/grpc-js@1.6.12': dependencies: '@grpc/proto-loader': 0.7.15 - '@types/node': 18.19.130 + '@types/node': 24.11.0 optional: true '@grpc/proto-loader@0.5.6': @@ -14487,7 +14486,7 @@ snapshots: '@remirror/core-helpers': 0.7.6(@emotion/core@10.3.1(react@16.14.0))(@types/prosemirror-view@1.24.0)(@types/react-dom@17.0.26(@types/react@16.14.69))(@types/react@16.14.69)(prosemirror-view@1.41.6)(react-dom@16.14.0(react@16.14.0))(react@16.14.0) '@remirror/core-types': 0.9.0(@emotion/core@10.3.1(react@16.14.0))(@types/prosemirror-view@1.24.0)(@types/react-dom@17.0.26(@types/react@16.14.69))(@types/react@16.14.69)(prosemirror-view@1.41.6)(react-dom@16.14.0(react@16.14.0))(react@16.14.0) '@types/min-document': 2.19.2 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/prosemirror-commands': 1.3.0 '@types/prosemirror-inputrules': 1.2.0 '@types/prosemirror-model': 1.17.0 @@ -15831,22 +15830,22 @@ snapshots: mkdirp: 3.0.1 path-browserify: 1.0.1 - '@ts-rest/core@3.52.1(@types/node@18.19.130)(zod@3.22.4)': + '@ts-rest/core@3.52.1(@types/node@24.11.0)(zod@3.22.4)': optionalDependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 zod: 3.22.4 - '@ts-rest/express@3.52.1(@ts-rest/core@3.52.1(@types/node@18.19.130)(zod@3.22.4))(express@4.22.1)(zod@3.22.4)': + '@ts-rest/express@3.52.1(@ts-rest/core@3.52.1(@types/node@24.11.0)(zod@3.22.4))(express@4.22.1)(zod@3.22.4)': 
dependencies: - '@ts-rest/core': 3.52.1(@types/node@18.19.130)(zod@3.22.4) + '@ts-rest/core': 3.52.1(@types/node@24.11.0)(zod@3.22.4) express: 4.22.1 optionalDependencies: zod: 3.22.4 - '@ts-rest/open-api@3.52.1(@ts-rest/core@3.52.1(@types/node@18.19.130)(zod@3.22.4))(zod@3.22.4)': + '@ts-rest/open-api@3.52.1(@ts-rest/core@3.52.1(@types/node@24.11.0)(zod@3.22.4))(zod@3.22.4)': dependencies: '@anatine/zod-openapi': 1.14.2(openapi3-ts@2.0.2)(zod@3.22.4) - '@ts-rest/core': 3.52.1(@types/node@18.19.130)(zod@3.22.4) + '@ts-rest/core': 3.52.1(@types/node@24.11.0)(zod@3.22.4) openapi3-ts: 2.0.2 zod: 3.22.4 @@ -15860,12 +15859,12 @@ snapshots: '@types/accepts@1.3.7': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/amqplib@0.5.17': dependencies: '@types/bluebird': 3.5.42 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/archiver@6.0.4': dependencies: @@ -15878,11 +15877,11 @@ snapshots: '@types/body-parser@1.19.6': dependencies: '@types/connect': 3.4.38 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/busboy@1.5.4': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/caseless@0.12.5': {} @@ -15893,11 +15892,11 @@ snapshots: '@types/cheerio@0.22.35': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/clean-css@4.2.11': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 source-map: 0.6.1 '@types/color-convert@2.0.4': @@ -15913,11 +15912,11 @@ snapshots: '@types/compression@1.8.1': dependencies: '@types/express': 4.17.25 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/connect@3.4.38': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/content-disposition@0.5.9': {} @@ -15932,11 +15931,11 @@ snapshots: '@types/connect': 3.4.38 '@types/express': 4.17.25 '@types/keygrip': 1.0.6 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/cors@2.8.19': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/crypto-js@4.2.2': 
{} @@ -15994,7 +15993,7 @@ snapshots: '@types/express-serve-static-core@4.19.8': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/qs': 6.14.0 '@types/range-parser': 1.2.7 '@types/send': 1.2.1 @@ -16024,14 +16023,14 @@ snapshots: '@types/fs-extra@8.1.5': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/fuzzysearch@1.0.2': {} '@types/glob@7.2.0': dependencies: '@types/minimatch': 6.0.0 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/glob@9.0.0': dependencies: @@ -16078,12 +16077,12 @@ snapshots: '@types/jsonwebtoken@8.5.9': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/jsonwebtoken@9.0.10': dependencies: '@types/ms': 2.1.0 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/katex@0.14.0': {} @@ -16091,7 +16090,7 @@ snapshots: '@types/keyv@3.1.4': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/koa-compose@3.2.9': dependencies: @@ -16106,7 +16105,7 @@ snapshots: '@types/http-errors': 2.0.5 '@types/keygrip': 1.0.6 '@types/koa-compose': 3.2.9 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/lodash.flow@3.5.9': dependencies: @@ -16170,20 +16169,20 @@ snapshots: '@types/nanoid@2.1.0': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/node-fetch@2.6.13': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 form-data: 4.0.5 '@types/node@14.18.63': {} '@types/node@16.18.126': {} - '@types/node@18.19.130': + '@types/node@24.11.0': dependencies: - undici-types: 5.26.5 + undici-types: 7.16.0 '@types/node@9.6.61': {} @@ -16191,7 +16190,7 @@ snapshots: '@types/npmlog@4.1.6': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/object.omit@3.0.3': {} @@ -16276,7 +16275,7 @@ snapshots: '@types/readdir-glob@1.1.5': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/relateurl@0.2.33': {} @@ -16288,13 +16287,13 @@ snapshots: '@types/request@2.48.13': dependencies: 
'@types/caseless': 0.12.5 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/tough-cookie': 4.0.5 form-data: 2.5.5 '@types/responselike@1.0.3': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/rss@0.0.30': {} @@ -16304,23 +16303,23 @@ snapshots: '@types/sax@1.2.7': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/scheduler@0.16.8': {} '@types/send@0.17.6': dependencies: '@types/mime': 1.3.5 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/send@1.2.1': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/serve-static@1.15.10': dependencies: '@types/http-errors': 2.0.5 - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/send': 0.17.6 '@types/sinon@7.5.2': {} @@ -16341,7 +16340,7 @@ snapshots: dependencies: '@types/cookiejar': 2.1.5 '@types/methods': 1.1.4 - '@types/node': 18.19.130 + '@types/node': 24.11.0 form-data: 4.0.5 '@types/supertest@2.0.16': @@ -16370,13 +16369,13 @@ snapshots: '@types/webpack-sources@3.2.3': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/source-list-map': 0.1.6 source-map: 0.7.6 '@types/webpack@4.41.40': dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/tapable': 1.0.12 '@types/uglify-js': 3.17.5 '@types/webpack-sources': 3.2.3 @@ -16438,13 +16437,13 @@ snapshots: chai: 6.2.2 tinyrainbow: 3.0.3 - '@vitest/mocker@4.0.18(vite@7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2))': + '@vitest/mocker@4.0.18(vite@7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2))': dependencies: '@vitest/spy': 4.0.18 estree-walker: 3.0.3 magic-string: 0.30.21 optionalDependencies: - vite: 7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) + vite: 7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) '@vitest/pretty-format@4.0.18': dependencies: @@ -19407,7 +19406,7 @@ snapshots: dependencies: 
'@firebase/database-compat': 0.1.8(@firebase/app-compat@0.5.8)(@firebase/app-types@0.6.1) '@firebase/database-types': 0.7.3 - '@types/node': 18.19.130 + '@types/node': 24.11.0 dicer: 0.3.1 jsonwebtoken: 8.5.1 jwks-rsa: 2.1.5 @@ -20744,7 +20743,7 @@ snapshots: jest-worker@26.6.2: dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 merge-stream: 2.0.0 supports-color: 7.2.0 @@ -22659,7 +22658,7 @@ snapshots: '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 '@types/long': 4.0.2 - '@types/node': 18.19.130 + '@types/node': 24.11.0 long: 4.0.0 optional: true @@ -22676,7 +22675,7 @@ snapshots: '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 '@types/long': 4.0.2 - '@types/node': 18.19.130 + '@types/node': 24.11.0 long: 4.0.0 protobufjs@7.5.4: @@ -22691,7 +22690,7 @@ snapshots: '@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 - '@types/node': 18.19.130 + '@types/node': 24.11.0 long: 5.3.2 proxy-addr@2.0.7: @@ -23721,7 +23720,7 @@ snapshots: sequelize-pool@8.0.1: {} - sequelize-typescript-generator@10.1.2(@types/node@18.19.130)(@types/validator@13.15.10)(pg@8.18.0)(reflect-metadata@0.1.14)(typescript@5.9.3): + sequelize-typescript-generator@10.1.2(@types/node@24.11.0)(@types/validator@13.15.10)(pg@8.18.0)(reflect-metadata@0.1.14)(typescript@5.9.3): dependencies: '@types/eslint': 8.56.12 '@typescript-eslint/parser': 5.62.0(eslint@8.57.1)(typescript@5.9.3) @@ -23729,7 +23728,7 @@ snapshots: eslint: 8.57.1 pluralize: 8.0.0 sequelize: 6.37.7(pg@8.18.0) - sequelize-typescript: 2.1.6(@types/node@18.19.130)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)) + sequelize-typescript: 2.1.6(@types/node@24.11.0)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)) typescript: 5.9.3 yargs: 17.7.2 transitivePeerDependencies: @@ -23747,9 +23746,9 @@ snapshots: - supports-color - tedious - 
sequelize-typescript@2.1.6(@types/node@18.19.130)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)): + sequelize-typescript@2.1.6(@types/node@24.11.0)(@types/validator@13.15.10)(reflect-metadata@0.1.14)(sequelize@6.37.7(pg@8.18.0)): dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 '@types/validator': 13.15.10 glob: 7.2.0 reflect-metadata: 0.1.14 @@ -24660,14 +24659,14 @@ snapshots: '@ts-morph/common': 0.22.0 code-block-writer: 12.0.0 - ts-node@10.9.2(@types/node@18.19.130)(typescript@5.9.3): + ts-node@10.9.2(@types/node@24.11.0)(typescript@5.9.3): dependencies: '@cspotcode/source-map-support': 0.8.1 '@tsconfig/node10': 1.0.12 '@tsconfig/node12': 1.0.11 '@tsconfig/node14': 1.0.3 '@tsconfig/node16': 1.0.4 - '@types/node': 18.19.130 + '@types/node': 24.11.0 acorn: 8.16.0 acorn-walk: 8.3.5 arg: 4.1.3 @@ -24798,7 +24797,7 @@ snapshots: undefsafe@2.0.5: {} - undici-types@5.26.5: {} + undici-types@7.16.0: {} unfetch@4.2.0: {} @@ -25081,18 +25080,18 @@ snapshots: d3-time: 3.1.0 d3-timer: 3.0.1 - vite-tsconfig-paths@5.1.4(typescript@5.9.3)(vite@7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)): + vite-tsconfig-paths@5.1.4(typescript@5.9.3)(vite@7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)): dependencies: debug: 4.4.3(supports-color@5.5.0) globrex: 0.1.2 tsconfck: 3.1.6(typescript@5.9.3) optionalDependencies: - vite: 7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) + vite: 7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) transitivePeerDependencies: - supports-color - typescript - vite@7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2): + vite@7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2): dependencies: esbuild: 0.27.3 fdir: 6.5.0(picomatch@4.0.3) @@ -25101,17 +25100,17 @@ snapshots: rollup: 4.57.1 tinyglobby: 0.2.15 
optionalDependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 fsevents: 2.3.3 sass: 1.67.0 terser: 5.46.0 tsx: 4.21.0 yaml: 1.10.2 - vitest@4.0.18(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2): + vitest@4.0.18(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2): dependencies: '@vitest/expect': 4.0.18 - '@vitest/mocker': 4.0.18(vite@7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)) + '@vitest/mocker': 4.0.18(vite@7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2)) '@vitest/pretty-format': 4.0.18 '@vitest/runner': 4.0.18 '@vitest/snapshot': 4.0.18 @@ -25128,10 +25127,10 @@ snapshots: tinyexec: 1.0.2 tinyglobby: 0.2.15 tinyrainbow: 3.0.3 - vite: 7.3.1(@types/node@18.19.130)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) + vite: 7.3.1(@types/node@24.11.0)(sass@1.67.0)(terser@5.46.0)(tsx@4.21.0)(yaml@1.10.2) why-is-node-running: 2.3.0 optionalDependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 transitivePeerDependencies: - jiti - less @@ -25404,7 +25403,7 @@ snapshots: wkx@0.5.0: dependencies: - '@types/node': 18.19.130 + '@types/node': 24.11.0 word-wrap@1.2.5: {} diff --git a/server/spamTag/commentSpam.ts b/server/spamTag/commentSpam.ts index 2b82c8a33..d892439d2 100644 --- a/server/spamTag/commentSpam.ts +++ b/server/spamTag/commentSpam.ts @@ -22,8 +22,7 @@ const NEW_ACCOUNT_LINK_COMMENT_WINDOW_MINUTES = IS_WINDOW_MINUTES_VALID : DEFAULT_NEW_ACCOUNT_LINK_COMMENT_WINDOW_MINUTES; const NEW_ACCOUNT_LINK_COMMENT_WINDOW_MS = NEW_ACCOUNT_LINK_COMMENT_WINDOW_MINUTES * 60 * 1000; -const URL_REGEX = /\b(?:https?:\/\/|www\.)[^\s<]+/i; -const MAX_TRIGGER_VALUE_LENGTH = 500; +const URL_REGEX = /\b(?:https?:\/\/|www\.)[^\s<]+/gi; type AutoBanNewAccountLinkCommentOptions = { userId: string; @@ -32,17 +31,17 @@ type AutoBanNewAccountLinkCommentOptions = { source: NewAccountLinkCommentTriggerSource; }; -const extractUrlFromString = (value: 
string): string | null => { +export const extractUrlsFromString = (value: string | null | undefined): string[] | null => { if (!value) { return null; } - const matchedUrl = value.match(URL_REGEX)?.[0]; + const matchedUrl = value.matchAll(URL_REGEX); if (!matchedUrl) { return null; } - return matchedUrl.slice(0, MAX_TRIGGER_VALUE_LENGTH); + return [...matchedUrl].map((match) => match[0]); }; const hasValidContentShape = (value: unknown): value is DocJson => { @@ -54,7 +53,7 @@ const hasValidContentShape = (value: unknown): value is DocJson => { return typeof content.type === 'string'; }; -const extractFirstLinkFromContent = (content: DocJson): string | null => { +export const extractLinksFromContent = (content: DocJson | null | undefined): string[] | null => { if (!hasValidContentShape(content)) { return null; } @@ -71,17 +70,27 @@ const extractFirstLinkFromContent = (content: DocJson): string | null => { }); }); - const linkFromTree = links[0]?.attrs.href; - if (typeof linkFromTree !== 'string' || !linkFromTree.length) { + const hrefs = links + .map((link) => link.attrs.href) + .filter((href) => typeof href === 'string' && href.length); + if (hrefs.length === 0) { return null; } - return linkFromTree.slice(0, MAX_TRIGGER_VALUE_LENGTH); + return hrefs; } catch { return null; } }; +export const extractFirstLinkFromContent = (content: DocJson): string | null => { + const links = extractLinksFromContent(content); + if (!links) { + return null; + } + return links[0]; +}; + const getAccountAgeMs = (createdAt: Date | null | undefined): number => { if (!(createdAt instanceof Date)) { return Number.POSITIVE_INFINITY; @@ -125,9 +134,9 @@ export const autoBanForNewAccountLinkComment = async ( } const linkFromTree = extractFirstLinkFromContent(content); - const linkFromText = extractUrlFromString(text); + const linkFromText = extractUrlsFromString(text); - const firstLink = linkFromTree || linkFromText; + const firstLink = linkFromTree?.[0] || linkFromText?.[0]; if 
(!firstLink) { return false; diff --git a/server/spamTag/contentAnalysis.ts b/server/spamTag/contentAnalysis.ts new file mode 100644 index 000000000..e596973c6 --- /dev/null +++ b/server/spamTag/contentAnalysis.ts @@ -0,0 +1,55 @@ +import type { DocJson } from 'types'; + +import { extractFirstLinkFromContent, extractUrlsFromString } from './commentSpam'; + +export const containsLink = ( + doc: DocJson | null | undefined, + text: string | null | undefined, +): boolean => { + if (!doc && !text) return false; + if (doc) { + return extractFirstLinkFromContent(doc) !== null; + } + + if (text) { + return extractUrlsFromString(text) !== null; + } + + return false; +}; + +// these are template phrases that show up in spam comments that try to look like genuine engagement. +// only meaningful as a spam signal when the comment also contains a link. +const commentSpamTemplates = [ + 'this blog post was very helpful', + 'this article was very helpful', + 'this post was very helpful', + 'i found the analysis', + 'i found this article', + 'top-tier and highly relevant', + 'maintains such a high level of professionalism', + 'delivering actionable data', + 'delivering actionable insights', + 'very informative article', + 'very informative post', + 'very informative blog', + 'great article', + 'great blog post', + 'really appreciate this content', + 'this is really informative', + 'this is really helpful', + 'thanks for sharing this', + 'amazing content', + 'wonderful article', + 'excellent article', + 'i really enjoyed reading this', + 'keep up the good work', + 'very well written', + 'this is exactly what i was looking for', +]; + +export const matchesCommentSpamTemplate = (text: string | null | undefined): string[] => { + if (!text) return []; + const lower = text.toLowerCase(); + return commentSpamTemplates.filter((template) => lower.includes(template)); +}; diff --git a/server/spamTag/phrases.ts b/server/spamTag/phrases.ts index b05f91f15..901b0bcd0 100644 --- 
a/server/spamTag/phrases.ts +++ b/server/spamTag/phrases.ts @@ -9,8 +9,6 @@ export const communitySpamPhrases = [ 'porn', 'naked', 'sexy', - 'porn', - 'naked', 'shopping', 'vendor', 'poker', @@ -41,4 +39,26 @@ export const communitySpamPhrases = [ 'our site', 'if you are', 'ivermectin', + 'power washing', + 'real estate', + 'health service', + 'digital strategies', + 'orgasm', + 'orgasms', + 'sex', + 'betting', + 'sportsbook', + 'baccarat', + 'disposable vape', + 'call girls', + 'escort service', + 'weight loss', + 'crypto recovery', + 'online seller', + 'competitive rates', + 'best deals', + 'affordable prices', + 'free quotes', + 'affordable', + 'apk', ]; diff --git a/server/spamTag/userScore.ts b/server/spamTag/userScore.ts index 5592e1156..c6f8ed95e 100644 --- a/server/spamTag/userScore.ts +++ b/server/spamTag/userScore.ts @@ -1,73 +1,405 @@ -import type { SpamTag, User } from 'server/models'; import type * as types from 'types'; +import type { DocJson, Maybe } from 'types'; +import { isUserAffiliatedWithAnyCommunity } from 'server/community/queries'; +import { type SpamTag, ThreadComment, type User } from 'server/models'; + +import { extractLinksFromContent, extractUrlsFromString } from './commentSpam'; +import { containsLink, matchesCommentSpamTemplate } from './contentAnalysis'; import { communitySpamPhrases } from './phrases'; +const CURRENT_SPAM_SCORE_VERSION = 3; + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + type SpamField = { - extract: (value: T) => types.Maybe; + extract: (value: T) => Maybe; isProse: boolean; weight: number; }; -const CURRENT_SPAM_SCORE_VERSION = 1; - -const userSpamFields: Record> = { - fullName: { - extract: (u) => u.fullName, - isProse: true, - weight: 1, - }, - title: { - extract: (u) => u.title, - isProse: true, - weight: 1, - }, - bio: { - extract: (u) => u.bio, - isProse: true, - weight: 1, - }, 
+const profileSpamFields: Record> = { + fullName: { extract: (u) => u.fullName, isProse: true, weight: 1 }, + bio: { extract: (u) => u.bio, isProse: true, weight: 1 }, }; const getMatchingSpamPhrases = (text: string, isProse: boolean): string[] => { - if (text) { - return communitySpamPhrases.filter((phrase) => { - const lowercaseText = text.toLowerCase(); - const lowercasePhrase = phrase.toLowerCase(); - if (isProse) { - return ( - lowercaseText.includes(' ' + lowercasePhrase) || - lowercaseText.includes(lowercasePhrase + ' ') - ); - } - return lowercaseText.includes(lowercasePhrase); - }); - } - return []; + if (!text) return []; + + return communitySpamPhrases.filter((phrase) => { + const lower = text.toLowerCase(); + const lowerPhrase = phrase.toLowerCase(); + + if (isProse) { + return lower.includes(' ' + lowerPhrase) || lower.includes(lowerPhrase + ' '); + } + + return lower.includes(lowerPhrase); + }); }; -const getUserSpamScoreReport = (user: User) => { - return Object.keys(userSpamFields).reduce( +const getProfilePhraseScore = (user: User) => { + return Object.keys(profileSpamFields).reduce( (report, key) => { - const { extract, weight, isProse } = userSpamFields[key]; + const { extract, weight, isProse } = profileSpamFields[key]; const text = extract(user); - if (text) { - const matchingPhrases = getMatchingSpamPhrases(text, isProse); - if (matchingPhrases.length) { - return { - score: report.score + weight * matchingPhrases.length, - fields: { ...report.fields, [key]: matchingPhrases }, - }; - } - } - return report; + if (!text) return report; + + const matches = getMatchingSpamPhrases(text, isProse); + if (matches.length === 0) return report; + + return { + score: report.score + weight * matches.length, + fields: { ...report.fields, [key]: matches }, + }; }, { score: 0, fields: {} as Record }, ); }; +const VIETNAMESE_SPAM_PATTERNS = [ + /nhà cái/i, + /cá cược/i, + /trực tuyến.*(?:uy tín|casino|slot|cược)/i, + /(?:casino|slot|cược).*trực tuyến/i, + 
/khuyến mãi.*(?:cược|casino|slot)/i, + /nạp tiền|rút tiền/i, + /tỷ lệ kèo/i, + /kèo nhà cái/i, + /xổ số|nổ hũ/i, +]; + +const GAMBLING_URL_PATTERNS = [ + /\b(?:bet|casino|slot|poker|lottery|baccarat|sportsbook)\b/i, + /\b\d{2,3}(?:win|bet|club|game|play|sport)\b/i, + /\b(?:win|bet|club|game|play|sport)\d{2,3}\b/i, + /\.(?:bet|casino|poker|games)\b/i, +]; + +const DISPOSABLE_EMAIL_DOMAINS = new Set([ + 'mailto.plus', + 'rustyload.com', + 'kvegg.com', + 'pariag.com', + 'fenxz.com', + 'alexida.com', + 'cmhvzylmfc.com', + 'gghs96.org', + 'comfythings.com', +]); + +const SPAM_SLUG_PATTERNS = [ + /^\d{2,3}(?:win|bet|game|play)/i, + /(?:win|bet|game|play)\d{2,3}/i, + /^nha-cai-/i, + /^trang-chu-/i, +]; + +const bioMatchesWebsiteDomain = ( + bio: string | null | undefined, + website: string | null | undefined, +): boolean => { + if (!bio || !website) return false; + + try { + const hostname = new URL(website).hostname.replace(/^www\./, ''); + const baseName = hostname.split('.')[0].toLowerCase(); + + if (baseName.length < 4) return false; + + return bio.toLowerCase().includes(baseName); + } catch { + return false; + } +}; + +type UserCommentData = { + totalComments: number; + commentsWithLinks: number; + linkUrls: string[]; + templateMatches: string[]; + commentsWithLinksAndTemplates: number; +}; + +const getUserCommentData = async (userId: string): Promise => { + const comments = await ThreadComment.findAll({ + where: { userId }, + attributes: ['content', 'text'], + limit: 200, + }); + + let commentsWithLinks = 0; + let commentsWithLinksAndTemplates = 0; + const allLinkUrls: string[] = []; + const allTemplateMatches: string[] = []; + + for (const comment of comments) { + const doc = comment.content as DocJson | null; + const text = comment.text; + const hasLink = containsLink(doc, text); + + if (hasLink) { + commentsWithLinks++; + const docLinks = extractLinksFromContent(doc); + const textLinks = extractUrlsFromString(text); + allLinkUrls.push(...(docLinks ?? 
[]), ...(textLinks ?? [])); + } + + const templates = matchesCommentSpamTemplate(text); + if (templates.length > 0) { + allTemplateMatches.push(...templates); + if (hasLink) { + commentsWithLinksAndTemplates++; + } + } + } + + return { + totalComments: comments.length, + commentsWithLinks, + linkUrls: [...new Set(allLinkUrls)], + templateMatches: [...new Set(allTemplateMatches)], + commentsWithLinksAndTemplates, + }; +}; + +// --------------------------------------------------------------------------- +// signal configuration +// --------------------------------------------------------------------------- + +type SignalContext = { + user: User; + isAffiliated: boolean; + commentData: UserCommentData; + profilePhraseResult: { score: number; fields: Record }; + bioUrls: string[]; +}; + +type SpamSignal = { + name: string; + score: number; + test: (ctx: SignalContext) => boolean; + evidence: (ctx: SignalContext) => string[]; +}; + +const spamSignals: SpamSignal[] = [ + { + name: 'profile-spam-phrases', + score: 0, + test: (ctx) => ctx.profilePhraseResult.score > 0, + evidence: (ctx) => { + const { fields } = ctx.profilePhraseResult; + return Object.entries(fields).flatMap(([field, phrases]) => + phrases.map((p) => `${field}: "${p}"`), + ); + }, + }, + + { + name: 'website-not-affiliated', + score: 3, + test: (ctx) => !!ctx.user.website && !ctx.isAffiliated, + evidence: (ctx) => [ctx.user.website!], + }, + + { + name: 'website-added-quickly', + score: 3, + test: (ctx) => { + if (!ctx.user.website) return false; + const createdAt = new Date(ctx.user.createdAt as unknown as string).getTime(); + const updatedAt = new Date(ctx.user.updatedAt as unknown as string).getTime(); + return (updatedAt - createdAt) / 60_000 < 5; + }, + evidence: (ctx) => [ctx.user.website!], + }, + + { + name: 'bio-contains-url', + score: 2, + test: (ctx) => ctx.bioUrls.length > 0, + evidence: (ctx) => ctx.bioUrls, + }, + + { + name: 'gambling-website', + score: 3, + test: (ctx) => + 
!!ctx.user.website && GAMBLING_URL_PATTERNS.some((p) => p.test(ctx.user.website!)), + evidence: (ctx) => [ctx.user.website!], + }, + + { + name: 'website-with-88', + score: 2, + test: (ctx) => !!ctx.user.website && ctx.user.website.includes('88'), + evidence: (ctx) => [ctx.user.website!], + }, + + { + name: 'vietnamese-gambling-bio', + score: 3, + test: (ctx) => VIETNAMESE_SPAM_PATTERNS.some((p) => p.test(ctx.user.bio ?? '')), + evidence: (ctx) => { + const bio = ctx.user.bio ?? ''; + return VIETNAMESE_SPAM_PATTERNS.filter((p) => p.test(bio)).map((p) => p.source); + }, + }, + + { + name: 'spam-slug-pattern', + score: 2, + test: (ctx) => SPAM_SLUG_PATTERNS.some((p) => p.test(ctx.user.slug)), + evidence: (ctx) => [ctx.user.slug], + }, + + { + name: 'disposable-email', + score: 2, + test: (ctx) => { + const domain = ctx.user.email?.split('@')[1]?.toLowerCase(); + return DISPOSABLE_EMAIL_DOMAINS.has(domain); + }, + evidence: (ctx) => [ctx.user.email], + }, + + { + name: 'bio-promotes-website', + score: 2, + test: (ctx) => !ctx.isAffiliated && bioMatchesWebsiteDomain(ctx.user.bio, ctx.user.website), + evidence: (ctx) => [ctx.user.website!, ctx.user.bio?.slice(0, 200) ?? 
''], + }, + { + name: 'comments-with-links-not-affiliated', + score: 6, + test: (ctx) => ctx.commentData.commentsWithLinks > 0 && !ctx.isAffiliated, + evidence: (ctx) => ctx.commentData.linkUrls.slice(0, 10), + }, + + { + name: 'all-comments-have-links', + score: 2, + test: (ctx) => + ctx.commentData.totalComments > 0 && + ctx.commentData.commentsWithLinks === ctx.commentData.totalComments, + evidence: (ctx) => [ + `${ctx.commentData.commentsWithLinks}/${ctx.commentData.totalComments} comments`, + ], + }, + + { + name: 'template-spam-with-links', + score: 2, + test: (ctx) => ctx.commentData.commentsWithLinksAndTemplates > 0, + evidence: (ctx) => ctx.commentData.templateMatches.slice(0, 10), + }, + + { + name: '-edu-email', + score: -10, + test: (ctx) => !!ctx.user.email?.endsWith('.edu'), + evidence: (ctx) => [ctx.user.email], + }, + { + name: 'bio contains attempted html', + score: 6, + test: (ctx) => + !!ctx.user.bio && (/<(a|p)/.test(ctx.user.bio) || !!ctx.user.bio.includes('href=')), + evidence: (ctx) => [ctx.user.bio!], + }, +]; + +// --------------------------------------------------------------------------- +// report computation +// --------------------------------------------------------------------------- + +export type SignalHit = { + name: string; + score: number; + evidence: string[]; +}; + +export type UserSpamReport = { + score: number; + fields: Record; + signals: string[]; + signalHits: SignalHit[]; +}; + +export const computeUserSpamReport = async (user: User): Promise => { + const profilePhraseResult = getProfilePhraseScore(user); + const bioUrls = extractUrlsFromString(user.bio) ?? 
[]; + + const [isAffiliated, commentData] = await Promise.all([ + isUserAffiliatedWithAnyCommunity(user.id), + getUserCommentData(user.id), + ]); + + const ctx: SignalContext = { + user, + isAffiliated, + commentData, + profilePhraseResult, + bioUrls, + }; + + const signalHits: SignalHit[] = []; + let score = 0; + + // profile-spam-phrases is special: its score comes from the phrase matcher, not from the config + const fields: Record = {}; + + for (const signal of spamSignals) { + if (!signal.test(ctx)) continue; + + const signalScore = + signal.name === 'profile-spam-phrases' ? profilePhraseResult.score : signal.score; + + const evidence = signal.evidence(ctx); + score += signalScore; + signalHits.push({ name: signal.name, score: signalScore, evidence }); + } + + if (profilePhraseResult.score > 0) { + Object.assign(fields, profilePhraseResult.fields); + } + + const commentLinksHit = signalHits.find((h) => h.name === 'comments-with-links-not-affiliated'); + if (commentLinksHit) { + fields.suspiciousCommentLinks = commentLinksHit.evidence; + } + + const templateHit = signalHits.find((h) => h.name === 'template-spam-with-links'); + if (templateHit) { + fields.templateSpamComments = templateHit.evidence; + } + + const websiteHit = signalHits.find((h) => h.name === 'website-not-affiliated'); + if (websiteHit) { + fields.website = websiteHit.evidence; + } + + const bioUrlHit = signalHits.find((h) => h.name === 'bio-contains-url'); + if (bioUrlHit) { + fields.bioUrl = bioUrlHit.evidence; + } + + score = Math.max(score, 0); + + return { + score, + fields, + signals: signalHits.map((h) => h.name), + signalHits, + }; +}; + +// the original synchronous scoring for backward compatibility with existing callers +// that don't need the full async analysis export const getSuspectedUserSpamVerdict = (user: User): types.SpamVerdict => { - const { score, fields } = getUserSpamScoreReport(user); + const { score, fields } = getProfilePhraseScore(user); return { fields, spamScore: 
score, @@ -75,3 +407,16 @@ export const getSuspectedUserSpamVerdict = (user: User): types.SpamVerdict & { signals: string[] }> => { + const report = await computeUserSpamReport(user); + return { + fields: report.fields, + spamScore: report.score, + spamScoreVersion: CURRENT_SPAM_SCORE_VERSION, + spamScoreComputedAt: new Date(), + signals: report.signals, + }; +}; diff --git a/tools/cron.ts b/tools/cron.ts index ee22105ad..2782c8c3c 100644 --- a/tools/cron.ts +++ b/tools/cron.ts @@ -25,16 +25,34 @@ if (process.env.PUBPUB_PRODUCTION === 'true') { cron.schedule('0 */12 * * *', () => run('Backup DB', 'tools-prod backupDb'), { timezone: 'UTC', }); // Every 6 hours + cron.schedule('0 13 * * *', () => run('Email Digest', 'tools-prod emailActivityDigest'), { timezone: 'UTC', }); + cron.schedule( '0 5 * * 0', () => run('Firebase Cleanup', 'tools-prod cleanupFirebase --execute'), { timezone: 'UTC', }, - ); // Weekly on Sunday at 1 AM UTC + ); // Weekly on Sunday at 5 AM UTC + + cron.schedule( + '0 4 * * *', + () => { + const outputPath = `/tmp/spam-scan-${new Date().toISOString().slice(0, 10)}.json`; + run( + 'Spam Scan (analyze)', + `tools-prod scanSpamUsers --analyze --since 26h --output ${outputPath} --min-score 6`, + ); + run( + 'Spam Scan (execute)', + `tools-prod scanSpamUsers --execute --input ${outputPath} --min-score 6`, + ); + }, + { timezone: 'UTC' }, + ); // Daily at 4 AM UTC } else { const logNotSet = () => { log( diff --git a/tools/index.js b/tools/index.js index 43a7a58ee..11e05fce1 100644 --- a/tools/index.js +++ b/tools/index.js @@ -62,6 +62,7 @@ const commandFiles = { pubCrawl: "./pubCrawl", rerankCollections: "./rerankCollections", rerunExport: "./rerunExport", + scanSpamUsers: "./scanSpamUsers", searchSync: "./searchSync", switchBranchOrders: "./switchBranchOrders", syncDbSchema: "./syncDbSchema", diff --git a/tools/scanSpamUsers.ts b/tools/scanSpamUsers.ts new file mode 100644 index 000000000..0af255a41 --- /dev/null +++ b/tools/scanSpamUsers.ts @@ -0,0 
+1,416 @@ +/* +scanSpamUsers -- two-phase bulk spam detection tool + +usage: + npm run tools scanSpamUsers --analyze --output results.json [--min-score N] [--input skip.json] [--since 2024-01-01] [--concurrency 10] + npm run tools scanSpamUsers --execute --input results.json [--min-score N] [--signals sig1,sig2] [--range 0-100] [--concurrency 10] + +analyze phase: + scans all users without an existing spam tag, computes spam scores, and + writes a json file with detailed evidence for each flagged user. the file + contains an array of entries in scan order (most recently created users first). + + --output required. where to write the results json. + --min-score minimum score to include in output. default 5. + --input optional. path to an existing results json whose + user ids will be skipped (so you can re-run + incrementally). + --since only scan users created after this date (ISO string) + or relative duration like "24h", "7d". useful for + incremental cron runs. + --concurrency how many users to process in parallel. default 5. + --include-clean also write a separate .clean.json file with users + that scored > 0 but below --min-score. useful for + reviewing false negatives. + +execute phase: + reads a results json produced by --analyze and applies spam tags to the + users in it, subject to filters. + + --input required. the results json from analyze. + --min-score only tag users whose score >= n. + --signals only tag users who have ALL of these signals. + --range start-end only process entries whose index is in [start, end) + (0-based, as shown in the output file). + --concurrency how many users to tag in parallel. default 5.
+ +output file format (json array): + each entry has: + index sequential 0-based index + userId user uuid + email user email + slug user slug (username) + fullName user display name + createdAt account creation timestamp + score computed spam score + signals array of signal names + commentCount total number of comments by this user + commentsWithLinks how many of those contain links + recentComments up to 5 most recent comments that contain links, + each with { text, links } + profile { website, bio, bioUrls } -- present when profile + signals fired +*/ +/** biome-ignore-all lint/performance/noAwaitInLoops: batch pagination loop is inherently sequential */ + +import type { DocJson } from 'types'; + +import * as fs from 'fs'; +import { Op } from 'sequelize'; + +import { ThreadComment, User } from 'server/models'; +import { extractLinksFromContent, extractUrlsFromString } from 'server/spamTag/commentSpam'; +import { containsLink } from 'server/spamTag/contentAnalysis'; +import { upsertSpamTag } from 'server/spamTag/userQueries'; +import { computeUserSpamReport, type SignalHit } from 'server/spamTag/userScore'; +import { asyncMap } from 'utils/async'; +import { JsonArrayWriter } from 'utils/jsonArrayWriter'; + +const BATCH_SIZE = 200; +const DEFAULT_MIN_SCORE = 5; +const DEFAULT_ANALYZE_CONCURRENCY = 5; +const DEFAULT_EXECUTE_CONCURRENCY = 5; + +type CommentEvidence = { + text: string; + links: string[]; +}; + +type AnalyzeEntry = { + index: number; + userId: string; + email: string; + slug: string; + fullName: string; + createdAt: string; + score: number; + signals: string[]; + signalHits: SignalHit[]; + commentCount: number; + commentsWithLinks: number; + recentComments: CommentEvidence[]; + profile: { + website: string | null; + bio: string | null; + bioUrls: string[]; + } | null; +}; + +const parseArg = (name: string): string | null => { + const prefix = `--${name}=`; + const combined = process.argv.find((a) => a.startsWith(prefix)); + if (combined) return 
combined.slice(prefix.length); + + const idx = process.argv.indexOf(`--${name}`); + if (idx === -1 || idx + 1 >= process.argv.length) return null; + + const next = process.argv[idx + 1]; + if (next.startsWith('--')) return null; + + return next; +}; + +const hasFlag = (name: string): boolean => process.argv.includes(`--${name}`); + +const parseSinceArg = (value: string | null): Date | null => { + if (!value) return null; + + const durationMatch = value.match(/^(\d+)([hd])$/); + if (durationMatch) { + const amount = parseInt(durationMatch[1], 10); + const unit = durationMatch[2]; + const ms = unit === 'h' ? amount * 3600_000 : amount * 86_400_000; + return new Date(Date.now() - ms); + } + + const date = new Date(value); + if (Number.isNaN(date.getTime())) { + console.error( + `invalid --since value: ${value} (use ISO date or duration like "24h", "7d")`, + ); + process.exit(1); + } + + return date; +}; + +const getRecentCommentsWithLinks = async ( + userId: string, + limit: number, +): Promise<{ total: number; withLinks: number; evidence: CommentEvidence[] }> => { + const comments = await ThreadComment.findAll({ + where: { userId }, + attributes: ['content', 'text', 'createdAt'], + order: [['createdAt', 'DESC']], + limit: 200, + }); + + const evidence: CommentEvidence[] = []; + let withLinks = 0; + + for (const comment of comments) { + const doc = comment.content as DocJson | null; + const text = comment.text ?? ''; + + if (!containsLink(doc, text)) continue; + + withLinks++; + + if (evidence.length < limit) { + const links = [ + ...(extractLinksFromContent(doc) ?? []), + ...(extractUrlsFromString(text) ?? 
[]), + ]; + evidence.push({ text: text.slice(0, 500), links: [...new Set(links)] }); + } + } + + return { total: comments.length, withLinks, evidence }; +}; + +const buildEntry = async ( + user: User, + report: { score: number; signals: string[]; signalHits: SignalHit[] }, +): Promise => { + const commentInfo = await getRecentCommentsWithLinks(user.id, 5); + const hasProfileSignal = report.signals.some((s) => s.includes('website') || s.includes('bio')); + + const profile = hasProfileSignal + ? { + website: user.website ?? null, + bio: user.bio ?? null, + bioUrls: extractUrlsFromString(user.bio) ?? [], + } + : null; + + return { + index: 0, + userId: user.id, + email: user.email ?? '', + slug: user.slug, + fullName: user.fullName, + createdAt: String(user.createdAt), + score: report.score, + signals: report.signals, + signalHits: report.signalHits, + commentCount: commentInfo.total, + commentsWithLinks: commentInfo.withLinks, + recentComments: commentInfo.evidence, + profile, + }; +}; + +async function analyze() { + const outputPath = parseArg('output'); + if (!outputPath) { + console.error('--output is required for --analyze'); + process.exit(1); + } + + const minScore = parseInt(parseArg('min-score') ?? String(DEFAULT_MIN_SCORE), 10); + const concurrency = parseInt( + parseArg('concurrency') ?? String(DEFAULT_ANALYZE_CONCURRENCY), + 10, + ); + const sinceDate = parseSinceArg(parseArg('since')); + const includeClean = hasFlag('include-clean'); + const cleanPath = includeClean ? outputPath.replace(/\.json$/, '.clean.json') : null; + + const skipIds = new Set(); + const inputPath = parseArg('input'); + if (inputPath) { + const existing: AnalyzeEntry[] = JSON.parse(fs.readFileSync(inputPath, 'utf-8')); + for (const e of existing) skipIds.add(e.userId); + console.log(`loaded ${skipIds.size} user ids to skip from ${inputPath}`); + } + + const sinceLabel = sinceDate ? ` since=${sinceDate.toISOString()}` : ''; + const cleanLabel = cleanPath ? 
` clean=${cleanPath}` : ''; + console.log( + `analyzing users (min-score=${minScore}, concurrency=${concurrency}${sinceLabel}${cleanLabel}, output=${outputPath})`, + ); + + const writer = new JsonArrayWriter(outputPath); + const cleanWriter = cleanPath ? new JsonArrayWriter(cleanPath) : null; + + let offset = 0; + let scanned = 0; + let errors = 0; + + const whereClause: Record = { + spamTagId: { [Op.is]: null as any }, + }; + + if (sinceDate) { + whereClause.createdAt = { [Op.gte]: sinceDate }; + } + + while (true) { + const users = await User.findAll({ + where: whereClause, + attributes: [ + 'id', + 'fullName', + 'email', + 'slug', + 'title', + 'bio', + 'website', + 'createdAt', + 'updatedAt', + ], + limit: BATCH_SIZE, + offset, + order: [['createdAt', 'DESC']], + }); + + if (users.length === 0) break; + + const toProcess = users.filter((u) => !skipIds.has(u.id)); + scanned += users.length; + + const results = await asyncMap( + toProcess, + async (user) => { + try { + const report = await computeUserSpamReport(user); + return { user, report }; + } catch (err) { + errors++; + console.error(`error analyzing user ${user.id}:`, err); + return null; + } + }, + { concurrency }, + ); + + for (const result of results) { + if (!result) continue; + + const { user, report } = result; + + if (report.score >= minScore) { + const entry = await buildEntry(user, report); + entry.index = writer.length; + writer.push(entry); + } else if (cleanWriter && report.score > 0) { + const entry = await buildEntry(user, report); + entry.index = cleanWriter.length; + cleanWriter.push(entry); + } + } + + console.log( + `[${new Date().toISOString()}] scanned=${scanned} flagged=${writer.length}` + + `${cleanWriter ? ` clean=${cleanWriter.length}` : ''} errors=${errors}`, + ); + offset += BATCH_SIZE; + } + + writer.close(); + cleanWriter?.close(); + + console.log(`done. 
scanned=${scanned}, wrote ${writer.length} entries to ${outputPath}`); + + if (cleanWriter) { + console.log(`wrote ${cleanWriter.length} clean (below-threshold) entries to ${cleanPath}`); + } +} + +async function execute() { + const inputPath = parseArg('input'); + if (!inputPath) { + console.error('--input is required for --execute'); + process.exit(1); + } + + const entries: AnalyzeEntry[] = JSON.parse(fs.readFileSync(inputPath, 'utf-8')); + const minScore = parseInt(parseArg('min-score') ?? '0', 10); + const concurrency = parseInt( + parseArg('concurrency') ?? String(DEFAULT_EXECUTE_CONCURRENCY), + 10, + ); + const signalsArg = parseArg('signals'); + const requiredSignals = signalsArg ? signalsArg.split(',') : []; + + const rangeArg = parseArg('range'); + let rangeStart = 0; + let rangeEnd = entries.length; + if (rangeArg) { + const [s, e] = rangeArg.split('-').map(Number); + rangeStart = s; + rangeEnd = e; + } + + const filtered = entries.filter((entry) => { + if (entry.index < rangeStart || entry.index >= rangeEnd) return false; + if (entry.score < minScore) return false; + + if (requiredSignals.length > 0) { + const hasAll = requiredSignals.every((s) => entry.signals.includes(s)); + if (!hasAll) return false; + } + + return true; + }); + + console.log( + `executing on ${inputPath}: ${entries.length} total, ${filtered.length} after filters, ` + + `min-score=${minScore}, signals=${requiredSignals.join(',') || 'any'}, ` + + `range=[${rangeStart}, ${rangeEnd}), concurrency=${concurrency}`, + ); + + let tagged = 0; + let errors = 0; + + await asyncMap( + filtered, + async (entry) => { + try { + await upsertSpamTag({ + userId: entry.userId, + fields: { + suspiciousComments: entry.recentComments + .flatMap((c) => c.links) + .slice(0, 10), + automatedScan: [ + { + score: entry.score, + signals: entry.signals, + signalHits: entry.signalHits, + scannedAt: new Date().toISOString(), + }, + ], + }, + }); + + tagged++; + if (tagged % 50 === 0) { + console.log(`progress: 
tagged=${tagged} errors=${errors}`); + } + } catch (err) { + errors++; + console.error(`error tagging user ${entry.userId} (${entry.slug}):`, err); + } + }, + { concurrency }, + ); + + console.log( + `done. tagged=${tagged} skipped=${entries.length - filtered.length} errors=${errors}`, + ); +} + +async function main() { + if (hasFlag('analyze')) return analyze(); + if (hasFlag('execute')) return execute(); + console.error('specify --analyze or --execute'); + process.exit(1); +} + +main() + .then(() => process.exit(0)) + .catch((err) => { + console.error('fatal error:', err); + process.exit(1); + }); diff --git a/types/spam.ts b/types/spam.ts index e27b86194..fb762b4bc 100644 --- a/types/spam.ts +++ b/types/spam.ts @@ -173,4 +173,10 @@ export type UserSpamTagFields = { userName: string; at: string; }[]; + automatedScan?: { + score: number; + signals: string[]; + signalHits?: { name: string; score: number; evidence: string[] }[]; + scannedAt: string; + }[]; }; diff --git a/utils/jsonArrayWriter.ts b/utils/jsonArrayWriter.ts new file mode 100644 index 000000000..4d989770b --- /dev/null +++ b/utils/jsonArrayWriter.ts @@ -0,0 +1,32 @@ +import * as fs from 'fs'; + +/** + * little utility that writes json objects to a file, keeping the file valid json at all times. + */ +export class JsonArrayWriter { + private fd: number; + private count = 0; + + constructor(path: string) { + this.fd = fs.openSync(path, 'w'); + fs.writeSync(this.fd, '[]'); + } + + push(entry: T): void { + const json = JSON.stringify(entry, null, 2); + const stat = fs.fstatSync(this.fd); + // overwrite the trailing `]` + const pos = stat.size - 1; + const chunk = this.count === 0 ? `\n${json}\n]` : `,\n${json}\n]`; + fs.writeSync(this.fd, chunk, pos); + this.count++; + } + + get length(): number { + return this.count; + } + + close(): void { + fs.closeSync(this.fd); + } +}