diff --git a/.changeset/curly-quote-capitalization.md b/.changeset/curly-quote-capitalization.md new file mode 100644 index 0000000..56e70d6 --- /dev/null +++ b/.changeset/curly-quote-capitalization.md @@ -0,0 +1,9 @@ +--- +"@fujocoded/remark-capitalize-titles": patch +--- + +Capitalize words correctly when a heading contains curly quotes (as produced by +`remark-smartypants`). Previously, the upstream `title` library only recognized +straight quotes as punctuation, so a word following a curly `“` or `’` would +stay lowercase. The plugin now converts curly quotes to straight quotes before +title-casing, then restores the original curly characters in the output. diff --git a/.changeset/hyphenated-compound-tails.md b/.changeset/hyphenated-compound-tails.md new file mode 100644 index 0000000..4540a48 --- /dev/null +++ b/.changeset/hyphenated-compound-tails.md @@ -0,0 +1,12 @@ +--- +"@fujocoded/remark-capitalize-titles": minor +--- + +Lowercase the second-and-later segments of a hyphenated compound during title +casing, so output follows AP-style ("Three-way Merges", "Pre-commit Hooks", +"Up-to-date") instead of capitalizing every segment ("Three-Way", "Pre-Commit", +"Up-To-Date"). A segment is kept capitalized when either the full compound or +the individual segment is listed in `special`. + +This is a breaking change for callers that expected every segment of a +hyphenated word to be capitalized. 
diff --git a/package.json b/package.json index 472c9cb..5182a1a 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,9 @@ "lint:fix": "oxlint --type-aware --fix", "lint:ci": "oxlint --type-aware --deny-warnings", "format": "oxfmt && prettier --write \"**/*.astro\"", - "format:check": "oxfmt --check && prettier --check \"**/*.astro\"" + "format:check": "oxfmt --check && prettier --check \"**/*.astro\"", + "check": "npm run format:check && npm run lint:ci && npm run sherif && npm run typecheck && npm run build", + "fix": "npm run format && npm run lint:fix" }, "devDependencies": { "@changesets/cli": "^2.29.8", diff --git a/remark-capitalize-titles/index.ts b/remark-capitalize-titles/index.ts index 9269428..7b5bcc7 100644 --- a/remark-capitalize-titles/index.ts +++ b/remark-capitalize-titles/index.ts @@ -7,13 +7,59 @@ import type { MdxJsxFlowElement } from "mdast-util-mdx-jsx"; import { DEFAULT_CAPITALIZATIONS as DEFAULT_CAPITALIZATIONS_ } from "./capitalizations.ts"; +// Astro's MDX integration runs remark-smartypants before user plugins, so +// straight quotes arrive here as curly. The `title` library's regex only lists +// straight quotes as punctuation, so curly quotes would otherwise prevent the +// next word from being capitalized. +const CURLY_TO_STRAIGHT: Record = { + "\u201C": '"', + "\u201D": '"', + "\u2018": "'", + "\u2019": "'", +}; +const CURLY_QUOTE_REGEX = /[\u201C\u201D\u2018\u2019]/g; + +// Matches a hyphenated compound like "Three-Way" or "Up-To-Date" so the +// second-and-later segments can be lowercased (AP-style: "Three-way"). 
const HYPHENATED_COMPOUND_REGEX = /[A-Za-z][A-Za-z']*(?:-[A-Za-z][A-Za-z']*)+/g;

// A segment in plain title case: one capital followed only by lowercase
// letters or apostrophes ("Way", "Tan's"). Anything else ("API", "GitHub",
// "iOS") carries deliberate casing and must not be touched.
const TITLE_CASED_SEGMENT_REGEX = /^[A-Z][a-z']*$/;

/**
 * Lowercases the second-and-later segments of every hyphenated compound in
 * `text` ("Three-Way" becomes "Three-way").
 *
 * A tail segment is left untouched when:
 * - the whole compound is listed in `special`,
 * - the segment itself is listed in `special`, or
 * - the segment is not in plain title case, so acronyms and mixed-case words
 *   are not mangled into forms like "aPI".
 *
 * @param text - Already title-cased text.
 * @param special - Words whose exact casing must be preserved.
 * @returns `text` with eligible compound tails lowercased.
 */
const lowercaseHyphenatedTails = (text: string, special: readonly string[]) =>
  text.replace(HYPHENATED_COMPOUND_REGEX, (compound) => {
    if (special.includes(compound)) return compound;
    return compound
      .split("-")
      .map((segment, index) => {
        // The head segment always keeps the casing it was given.
        if (index === 0 || special.includes(segment)) return segment;
        if (!TITLE_CASED_SEGMENT_REGEX.test(segment)) return segment;
        return segment.charAt(0).toLowerCase() + segment.slice(1);
      })
      .join("-");
  });

/**
 * Title-cases `text` with the upstream `title` library, then lowercases the
 * tails of hyphenated compounds.
 *
 * Curly quotes are swapped for straight quotes before title-casing and put
 * back afterwards at the offsets where they were found.
 *
 * @param params - The same `(text, options)` arguments `libraryTitle` accepts.
 * @returns The title-cased text with the original curly quotes restored.
 */
const title = (...params: Parameters<typeof libraryTitle>) => {
  const [text, options] = params;

  // Remember where each curly quote sat so it can be restored afterwards.
  const curlyPositions: Array<[number, string]> = [];
  const normalized = text.replace(
    CURLY_QUOTE_REGEX,
    (match, offset: number) => {
      curlyPositions.push([offset, match]);
      return CURLY_TO_STRAIGHT[match] ?? match;
    },
  );

  // Each ")"-delimited chunk is title-cased on its own.
  // NOTE(review): presumably so the word after a closing parenthesis is
  // capitalized like the start of a phrase; confirm against `title`.
  const titleCased = normalized
    .split(")")
    .map((chunk) => libraryTitle(chunk, options))
    .join(")");

  const intermediateTitle = lowercaseHyphenatedTails(
    titleCased,
    options?.special ?? [],
  );
  if (curlyPositions.length === 0) return intermediateTitle;

  const chars = intermediateTitle.split("");
  for (const [offset, original] of curlyPositions) {
    // Only restore when the slot still holds the straight quote that was
    // substituted in. If the offsets ever drift, the quote stays straight
    // instead of overwriting an unrelated character.
    if (chars[offset] === CURLY_TO_STRAIGHT[original]) {
      chars[offset] = original;
    }
  }
  return chars.join("");
};
cases", () => { + test("preserves GitHub, FujoCoded, LLC, and NPM", async () => { + expect( + await processMarkdown( + "## merging with github's interface: pull requests", + ), + ).toBe("## Merging with GitHub's Interface: Pull Requests"); + expect(await processMarkdown("### an intro to fujocoded llc")).toBe( + "### An Intro to FujoCoded LLC", + ); + expect(await processMarkdown("## next up: building with npm")).toBe( + "## Next Up: Building with NPM", + ); + }); + + test("preserves SHA", async () => { + expect(await processMarkdown("#### sha: your commit's unique name")).toBe( + "#### SHA: Your Commit's Unique Name", + ); + }); + + test("preserves TL;DR", async () => { + expect(await processMarkdown("# tl;dr: why this matters")).toBe( + "# TL;DR: Why This Matters", + ); + }); +}); + +describe("Respects tricky punctuations", () => { + test("handles curly quotes (as produced by smartypants)", async () => { + expect( + await processMarkdown( + "#### “answer me, darling~”: git & github’s connection check!", + ), + ).toBe("#### “Answer Me, Darling~”: Git & GitHub’s Connection Check!"); + }); + + test("handles apostrophes inside a word", async () => { + expect( + await processMarkdown("## git'ing good: more commit scenarios"), + ).toBe("## Git'ing Good: More Commit Scenarios"); + }); + + test("handles a trailing question mark with an inner apostrophe", async () => { + expect(await processMarkdown("## i'm ready to practice, now what?")).toBe( + "## I'm Ready to Practice, Now What?", + ); + }); + + test("handles repeated punctuation (???)", async () => { + expect(await processMarkdown("### step ???: git advanced")).toBe( + "### Step ???: Git Advanced", + ); + }); + + test("handles a leading ellipsis", async () => { + expect(await processMarkdown("### ...and more!")).toBe("### ...and More!"); + }); + + test("handles parenthesized possessives", async () => { + expect( + await processMarkdown("### traveling through (your code's) history"), + ).toBe("### Traveling Through 
(Your Code's) History"); + }); +}); + +describe("Handles inline code spans", () => { + test("handles an inline code span inside a heading", async () => { + expect( + await processMarkdown( + "### the flavors of `git reset`: soft, hard, or mixed", + ), + ).toBe("### The Flavors of `git reset`: Soft, Hard, or Mixed"); + }); + + test("handles multiple inline code spans with separators", async () => { + expect(await processMarkdown("## git & github's `push`/`pull` dance")).toBe( + "## Git & GitHub's `push`/`pull` Dance", + ); + }); + + test("handles punctuation immediately following an inline code span", async () => { + expect( + await processMarkdown("## multiverse collapse: prepare to `merge`!"), + ).toBe("## Multiverse Collapse: Prepare to `merge`!"); + }); + + test("handles a comma-separated list of inline code spans with a hyphenated term", async () => { + expect( + await processMarkdown( + "#### the jokes write themselves: `ours`, `theirs`, and three-way merges", + ), + ).toBe( + "#### The Jokes Write Themselves: `ours`, `theirs`, and Three-way Merges", + ); + }); +}); + +describe("Handles hyphenated compound words", () => { + test("keeps the second word lowercase in a hyphenated name with a possessive", async () => { + expect( + await processMarkdown("### our toy project: boba-tan's sexyman shrine"), + ).toBe("### Our Toy Project: Boba-tan's Sexyman Shrine"); + }); + + test("handles hyphenated words alongside a slash separator", async () => { + expect(await processMarkdown("## push/pull: git's memory-sync dance")).toBe( + "## Push/pull: Git's Memory-sync Dance", + ); + }); + + test("handles common hyphenated prefixes", async () => { + expect( + await processMarkdown("### pre-commit hooks for post-merge cleanups"), + ).toBe("### Pre-commit Hooks for Post-merge Cleanups"); + }); + + test("handles hyphenated phrases with small words inside", async () => { + expect(await processMarkdown("## up-to-date and ready-to-merge")).toBe( + "## Up-to-date and Ready-to-merge", 
+ ); + }); +});