Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions src/checks/markdown-availability/markdown-url-support.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,41 @@ interface PageResult {
error?: string;
}

/**
 * Infer which markdown URL shape the site serves, from pages checked so far.
 *
 * Only results that are `supported` and carry a non-empty `mdUrl` are counted.
 * A result whose `mdUrl` ends in `/index.md` or `/index.mdx` is a vote for the
 * `page/index.md` form; every other counted result is a vote for `page.md`.
 *
 * @param results Page results gathered so far.
 * @returns 'index' or 'direct' when at least two results were counted and one
 *   form holds at least 80% of them; otherwise null (no clear winner yet).
 */
function detectPreferredMdForm(results: PageResult[]): 'direct' | 'index' | null {
  const indexSuffixes = ['/index.md', '/index.mdx'];
  const tally = { direct: 0, index: 0 };

  results.forEach((entry) => {
    if (entry.supported && entry.mdUrl) {
      const servedUrl = entry.mdUrl;
      const form = indexSuffixes.some((suffix) => servedUrl.endsWith(suffix)) ? 'index' : 'direct';
      tally[form] += 1;
    }
  });

  const counted = tally.direct + tally.index;
  // A single data point is not treated as a pattern.
  if (counted >= 2) {
    if (tally.index / counted >= 0.8) return 'index';
    if (tally.direct / counted >= 0.8) return 'direct';
  }
  return null;
}

/**
 * Reorder toMdUrls() candidates based on the detected site preference.
 *
 * - 'index': every candidate ending in `/index.md` or `/index.mdx` is moved
 *   ahead of the remaining candidates. Relative order inside each group is
 *   preserved, and a new array is returned.
 * - 'direct' or null: the input array is returned as-is (same reference), i.e.
 *   the default order is kept.
 *
 * The 'index' case is a stable partition rather than a plain reverse, so the
 * index form ends up first no matter how many candidates there are or what
 * order they arrive in. For a two-element `[page.md, page/index.md]` list the
 * result is identical to reversing it.
 *
 * The input array is never mutated.
 *
 * @param candidates Candidate markdown URLs for a single page.
 * @param preference Detected site preference, or null when none is known yet.
 * @returns The candidates in the order they should be tried.
 */
function orderCandidates(candidates: string[], preference: 'direct' | 'index' | null): string[] {
  if (preference !== 'index') {
    return candidates;
  }

  // Same suffix rule that is used when detecting the preference.
  const isIndexForm = (candidate: string): boolean =>
    candidate.endsWith('/index.md') || candidate.endsWith('/index.mdx');

  const indexForms = candidates.filter(isIndexForm);
  const otherForms = candidates.filter((candidate) => !isIndexForm(candidate));
  return [...indexForms, ...otherForms];
}

async function check(ctx: CheckContext): Promise<CheckResult> {
const id = 'markdown-url-support';
const category = 'markdown-availability';
Expand All @@ -27,6 +62,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {

const results: PageResult[] = [];
const concurrency = ctx.options.maxConcurrency;
let mdFormPreference: 'direct' | 'index' | null = null;

for (let i = 0; i < pageUrls.length; i += concurrency) {
const batch = pageUrls.slice(i, i + concurrency);
Expand All @@ -38,8 +74,9 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
return { url, mdUrl: url, supported: false, skipped: true, status: 0 };
}
const alreadyMd = /\.mdx?$/i.test(new URL(url).pathname);
const ordered = orderCandidates(candidates, mdFormPreference);
let lastError: string | undefined;
for (const mdUrl of candidates) {
for (const mdUrl of ordered) {
try {
const response = await ctx.http.fetch(mdUrl);
const body = await response.text();
Expand All @@ -62,7 +99,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
}
return {
url,
mdUrl: candidates[0],
mdUrl: ordered[0],
supported: false,
alreadyMd,
status: 0,
Expand All @@ -71,6 +108,13 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
}),
);
results.push(...batchResults);

// After each batch, re-evaluate the preferred .md URL form.
// Once a clear pattern emerges (80%+ one form), subsequent batches
// try the preferred form first, saving one request per page.
if (mdFormPreference === null) {
mdFormPreference = detectPreferredMdForm(results);
}
}

const testedResults = results.filter((r) => !r.skipped);
Expand Down
70 changes: 70 additions & 0 deletions test/unit/checks/markdown-url-support.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,4 +409,74 @@ describe('markdown-url-support', () => {
expect(cached?.markdown?.content).toBe(mdContent);
expect(cached?.markdown?.source).toBe('md-url');
});

it('auto-detects page/index.md preference and tries it first in later batches', async () => {
  // Three pages (a, b, c), each reachable only as page/index.md; page.md is a 404.
  // maxConcurrency is forced to 1 below so that every page forms its own batch.
  // Once pages a and b have both succeeded via page/index.md, the check is
  // expected to try page/index.md first for page c and never request c.md.
  const slugs = ['a', 'b', 'c'];
  const markdownBody = '# Page\n\nContent here.';
  const seenPaths: string[] = [];

  // page.md forms: always 404, but record that they were requested.
  const directHandlers = slugs.map((slug) =>
    http.get(`http://test.local/docs/${slug}.md`, () => {
      seenPaths.push(`/docs/${slug}.md`);
      return new HttpResponse('Not found', { status: 404 });
    }),
  );

  // page/index.md forms: always served as markdown.
  const indexHandlers = slugs.map((slug) =>
    http.get(`http://test.local/docs/${slug}/index.md`, () => {
      seenPaths.push(`/docs/${slug}/index.md`);
      return new HttpResponse(markdownBody, {
        status: 200,
        headers: { 'Content-Type': 'text/markdown' },
      });
    }),
  );

  server.use(...directHandlers, ...indexHandlers);

  const content = `# Docs
> Summary
## Links
- [A](http://test.local/docs/a): A
- [B](http://test.local/docs/b): B
- [C](http://test.local/docs/c): C
`;
  const ctx = makeCtx({ content });
  // One page per batch, so the preference can be re-evaluated between pages.
  ctx.options.maxConcurrency = 1;
  const outcome = await check.run(ctx);

  expect(outcome.status).toBe('pass');

  // Pages a and b ran in default order: page.md (404) first, then page/index.md.
  for (const slug of ['a', 'b']) {
    expect(seenPaths).toContain(`/docs/${slug}.md`);
    expect(seenPaths).toContain(`/docs/${slug}/index.md`);
  }
  // Page c ran after the preference was detected: index form only.
  expect(seenPaths).not.toContain('/docs/c.md');
  expect(seenPaths).toContain('/docs/c/index.md');
});
});