Skip to content

Commit d1cb5d9

Browse files
committed
Fix author archive parity and pagination
1 parent 37ebe81 commit d1cb5d9

9 files changed

Lines changed: 1118 additions & 323 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"migrate:download:rest": "node scripts/export-wp-rest.mjs",
1212
"migrate:convert:wxr": "node scripts/convert-wxr-to-content.mjs",
1313
"migrate:convert:rest": "node scripts/convert-rest-to-content.mjs",
14+
"migrate:authors:map": "node scripts/build-author-post-paths.mjs",
1415
"migrate:related:legacy": "node scripts/extract-legacy-related-posts.mjs",
1516
"migrate:media:manifest": "node scripts/build-media-manifest.mjs",
1617
"migrate:media:manifest:site": "node scripts/build-wp-content-manifest.mjs",
scripts/build-author-post-paths.mjs

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
#!/usr/bin/env node
2+
import fs from 'node:fs/promises';
3+
import path from 'node:path';
4+
import process from 'node:process';
5+
import matter from 'gray-matter';
6+
7+
// ---------------------------------------------------------------------------
// Script entry point: resolve the working paths, build the author -> routes
// map from the WXR exports and the local posts, then write it out as JSON.
// ---------------------------------------------------------------------------
const args = parseArgs(process.argv.slice(2));

// Each location can be overridden with --out, --postsDir and --wxrDir.
const outPath = path.resolve(args.out ?? 'src/data/author-post-paths.json');
const postsDir = path.resolve(args.postsDir ?? 'src/content/posts');
const wxrDir = path.resolve(args.wxrDir ?? 'data/raw');

// Decoded <dc:creator> value from the export -> author slug used as output key.
// Items whose creator is not listed here are skipped by buildAuthorPathMap.
const AUTHOR_ALIASES = {
  Tiffany: 'tiffany',
  'alan@rentmoreweeks.com': 'alan',
  'Our Discount Desk': 'our-discount-desk',
  'Our Travel Reporter': 'our-travel-reporter',
};

// Export route -> local route to try when the export route has no local post.
const ROUTE_ALIASES = new Map([['/where-am-i-24-2/', '/where-am-i-24/']]);

// NOTE: both constants above must be initialised before this call, because the
// functions below read them while this top-level await is running.
const localPosts = await loadLocalPosts(postsDir);
const authorPathMap = await buildAuthorPathMap(wxrDir, localPosts);

// Pretty-printed JSON with a trailing newline.
const serialized = `${JSON.stringify(authorPathMap, null, 2)}\n`;
await fs.mkdir(path.dirname(outPath), { recursive: true });
await fs.writeFile(outPath, serialized);

// Summary: one line per author slug with the number of routes recorded.
console.log(`Author post paths written: ${outPath}`);
Object.entries(authorPathMap).forEach(([slug, routes]) => {
  console.log(`- ${slug}: ${routes.length}`);
});
33+
34+
/**
 * Parse `--name value`, `--name=value` and bare `--flag` command-line options.
 *
 * Tokens that do not start with `--` and are not consumed as a value are
 * ignored. An option followed by nothing, by an empty string, or by another
 * `--option` is stored as the boolean `true`.
 *
 * @param {string[]} argv Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {Record<string, string | true>} Option name without the leading `--` -> value.
 */
function parseArgs(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    if (!arg.startsWith('--')) continue;

    // Split on the FIRST '=' only, so inline values that themselves contain
    // '=' (e.g. `--out=dir/a=b.json`) are kept whole instead of truncated.
    const equalsAt = arg.indexOf('=');
    if (equalsAt !== -1) {
      out[arg.slice(2, equalsAt)] = arg.slice(equalsAt + 1);
      continue;
    }

    const name = arg.slice(2);
    const next = argv[i + 1];
    if (!next || next.startsWith('--')) {
      // No usable value follows: treat the option as a boolean flag.
      out[name] = true;
    } else {
      out[name] = next;
      i += 1; // the value token has been consumed
    }
  }
  return out;
}
55+
56+
/**
 * Index the published Markdown posts found directly inside `rootDir`.
 *
 * Only `*.md` entries are read. A post is indexed when its front matter has
 * `status === 'publish'`, `draft` is not `true`, and both a non-empty
 * normalized `path` and a non-zero `date` timestamp are present.
 *
 * @param {string} rootDir Directory containing the Markdown posts.
 * @returns {Promise<{ byWordpressId: Map<string, object>, byRoute: Map<string, object> }>}
 *   Two lookups over the same `{ wordpressId, route, date }` records. When
 *   several posts share a key, the one read last wins.
 */
async function loadLocalPosts(rootDir) {
  const byWordpressId = new Map();
  const byRoute = new Map();

  const entries = await fs.readdir(rootDir);
  for (const entry of entries) {
    if (!entry.endsWith('.md')) continue;

    const source = await fs.readFile(path.join(rootDir, entry), 'utf8');
    const frontmatter = matter(source).data;

    const isLive = frontmatter.status === 'publish' && frontmatter.draft !== true;
    if (!isLive) continue;

    const wordpressId = String(frontmatter.wordpressId ?? '').trim();
    const route = normalizeRoutePath(frontmatter.path);
    const date = toTimestamp(frontmatter.date);

    // Posts without a usable route or date are left out of the index.
    if (!(route && date)) continue;

    const record = { wordpressId, route, date };

    // The id index is optional per post; the route index always gets an entry.
    if (wordpressId) byWordpressId.set(wordpressId, record);
    byRoute.set(route, record);
  }

  return { byWordpressId, byRoute };
}
86+
87+
/**
 * Build the `author slug -> ordered post routes` map from the WXR exports.
 *
 * Reads every `wordpress-export-posts-NNNN.xml` file in `wxrDir` (sorted by
 * name). For each published `post` item whose creator is listed in
 * AUTHOR_ALIASES, the matching local post is looked up in this order:
 *   1. by WordPress post id,
 *   2. by the item's normalized link,
 *   3. by the ROUTE_ALIASES target of that link.
 * Items with no local match are skipped.
 *
 * @param {string} wxrDir Directory holding the WXR export files.
 * @param {{ byWordpressId: Map<string, object>, byRoute: Map<string, object> }} localPosts
 *   Index produced by `loadLocalPosts`.
 * @returns {Promise<Record<string, string[]>>} One key per slug in
 *   AUTHOR_ALIASES (present even when empty); routes are ordered newest local
 *   post date first, ties broken by route via `localeCompare`.
 */
async function buildAuthorPathMap(wxrDir, localPosts) {
  const files = (await fs.readdir(wxrDir))
    .filter((entry) => /^wordpress-export-posts-\d{4}\.xml$/.test(entry))
    .map((entry) => path.join(wxrDir, entry))
    .sort();

  // slug -> (route -> timestamp); seeded so every known author gets a key.
  const pathsBySlug = new Map(Object.values(AUTHOR_ALIASES).map((slug) => [slug, new Map()]));

  for (const filePath of files) {
    const raw = await fs.readFile(filePath, 'utf8');
    for (const item of iterateItems(raw)) {
      // Own-property check: a bare `AUTHOR_ALIASES[creator]` lookup would also
      // resolve inherited keys such as "constructor" or "__proto__", yielding a
      // truthy non-slug and a crash on the `pathsBySlug.get(slug).set` below.
      if (!Object.prototype.hasOwnProperty.call(AUTHOR_ALIASES, item.creator)) continue;
      const slug = AUTHOR_ALIASES[item.creator];
      if (!slug) continue;
      if (item.postType !== 'post' || item.status !== 'publish') continue;

      const sourceRoute = normalizeRoutePath(item.link);
      const preferred = localPosts.byWordpressId.get(item.wordpressId);
      const fallback = localPosts.byRoute.get(sourceRoute);
      const aliasTarget = ROUTE_ALIASES.get(sourceRoute) ?? '';
      const aliasResolved = aliasTarget ? localPosts.byRoute.get(aliasTarget) : null;
      const resolved = preferred ?? fallback ?? aliasResolved;
      if (!resolved) continue;

      // A post found only through ROUTE_ALIASES is recorded under the export's
      // own route; otherwise the local post's route is used. (`resolved` is
      // known to be set here, so "neither preferred nor fallback" means the
      // alias lookup is what matched.)
      const recordedRoute = preferred || fallback ? resolved.route : sourceRoute;

      pathsBySlug.get(slug).set(recordedRoute, resolved.date);
    }
  }

  return Object.fromEntries(
    [...pathsBySlug.entries()].map(([slug, routes]) => {
      const orderedRoutes = [...routes.entries()]
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
        .map(([route]) => route);
      return [slug, orderedRoutes];
    })
  );
}
126+
127+
/**
 * Lazily yield one plain record per `<item>…</item>` element of a WXR document.
 *
 * Fields are pulled out with regular expressions rather than an XML parser:
 * `creator`, `status` and `postType` are read from CDATA-wrapped elements,
 * `wordpressId` and `link` from plain elements. `creator` and `link` are
 * entity-decoded. Missing fields come back as empty strings.
 *
 * @param {string} xml Full text of one export file.
 * @returns {Generator<{ creator: string, status: string, postType: string, wordpressId: string, link: string }>}
 */
function* iterateItems(xml) {
  for (const [, body] of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
    const creator = decodeXml(extractCdata(body, 'dc:creator'));
    const status = extractCdata(body, 'wp:status');
    const postType = extractCdata(body, 'wp:post_type');
    const wordpressId = extractTag(body, 'wp:post_id');
    const link = decodeXml(extractTag(body, 'link'));

    yield { creator, status, postType, wordpressId, link };
  }
}
140+
141+
/**
 * Return the trimmed CDATA content of the first `<tagName><![CDATA[…]]></tagName>`
 * element in `source`, or `''` when there is none.
 *
 * The tag name is matched case-insensitively and must have no attributes;
 * an element whose content is not wrapped in CDATA does not match.
 *
 * @param {string} source XML fragment to search.
 * @param {string} tagName Element name, e.g. `'dc:creator'`.
 * @returns {string}
 */
function extractCdata(source, tagName) {
  const tag = escapeRegExp(tagName);
  const pattern = new RegExp(`<${tag}><!\\[CDATA\\[([\\s\\S]*?)\\]\\]><\\/${tag}>`, 'i');
  const found = source.match(pattern);
  if (!found) return '';
  return found[1].trim();
}
145+
146+
/**
 * Return the trimmed inner text of the first `<tagName>…</tagName>` element in
 * `source`, or `''` when there is none.
 *
 * The tag name is matched case-insensitively and must have no attributes.
 * The content is returned as-is (no CDATA unwrapping, no entity decoding).
 *
 * @param {string} source XML fragment to search.
 * @param {string} tagName Element name, e.g. `'wp:post_id'`.
 * @returns {string}
 */
function extractTag(source, tagName) {
  const tag = escapeRegExp(tagName);
  const pattern = new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, 'i');
  const found = source.match(pattern);
  if (!found) return '';
  return found[1].trim();
}
150+
151+
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} value Text to escape.
 * @returns {string}
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (character) => `\\${character}`);
}
154+
155+
/**
 * Decode XML character and entity references in `value`.
 *
 * Handles the five predefined XML entities (`&amp;`, `&quot;`, `&apos;`,
 * `&lt;`, `&gt;`) and decimal / hexadecimal character references
 * (`&#038;`, `&#8217;`, `&#x2014;`, …).
 *
 * Decoding is done in a single pass, so text is decoded exactly once:
 * `&amp;lt;` becomes `&lt;`, not `<`. Decoding with a chain of replaces
 * would decode such input twice. References that do not name a valid
 * Unicode scalar value, and unknown named entities, are left untouched.
 *
 * @param {unknown} value Text to decode; falsy values yield `''`.
 * @returns {string}
 */
function decodeXml(value) {
  // Pattern and lookup live inside the function on purpose: this function is
  // called while the module's top-level code is still running, so a `const`
  // declared next to it at module scope would not be initialised yet.
  const reference = /&(#[xX][0-9a-fA-F]+|#[0-9]+|amp|quot|apos|lt|gt);/g;

  return String(value || '').replace(reference, (whole, body) => {
    switch (body) {
      case 'amp':
        return '&';
      case 'quot':
        return '"';
      case 'apos':
        return "'";
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      default:
        break;
    }

    // Numeric reference: "#123" (decimal) or "#x7B" (hexadecimal).
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isScalarValue =
      codePoint > 0 && codePoint <= 0x10ffff && !(codePoint >= 0xd800 && codePoint <= 0xdfff);

    // String.fromCodePoint throws on out-of-range input, hence the guard.
    return isScalarValue ? String.fromCodePoint(codePoint) : whole;
  });
}
168+
169+
/**
 * Turn a post URL or path into a site-relative route with a leading and a
 * trailing slash, e.g. `https://blog.hichee.com/my-post` -> `/my-post/`.
 *
 * The first `http(s)://blog.hichee.com` occurrence is removed, as are any
 * U+FFFC characters (literal or percent-encoded as `%ef%bf%bc`), and the
 * result is trimmed. Empty or falsy input yields `''`.
 *
 * @param {unknown} value URL or path to normalize.
 * @returns {string}
 */
function normalizeRoutePath(value) {
  const stripped = String(value || '')
    .replace(/https?:\/\/blog\.hichee\.com/i, '')
    .replace(/%ef%bf%bc/gi, '')
    .replace(/\uFFFC/g, '')
    .trim();

  if (stripped === '') return '';

  let route = stripped;
  if (!route.startsWith('/')) route = `/${route}`;
  if (!route.endsWith('/')) route = `${route}/`;
  return route;
}
181+
182+
/**
 * Convert a `Date` or a date string into epoch milliseconds.
 *
 * Anything that cannot be interpreted as a date yields `0`. That includes an
 * invalid `Date` instance, whose `getTime()` is `NaN`, so callers can rely on
 * a single "no date" value.
 *
 * @param {unknown} value `Date` instance or value parseable by `Date.parse`.
 * @returns {number} Epoch milliseconds, or `0` when the value is not a valid date.
 */
function toTimestamp(value) {
  const milliseconds =
    value instanceof Date ? value.getTime() : Date.parse(String(value || ''));
  return Number.isFinite(milliseconds) ? milliseconds : 0;
}

src/components/PaginationNav.astro

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
---
2+
/** Inputs of the pagination control. */
interface Props {
  /**
   * URL of the first page. Page 1 links to it unchanged; later pages link to
   * `basePath` followed by `page/N/` (see `pageHref`).
   */
  basePath: string;
  /** Page currently shown; rendered as the non-link "current" item. */
  currentPage: number;
  /** Total number of pages; nothing is rendered unless this is greater than 1. */
  totalPages: number;
}

const { basePath, currentPage, totalPages } = Astro.props;

// With a single page there is nothing to navigate: no items, no prev/next.
const hasMultiplePages = totalPages > 1;

// Page numbers and ellipsis markers to render, in display order.
const items = hasMultiplePages ? buildItems(currentPage, totalPages) : [];

// `null` means the corresponding link is omitted (first / last page).
const previousHref =
  hasMultiplePages && currentPage > 1 ? pageHref(basePath, currentPage - 1) : null;
const nextHref =
  hasMultiplePages && currentPage < totalPages ? pageHref(basePath, currentPage + 1) : null;
12+
13+
/**
 * Work out which page numbers to show and where to put ellipsis markers.
 *
 * Always shown: the first page, the last page, the current page and its two
 * direct neighbours. Near either end the window is widened so that pages 2-3
 * (when `current <= 3`) or the last three pages (when `current >= total - 2`)
 * are shown as well. A gap of more than one page between two shown numbers
 * becomes an `ellipsis` item.
 *
 * @param current Page currently displayed.
 * @param total Total number of pages.
 * @returns Items in display order: `{ type: 'page', key, page }` or
 *   `{ type: 'ellipsis', key }`.
 */
function buildItems(current: number, total: number) {
  const candidates = [1, total, current - 1, current, current + 1];
  if (current <= 3) candidates.push(2, 3);
  if (current >= total - 2) candidates.push(total - 1, total - 2);

  // De-duplicate, drop anything outside 1..total, and order ascending.
  const visible = Array.from(new Set(candidates))
    .filter((page) => page >= 1 && page <= total)
    .sort((a, b) => a - b);

  const items = [];
  for (let index = 0; index < visible.length; index += 1) {
    const page = visible[index];
    // 0 stands for "no previous page" (real page numbers start at 1).
    const before = index === 0 ? 0 : visible[index - 1];
    const hasGap = before !== 0 && page - before > 1;

    if (hasGap) {
      items.push({ type: 'ellipsis', key: `ellipsis-${before}-${page}` });
    }
    items.push({ type: 'page', key: `page-${page}`, page });
  }
  return items;
}
39+
40+
/**
 * Build the URL of a given page of the listing.
 *
 * Page 1 (or anything lower) is the listing root itself and is returned
 * unchanged. Later pages live under `<root>/page/<n>/`.
 *
 * @param rootPath URL of the first page.
 * @param page 1-based page number.
 * @returns The href for that page.
 */
function pageHref(rootPath: string, page: number) {
  if (page <= 1) return rootPath;

  // Insert the separator when the caller passed a root without a trailing
  // slash; otherwise "/author/alan" would become "/author/alanpage/2/".
  // An empty root is left alone so the result stays a relative URL.
  const root = rootPath === '' || rootPath.endsWith('/') ? rootPath : `${rootPath}/`;
  return `${root}page/${page}/`;
}
44+
---
45+
46+
{/*
  Pager markup. Rendered only when there is more than one page.
  Order: optional "Previous" link, numbered items (the current page is a
  non-link span marked aria-current="page"; gaps are ellipsis spans hidden
  from assistive technology), optional "Next" link.
  NOTE(review): the ellipsis span had no visible content; "&hellip;" was
  restored so gaps are actually shown. Confirm no stylesheet already injects
  the glyph via ::before/::after.
*/}
{
  totalPages > 1 && (
    <nav class="pagination-nav" aria-label="Author pages">
      <div class="pagination-nav__list">
        {
          previousHref && (
            <a class="page-numbers page-numbers--nav" href={previousHref} rel="prev">
              Previous
            </a>
          )
        }

        {
          items.map((item) =>
            item.type === 'ellipsis' ? (
              <span class="page-numbers page-numbers--ellipsis" aria-hidden="true">
                &hellip;
              </span>
            ) : item.page === currentPage ? (
              <span class="page-numbers current" aria-current="page">
                {item.page}
              </span>
            ) : (
              <a class="page-numbers" href={pageHref(basePath, item.page)}>
                {item.page}
              </a>
            )
          )
        }

        {
          nextHref && (
            <a class="page-numbers page-numbers--nav" href={nextHref} rel="next">
              Next
            </a>
          )
        }
      </div>
    </nav>
  )
}

0 commit comments

Comments
 (0)