66ALL_PAGES = []
77
88def promote_headings_outside_fences (content ):
9- """Safely promotes headings only when they are outside of code blocks."""
109 in_fence = False
1110 out = []
1211 for line in content .splitlines ():
@@ -20,7 +19,6 @@ def promote_headings_outside_fences(content):
2019 return "\n " .join (out )
2120
2221def prettify_tech_name (name ):
23- """Dynamically formats tech slugs with standard overrides."""
2422 if not name : return ""
2523 overrides = {
2624 "nextjs" : "Next.js" ,
@@ -31,48 +29,92 @@ def prettify_tech_name(name):
3129 }
3230 return overrides .get (name .lower (), name .capitalize ())
3331
# Matches {% include "path" %} / {% include 'path' %} directives.
_INCLUDE_DIRECTIVE_RE = re.compile(r'\{%\s*include\s*[\'"](.+?)[\'"]\s*%\}')
# Matches a leading front-matter block: an opening '---' line at the very
# start of the text up to the next line that consists solely of '---'.
_INCLUDE_FRONT_MATTER_RE = re.compile(
    r'\A---[ \t]*\r?\n.*?^---[ \t]*$\s*', re.DOTALL | re.MULTILINE
)


def resolve_includes(content, current_dir, docs_dir, visited=None):
    """Recursively expand ``{% include "path" %}`` directives in *content*.

    Args:
        content: Text that may contain include directives.
        current_dir: Directory against which relative include paths resolve.
        docs_dir: Directory against which include paths starting with ``/``
            resolve.
        visited: Set of absolute paths of the files currently being expanded.
            Used for cycle detection; callers normally leave it as ``None``.

    Returns:
        *content* with every directive replaced by the text of its target
        file (leading front matter removed, nested includes expanded). A
        directive whose target is missing, is not a regular file, cannot be
        read, or is already being expanded further up the include chain is
        replaced by an empty string.
    """
    active = set() if visited is None else visited

    def replace_match(match):
        rel_path = match.group(1)
        base_dir = docs_dir if rel_path.startswith('/') else current_dir
        target = os.path.abspath(os.path.join(base_dir, rel_path.lstrip('/')))

        # A target already on the active chain would recurse forever.
        if target in active or not os.path.isfile(target):
            return ""
        try:
            with open(target, 'r', encoding='utf-8') as f:
                text = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"WARNING: Could not read include {target}: {e}")
            return ""

        body = _INCLUDE_FRONT_MATTER_RE.sub('', text, count=1)
        active.add(target)
        try:
            return resolve_includes(body, os.path.dirname(target), docs_dir, active)
        finally:
            # Unwind so the same snippet can be included again elsewhere;
            # only a genuine cycle is suppressed.
            active.discard(target)

    return _INCLUDE_DIRECTIVE_RE.sub(replace_match, content)
49+
# The self-closing form is tried first: with the paired form first, a
# self-closing tag would be matched up to a later, unrelated </redoc>.
_REDOC_SPEC_RE = re.compile(
    r'<redoc[^>]*\sspec-url=["\']([^"\']+)["\'][^>]*/>'
    r'|<redoc[^>]*\sspec-url=["\']([^"\']+)["\'][^>]*>.*?</redoc>',
    re.IGNORECASE | re.DOTALL,
)


def inject_api_spec(content, page_src_path, docs_dir):
    """Replace ``<redoc spec-url="...">`` tags with the referenced spec text.

    Each tag is replaced by an "API Specification (OpenAPI)" section holding
    the spec file's text in a fenced ``yaml`` block. The spec file is looked
    up, in order: relative to the page's directory (skipped for URLs starting
    with ``/``), relative to *docs_dir*, in ``<docs_dir>/apis/restapis/`` by
    file name, and finally by file name anywhere under the parent directory
    of *docs_dir*.

    Args:
        content: Page text that may contain redoc tags.
        page_src_path: Path of the page source file; its directory anchors
            relative spec URLs.
        docs_dir: Documentation root directory.

    Returns:
        *content* with every redoc tag replaced. A tag whose spec cannot be
        found or read is replaced by two newlines and a message is printed.
    """
    page_dir = os.path.dirname(page_src_path)
    search_root = os.path.dirname(docs_dir)

    def find_spec(spec_url, filename):
        candidates = []
        # A leading '/' means "site root", not "filesystem root".
        if not spec_url.startswith('/'):
            candidates.append(os.path.abspath(os.path.join(page_dir, spec_url)))
        candidates.append(os.path.abspath(os.path.join(docs_dir, spec_url.lstrip('/'))))
        candidates.append(os.path.abspath(os.path.join(docs_dir, "apis", "restapis", filename)))
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate

        # Last resort: search by bare file name under the parent of docs_dir.
        for root, dirs, files in os.walk(search_root):
            dirs.sort()  # fixed traversal order -> the same file wins every build
            if filename in files:
                return os.path.join(root, filename)
        return None

    def replace_with_yaml(match):
        spec_url = match.group(1) or match.group(2)
        if not spec_url:
            return match.group(0)

        spec_url = re.sub(r'\{\{\s*base_path\s*\}\}', '', spec_url)
        filename = os.path.basename(spec_url)

        target_path = find_spec(spec_url, filename)
        if target_path is None:
            print(f"CRITICAL FAILURE: Cannot find {filename} anywhere in {search_root}")
            return "\n\n"

        try:
            with open(target_path, 'r', encoding='utf-8') as f:
                yaml_content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"ERROR: Read failed for {target_path}: {e}")
            return "\n\n"

        # Log where the spec was actually found to help debugging.
        print(f"FOUND: {filename} at {target_path}")
        return f"\n\n## API Specification (OpenAPI)\n\n```yaml\n{yaml_content}\n```\n"

    return _REDOC_SPEC_RE.sub(replace_with_yaml, content)
93+
def on_pre_build(config):
    """Rebind both module-level trackers to fresh empty lists.

    Running this at the start of a build keeps entries from an earlier build
    in the same process from accumulating.
    """
    global GENERATED_GUIDES, ALL_PAGES
    GENERATED_GUIDES, ALL_PAGES = [], []
3998
def on_nav(nav, config, files):
    """Create one merged guide per section directory under ``complete-guides/``.

    Walks every section in *nav*. When a section's first page lives in a
    sub-directory of a ``complete-guides`` directory, a merged guide is
    created for that directory (once per directory) via
    ``create_merged_guide``.

    Args:
        nav: Iterable of navigation items exposing ``is_section``,
            ``children`` and ``title``.
        config: Build configuration, passed through to ``create_merged_guide``.
        files: Unused here.

    Returns:
        *nav*, unchanged.
    """
    TARGET_PARENT_DIR = "complete-guides/"
    parent_name = TARGET_PARENT_DIR.rstrip('/')
    processed_paths = set()

    def register_guide(item, guide_dir):
        if guide_dir in processed_paths:
            return
        path_parts = guide_dir.split('/')
        # The caller's substring test also matches e.g. "my-complete-guides/";
        # require an exact path component instead of letting .index() raise.
        if parent_name not in path_parts:
            return
        sub_parts = path_parts[path_parts.index(parent_name) + 1:]
        # Pages sitting directly in complete-guides/ belong to no guide.
        if not sub_parts:
            return

        processed_paths.add(guide_dir)
        guide_slug = "-".join(sub_parts)
        tech_prefix = prettify_tech_name(sub_parts[0].split('-')[0])
        if tech_prefix.lower() in item.title.lower():
            display_title = item.title
        else:
            display_title = f"{tech_prefix} {item.title}"
        create_merged_guide(item, guide_slug, display_title, config)

    def walk_nav(items):
        for item in items:
            if not item.is_section:
                continue
            first_page = find_first_page(item)
            if first_page and TARGET_PARENT_DIR in first_page.file.src_path:
                register_guide(item, os.path.dirname(first_page.file.src_path))
            walk_nav(item.children)

    walk_nav(nav)
    return nav
78120
@@ -84,129 +126,63 @@ def find_first_page(section):
84126 if res : return res
85127 return None
86128
87- def inject_api_spec (content , page_src_path , docs_dir ):
88- docs_root = os .path .abspath (os .path .normpath (docs_dir ))
89-
90- def _within_docs_dir (path ):
91- abs_path = os .path .abspath (os .path .normpath (path ))
92- return os .path .commonpath ([docs_root ]) == os .path .commonpath ([docs_root , abs_path ])
93-
94- redoc_pattern = re .compile (r'<redoc[^>]*\sspec-url=["\']([^"\']+)["\']' , re .IGNORECASE | re .DOTALL )
95- match = redoc_pattern .search (content )
96- if match :
97- spec_relative_path = match .group (1 )
98- page_dir = os .path .dirname (page_src_path )
99- if spec_relative_path .startswith ('/' ):
100- spec_path = os .path .abspath (os .path .normpath (os .path .join (docs_dir , spec_relative_path .lstrip ('/' ))))
101- elif spec_relative_path .startswith ('../' ):
102- spec_path = os .path .abspath (os .path .normpath (os .path .join (page_dir , spec_relative_path )))
103- else :
104- spec_path = os .path .abspath (os .path .normpath (os .path .join (page_dir , spec_relative_path )))
105- if not _within_docs_dir (spec_path ):
106- print (f"ERROR: spec-url resolves outside docs_dir, skipping: { spec_path } " )
107- return content
108- if not os .path .exists (spec_path ):
109- filename = os .path .basename (spec_relative_path )
110- fallback_path = os .path .abspath (os .path .normpath (os .path .join (docs_dir , "apis" , "restapis" , filename )))
111- if not _within_docs_dir (fallback_path ):
112- print (f"ERROR: fallback path resolves outside docs_dir, skipping: { fallback_path } " )
113- return content
114- if os .path .exists (fallback_path ):
115- spec_path = fallback_path
116- if os .path .exists (spec_path ):
117- try :
118- with open (spec_path , 'r' , encoding = 'utf-8' ) as f :
119- yaml_content = f .read ()
120- return content + f"\n \n ## API Specification (OpenAPI)\n \n ```yaml\n { yaml_content } \n ```\n "
121- except Exception as e :
122- print (f"ERROR: Read failed: { e } " )
123- return content
124-
125- def resolve_includes (content , current_dir , docs_dir , visited = None ):
126- if visited is None :
127- visited = set ()
128- docs_root = os .path .abspath (docs_dir )
129- include_pattern = re .compile (r'\{%\s*include\s*[\'"](.+?)[\'"]\s*%\}' )
130- def replace_match (match ):
131- rel_path = match .group (1 )
132- raw = os .path .join (docs_dir , rel_path .lstrip ('/' )) if rel_path .startswith ('/' ) else os .path .join (current_dir , rel_path )
133- target = os .path .abspath (raw )
134- if os .path .commonpath ([docs_root ]) != os .path .commonpath ([docs_root , target ]):
135- return ""
136- if target in visited : return ""
137- if os .path .exists (target ):
138- visited .add (target )
139- with open (target , 'r' , encoding = 'utf-8' ) as f :
140- return resolve_includes (f .read (), os .path .dirname (target ), docs_dir , visited )
141- return ""
142- return include_pattern .sub (replace_match , content )
143-
def create_merged_guide(section_item, guide_slug, title, config):
    """Flatten every page of a section into one Markdown file.

    Each page under *section_item* (recursing into nested sections) is read,
    stripped of leading front matter, passed through ``resolve_includes``,
    ``inject_api_spec`` and ``promote_headings_outside_fences``, and appended
    under a "Section" heading. The result is written to
    ``<site_dir>/complete-guides/<guide_slug>.md`` and recorded in
    ``GENERATED_GUIDES``.

    Args:
        section_item: Navigation section whose ``children`` are merged.
        guide_slug: File name (without extension) of the merged guide.
        title: Display title used in the top heading and the index entry.
        config: Mapping providing ``docs_dir`` and ``site_dir``.
    """
    docs_dir = config['docs_dir']
    # Front matter = a '---' line at the very start up to the next line that
    # is exactly '---'; a '---' inside a value must not end the block early.
    frontmatter_re = re.compile(
        r'\A---[ \t]*\r?\n.*?^---[ \t]*$\s*', re.DOTALL | re.MULTILINE
    )

    def collect_md(items):
        chunks = []
        for item in items:
            if item.is_page:
                # Best effort: one unreadable page must not abort the guide.
                try:
                    with open(item.file.abs_src_path, 'r', encoding='utf-8') as f:
                        body = frontmatter_re.sub('', f.read(), count=1)
                    res = resolve_includes(body, os.path.dirname(item.file.abs_src_path), docs_dir)
                    res = inject_api_spec(res, item.file.abs_src_path, docs_dir)
                    res = promote_headings_outside_fences(res)
                    if res.strip():
                        chunks.append(f"\n\n---\n## Section: {item.title}\n\n{res}\n")
                except Exception as e:
                    print(f"Error merging {item.title}: {e}")
            elif item.is_section:
                chunks.append(collect_md(item.children))
        return "".join(chunks)

    combined_md = f"# {title} Complete Guide\n\n" + collect_md(section_item.children)

    guide_file = f"{guide_slug}.md"
    dest_path = os.path.join(config['site_dir'], "complete-guides", guide_file)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    with open(dest_path, 'w', encoding='utf-8') as f:
        f.write(combined_md)

    guide_url = f"complete-guides/{guide_file}"
    # Two directories can collapse to the same slug; list each URL once.
    if not any(g["url"] == guide_url for g in GENERATED_GUIDES):
        GENERATED_GUIDES.append({
            "title": title,
            "url": guide_url,
            "desc": f"Comprehensive {title.lower()} integration",
        })
176157
def on_post_page(output, page, config):
    """Write a Markdown copy of *page* into the site and index it.

    The page's Markdown is passed through ``resolve_includes``,
    ``inject_api_spec`` and ``promote_headings_outside_fences`` and written
    next to the rendered HTML. Its URL relative to ``site_dir`` is appended
    to ``ALL_PAGES`` unless already present. ``index.md`` pages under
    ``complete-guides/`` are skipped.

    Args:
        output: Rendered page output; not modified here.
        page: Page object exposing ``file``, ``markdown`` and ``title``.
        config: Mapping providing ``docs_dir`` and ``site_dir``.

    Returns:
        None.
    """
    src_path = page.file.src_path
    if "complete-guides/" in src_path and src_path.endswith("index.md"):
        return

    docs_dir = config['docs_dir']
    site_dir = config['site_dir']
    abs_dest = page.file.abs_dest_path

    # Directory URLs render to <dir>/index.html; the copy becomes <dir>.md.
    if os.path.basename(abs_dest) == "index.html":
        page_dir = os.path.dirname(abs_dest)
        if os.path.abspath(page_dir) == os.path.abspath(site_dir):
            # Site home page: "<dir>.md" would land outside site_dir.
            dest_path = os.path.join(page_dir, "index.md")
        else:
            dest_path = os.path.join(
                os.path.dirname(page_dir), f"{os.path.basename(page_dir)}.md"
            )
    else:
        dest_path = os.path.splitext(abs_dest)[0] + ".md"

    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    content = resolve_includes(page.markdown, os.path.dirname(page.file.abs_src_path), docs_dir)
    content = inject_api_spec(content, page.file.abs_src_path, docs_dir)
    content = promote_headings_outside_fences(content)

    with open(dest_path, 'w', encoding='utf-8') as f:
        f.write(content)

    rel_url = os.path.relpath(dest_path, site_dir)
    if not any(p["url"] == rel_url for p in ALL_PAGES):
        ALL_PAGES.append({"title": page.title, "url": rel_url})
208183
209184def on_post_build (config ):
185+ # llms.txt and llms-full.txt generation
210186 llms_path = os .path .join (config ['site_dir' ], "llms.txt" )
211187 lines = [
212188 "# WSO2 Identity Server Documentation" ,
@@ -216,20 +192,15 @@ def on_post_build(config):
216192 "## Complete Integration Guides (Flattened)" ,
217193 "End-to-end framework-specific implementation guides with all details:" ,
218194 ]
219-
220- seen_urls = set ()
221- for guide in sorted (GENERATED_GUIDES , key = lambda x : x ['title' ]):
222- if guide ['url' ] not in seen_urls :
223- lines .append (f"- [{ guide ['title' ]} Complete Guide](./{ guide ['url' ]} ) - { guide ['desc' ]} " )
224- seen_urls .add (guide ['url' ])
195+ for g in sorted (GENERATED_GUIDES , key = lambda x : x ['title' ]):
196+ lines .append (f"- [{ g ['title' ]} Complete Guide](./{ g ['url' ]} ) - { g ['desc' ]} " )
225197
226198 lines .extend (["" , "---" , "## Site Map" , "- [Comprehensive file index for advanced discovery](./llms-full.txt)" ])
227-
228199 with open (llms_path , "w" , encoding = "utf-8" ) as f : f .write ("\n " .join (lines ))
229200
230201 full_path = os .path .join (config ['site_dir' ], "llms-full.txt" )
231202 full_lines = ["# WSO2 Identity Server - Full Document Index" , "" ]
232- for page in sorted (ALL_PAGES , key = lambda x : x ['url' ]):
233- full_lines .append (f"- [{ page ['title' ]} ](./{ page ['url' ]} )" )
203+ for p in sorted (ALL_PAGES , key = lambda x : x ['url' ]):
204+ full_lines .append (f"- [{ p ['title' ]} ](./{ p ['url' ]} )" )
234205 with open (full_path , "w" , encoding = "utf-8" ) as f : f .write ("\n " .join (full_lines ))
235206 print (f"SUCCESS - llms.txt and llms-full.txt generated." )
0 commit comments