@@ -27,20 +27,57 @@ text_group_types:
2727 list : A list of items (ordered or unordered)
2828 other : Anything that doesn't fit the above
2929
30+ section_types :
31+ front_cover : Front cover page (first page only)
32+ inside_cover : Inside cover page
33+ back_cover : Back cover page (last page only)
34+ separator : Separator page between logical sections
35+ credits : Credits and acknowledgements
36+ foreword : Introduction, overview, or author's note
37+ table_of_contents : Table of contents
38+ boxed_text : Text in a box or callout
39+ text_only : Reading section with only text
40+ text_and_single_image : Reading section with text and a single image
41+ text_and_images : Reading section with text and multiple images
42+ images_only : Section with only images
43+ activity_matching : Matching activity
44+ activity_fill_in_a_table : Table fill-in activity
45+ activity_multiple_choice : Multiple choice activity
46+ activity_true_false : True or false activity
47+ activity_open_ended_answer : Open-ended text response activity
48+ activity_fill_in_the_blank : Fill in the blank activity
49+ activity_sorting : Sorting activity
50+ other : Any other section type
51+
3052metadata :
3153 prompt : metadata_extraction
3254 model : openai:gpt-4o
3355
3456text_classification :
3557 prompt : text_classification
3658 model : openai:gpt-4o
37- concurrency : 5
59+ concurrency : 16
60+
61+ page_sectioning :
62+ prompt : page_sectioning
63+ model : openai:gpt-4o
64+
65+ web_rendering :
66+ prompt : web_generation_html
67+ model : openai:gpt-4o
68+ concurrency : 16
69+ max_retries : 8
3870
3971pruned_text_types :
4072 - header_text
4173 - footer_text
4274 - page_number
4375
76+ pruned_section_types :
77+ - back_cover
78+ - credits
79+ - inside_cover
80+
4481image_filters :
4582 min_side : 100
4683 max_side : 5000
0 commit comments