diff --git a/.gitignore b/.gitignore index 1ca77ccb2049..039402eeb513 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ node_modules/ mytarball.tar -mytarball.tar.gz \ No newline at end of file +mytarball.tar.gz +.git/ +sqlite_files/ +compressed_hadith_sqlite/ \ No newline at end of file diff --git a/README.md b/README.md index aa7e96e9a664..07d8a6836869 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ - No Rate limits - Multiple Languages - Multiple Grades +- **Offline Support**: SQLite databases with FTS5 search (new) **URL Structure:** @@ -57,6 +58,13 @@ The above formats also work for fallback i.e if `.min.json` link fails, you can > Get Section 7:
[https://cdn.jsdelivr.net/gh/fawazahmed0/hadith-api@1/editions/eng-abudawud/sections/7.json](https://cdn.jsdelivr.net/gh/fawazahmed0/hadith-api@1/editions/eng-abudawud/sections/7.json "https://cdn.jsdelivr.net/gh/fawazahmed0/hadith-api@1/editions/eng-abudawud/sections/7.json") +- **SQLite Offline API** (Beta)
+> Optimized for mobile/offline apps with FTS5 search. +> [Documentation & Interactive Guide](https://ismailhosenismailjames.github.io/hadith-api/ "Documentation")
+> **Hosted Compressed Files:** [https://ismailhosenismailjames.github.io/compressed_hadith_sqlite/](https://ismailhosenismailjames.github.io/compressed_hadith_sqlite/)
+> **Public Repo:** [https://github.com/IsmailHosenIsmailJames/compressed_hadith_sqlite](https://github.com/IsmailHosenIsmailJames/compressed_hadith_sqlite)
+> **Master Metadata:** `compressed_hadith_sqlite/all_info.json` + - `/info`
> Get all the details about hadith book, such as hadith grades, books reference etc
@@ -65,6 +73,12 @@ The above formats also work for fallback i.e if `.min.json` link fails, you can ### Contribution: Without your contribution, this work won't survive, whenever you find any issue, please let me [Know](https://github.com/fawazahmed0/hadith-api/issues/new "Know"), so that I can also fix it and people can benefit from it, in case of any question, issue or problems etc
# --- README.md fragment (continuation of the hunk above) ---------------------
# you can let me [Know](https://github.com/fawazahmed0/hadith-api/issues/new "Know")
#
# +#### Generating SQLite Databases
# +If you have updated the translations and want to regenerate the SQLite databases:
# +1. Run `python3 json_to_sqlite.py` (Generates relational DBs with FTS5).
# +2. Run `python3 compress_and_info.py` (Organizes, zips, and updates metadata).
# +3. The output will be in `compressed_hadith_sqlite/`.
#
# - Please help by adding new translations to this repo, you can share me the
#   translation [here](https://github.com/fawazahmed0/hadith-api/issues/new "here") or
#
# --- diff: new file compress_and_info.py -------------------------------------
import os
import json
import sqlite3
import sys
import zipfile
import hashlib
from typing import Dict, List, Any


def get_sha256(file_path: str) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*.

    Hashes in 4 KiB chunks so large database files never have to be
    loaded into memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            digest.update(chunk)
    return digest.hexdigest()


def get_db_metadata(db_path: str) -> Dict[str, Any]:
    """Extract book name, hadith count and section count from a generated DB.

    Assumes the schema produced by json_to_sqlite.py (``book_info`` and
    ``sections`` tables). The connection is closed even when a query
    raises (the original leaked it on error).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT book_name, hadith_count FROM book_info LIMIT 1")
        book_row = cursor.fetchone()
        cursor.execute("SELECT COUNT(*) FROM sections")
        section_count = cursor.fetchone()[0]
    finally:
        conn.close()

    return {
        "name": book_row[0] if book_row else "Unknown",
        "hadith_count": book_row[1] if book_row else 0,
        "section_count": section_count,
    }


def compress_file(src_path: str, dest_path: str):
    """Zip *src_path* into the archive *dest_path*, creating parent dirs."""
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    with zipfile.ZipFile(dest_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Store only the basename so the archive extracts flat.
        zipf.write(src_path, os.path.basename(src_path))


def process_all():
    """Compress every DB in sqlite_files/ and write per-language + master metadata.

    Output layout under compressed_hadith_sqlite/:
      <lang>/<edition>.sqlite.zip   compressed database
      <lang>/info.json              books available for that language
      all_info.json                 master index over all languages
    """
    src_dir = "sqlite_files"
    dest_base_dir = "compressed_hadith_sqlite"

    if not os.path.exists(src_dir):
        print(f"Error: {src_dir} not found.")
        return

    os.makedirs(dest_base_dir, exist_ok=True)

    db_files = [f for f in os.listdir(src_dir) if f.endswith(".sqlite")]
    languages: Dict[str, List[Dict[str, Any]]] = {}

    for db_file in db_files:
        # Filenames follow the [lang]-[edition].sqlite convention.
        lang = db_file.split('-')[0]
        db_path = os.path.join(src_dir, db_file)

        print(f"Processing {db_file} (Language: {lang})...")

        # 1. Get metadata and checksum (checksum is of the *uncompressed* DB).
        metadata = get_db_metadata(db_path)
        checksum = get_sha256(db_path)

        # 2. Compress
        zip_filename = f"{db_file}.zip"
        zip_path = os.path.join(dest_base_dir, lang, zip_filename)
        compress_file(db_path, zip_path)

        # 3. Add to language info
        languages.setdefault(lang, []).append({
            "book": db_file.replace(".sqlite", ""),
            "name": metadata["name"],
            "hadith_count": metadata["hadith_count"],
            "section_count": metadata["section_count"],
            "checksum": checksum,
            "zip_path": f"{lang}/{zip_filename}",
            "file_size": os.path.getsize(db_path),
            "zip_size": os.path.getsize(zip_path),
        })

    # 4. Generate info.json for each language
    for lang, books in languages.items():
        lang_dir = os.path.join(dest_base_dir, lang)
        info_json_path = os.path.join(lang_dir, "info.json")
        with open(info_json_path, "w", encoding="utf-8") as f:
            json.dump({"language": lang, "books": books},
                      f, indent=4, ensure_ascii=False)
        print(f"Generated info.json for {lang} in {lang_dir}")

    # 5. Generate master info.json for all languages
    master_info_path = os.path.join(dest_base_dir, "all_info.json")
    with open(master_info_path, "w", encoding="utf-8") as f:
        json.dump(languages, f, indent=4, ensure_ascii=False)
    print(f"Generated master info file: {master_info_path}")


if __name__ == "__main__":
    process_all()
    print("Done!")


# --- diff: new file json_to_sqlite.py ----------------------------------------
from dataclasses import dataclass
from typing import Optional


# NOTE(review): the four dataclasses below are not referenced by the code in
# this file; they document the intended record shapes. Confirm external use
# before removing.
@dataclass
class HadithSection:
    section_name: str
    start_hadith_number: int
    end_hadith_number: int
    hadith_count: int


@dataclass
class HadithGrade:
    name: str
    grade: str


@dataclass
class HadithReference:
    book: int
    hadith: int


@dataclass
class Hadith:
    hadith_number: int
    text: str
    grades: List[HadithGrade]
    reference: HadithReference


def create_database(db_path: str):
    """Create (or recreate) an edition database and return an open connection.

    Schema: sections, hadiths, grades, book_info, plus an external-content
    FTS5 table ``hadiths_fts`` kept in sync with ``hadiths`` by triggers.
    Any existing file at *db_path* is removed first.
    """
    if os.path.exists(db_path):
        os.remove(db_path)

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # 1. Hadith sections info
    cursor.execute('''
        CREATE TABLE sections (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            section_name TEXT,
            start_hadith_number INTEGER,
            end_hadith_number INTEGER,
            hadith_count INTEGER
        )
    ''')

    # 2. All hadiths of the edition
    cursor.execute('''
        CREATE TABLE hadiths (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            hadith_number INTEGER,
            text TEXT,
            section_id INTEGER,
            book_id INTEGER,
            FOREIGN KEY (section_id) REFERENCES sections (id)
        )
    ''')

    # 3. Scholar grades (many rows per hadith)
    cursor.execute('''
        CREATE TABLE grades (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            hadith_id INTEGER,
            scholar_name TEXT,
            grade TEXT,
            FOREIGN KEY (hadith_id) REFERENCES hadiths (id)
        )
    ''')

    # 4. Basic info of the hadith book
    cursor.execute('''
        CREATE TABLE book_info (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            book_name TEXT,
            hadith_count INTEGER
        )
    ''')

    # FTS5 external-content index over hadiths.text for fast search
    cursor.execute('''
        CREATE VIRTUAL TABLE hadiths_fts USING fts5(
            text,
            content='hadiths',
            content_rowid='id'
        )
    ''')

    # Triggers keep the FTS index in sync with the content table
    # (FTS5 external-content convention: the 'delete' command row removes
    # the old entry before an update/delete).
    cursor.execute('''
        CREATE TRIGGER hadiths_ai AFTER INSERT ON hadiths BEGIN
            INSERT INTO hadiths_fts(rowid, text) VALUES (new.id, new.text);
        END;
    ''')

    cursor.execute('''
        CREATE TRIGGER hadiths_ad AFTER DELETE ON hadiths BEGIN
            INSERT INTO hadiths_fts(hadiths_fts, rowid, text) VALUES('delete', old.id, old.text);
        END;
    ''')

    cursor.execute('''
        CREATE TRIGGER hadiths_au AFTER UPDATE ON hadiths BEGIN
            INSERT INTO hadiths_fts(hadiths_fts, rowid, text) VALUES('delete', old.id, old.text);
            INSERT INTO hadiths_fts(rowid, text) VALUES (new.id, new.text);
        END;
    ''')

    conn.commit()
    return conn


def get_list_of_hadith_folder(base_path: str) -> List[str]:
    """Return the edition sub-directories directly under *base_path*."""
    return [item for item in os.listdir(base_path)
            if os.path.isdir(os.path.join(base_path, item))]


def get_sections_list(hadith_folder_path: str) -> List[str]:
    """Return an edition's section JSON filenames, sorted numerically.

    Skips .min.json variants and 0.json (the whole-book pseudo-section).
    """
    sections_path = os.path.join(hadith_folder_path, "sections")
    if not os.path.exists(sections_path):
        return []
    sections = [item for item in os.listdir(sections_path)
                if item.endswith(".json")
                and not item.endswith(".min.json")
                and item != "0.json"]
    # Sort numerically (1.json, 2.json, ..., 10.json) to maintain order.
    return sorted(sections, key=lambda x: int(x.split('.')[0]))


def process_edition(base_path: str, edition_folder: str):
    """Convert one edition's section JSON files into sqlite_files/<edition>.sqlite.

    Reads every section file, inserts sections/hadiths/grades, and finally
    records the book summary in book_info.
    """
    edition_path = os.path.join(base_path, edition_folder)
    output_dir = "sqlite_files"
    os.makedirs(output_dir, exist_ok=True)
    db_path = os.path.join(output_dir, f"{edition_folder}.sqlite")
    print(f"Processing {edition_folder} -> {db_path}...")

    conn = create_database(db_path)
    cursor = conn.cursor()

    sections_list = get_sections_list(edition_path)
    total_hadiths = 0
    book_name = ""

    for section_file in sections_list:
        section_path = os.path.join(edition_path, "sections", section_file)
        with open(section_path, "r", encoding='utf-8') as f:
            data = json.load(f)
        metadata = data["metadata"]
        hadiths_data = data["hadiths"]

        if not book_name:
            book_name = metadata.get("name", edition_folder)

        # Extract section metadata (keys in the JSON are section numbers).
        section_index = section_file.split('.')[0]
        section_name = metadata["section"].get(section_index, "Unknown Section")
        section_detail = metadata["section_detail"].get(section_index, {})

        start_hadith = section_detail.get("hadithnumber_first", 0)
        end_hadith = section_detail.get("hadithnumber_last", 0)

        # Prefer the declared arabic-number span; otherwise count the actual
        # entries. (Bug fix: with the old `0` defaults, a section missing
        # both keys produced 0 - 0 + 1 == 1, which the `count <= 0` guard
        # never caught, silently recording a count of 1.)
        first = section_detail.get("arabicnumber_first")
        last = section_detail.get("arabicnumber_last")
        if first is not None and last is not None and last >= first:
            count = last - first + 1
        else:
            count = len(hadiths_data)

        # Insert section
        cursor.execute('''
            INSERT INTO sections (section_name, start_hadith_number, end_hadith_number, hadith_count)
            VALUES (?, ?, ?, ?)
        ''', (section_name, start_hadith, end_hadith, count))
        section_id = cursor.lastrowid

        # Insert hadiths and grades
        for h in hadiths_data:
            hadith_num = h.get("hadithnumber", 0)
            text = h.get("text", "")

            cursor.execute('''
                INSERT INTO hadiths (hadith_number, text, section_id, book_id)
                VALUES (?, ?, ?, ?)
            ''', (hadith_num, text, section_id, 1))  # book_id is dummy for now
            hadith_id = cursor.lastrowid

            for g in h.get("grades", []):
                cursor.execute('''
                    INSERT INTO grades (hadith_id, scholar_name, grade)
                    VALUES (?, ?, ?)
                ''', (hadith_id, g["name"], g["grade"]))

            total_hadiths += 1

    # Record the whole-book summary used later by compress_and_info.py.
    cursor.execute('INSERT INTO book_info (book_name, hadith_count) VALUES (?, ?)',
                   (book_name, total_hadiths))

    conn.commit()
    conn.close()
    print(f"Finished {edition_folder}. Total Hadiths: {total_hadiths}")


if __name__ == "__main__":
    base_path = "editions/"
    if not os.path.exists(base_path):
        print(f"Error: {base_path} not found.")
        sys.exit(1)  # sys.exit instead of the site-provided exit() builtin

    for edition in get_list_of_hadith_folder(base_path):
        try:
            process_edition(base_path, edition)
        except Exception as e:
            # Best-effort batch conversion: log and continue with the next edition.
            print(f"Error processing {edition}: {e}")