-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf_parser.py
More file actions
25 lines (22 loc) · 807 Bytes
/
pdf_parser.py
File metadata and controls
25 lines (22 loc) · 807 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os
from PyPDF2 import PdfReader
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path to the PDF file; handed straight to ``PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. A page whose ``extract_text()`` result is
        falsy (``None`` or ``""``) contributes an empty string.
    """
    reader = PdfReader(pdf_path)
    # Build the result with a single join instead of growing a string with
    # ``+=`` once per page, which can degrade to quadratic copying.
    return "".join(page.extract_text() or "" for page in reader.pages)
def save_text_to_file(text: str, output_path: str):
    """Write ``text`` to ``output_path`` as UTF-8.

    The file is opened in ``'w'`` mode, so an existing file at that path is
    overwritten.

    Args:
        text: The string to write.
        output_path: Destination path of the text file.
    """
    with open(output_path, mode="w", encoding="utf-8") as out_file:
        out_file.write(text)
if __name__ == "__main__":
    # Example usage: extract the text of docs/sample.pdf into docs/sample.txt.
    pdf_file = "docs/sample.pdf"
    # Name the output path once so the call and the message cannot drift apart.
    txt_file = "docs/sample.txt"
    if os.path.exists(pdf_file):
        text = extract_text_from_pdf(pdf_file)
        save_text_to_file(text, txt_file)
        print(f"The text has been extracted and saved to {txt_file}")
    else:
        print("File not found. Put sample.pdf in the docs/ folder.")