-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
101 lines (88 loc) · 3.26 KB
/
test.py
File metadata and controls
101 lines (88 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import re
import pysrt
from openpyxl import Workbook
import os
# Function to detect if a word is too long
def word_too_long(text, max_length=13):
    """Report whether any whitespace-separated word in *text* is overly long.

    Args:
        text: The subtitle text to inspect.
        max_length: Longest word length, in characters, that is still
            acceptable. Defaults to 13, the threshold this check has
            always used, so existing one-argument calls are unaffected.

    Returns:
        True if at least one word has more than ``max_length`` characters,
        otherwise False (including for empty or whitespace-only text).
    """
    return any(len(word) > max_length for word in text.split())
# Function to detect if there is only one word
def one_word(text):
    """Tell whether *text* is made up of exactly one whitespace-delimited word.

    Returns:
        True when splitting on whitespace yields a single token; False for
        empty text and for text holding two or more words.
    """
    tokens = text.split()
    return len(tokens) == 1
# Function to detect three successive identical letters
def three_successive_letters(text):
    """Report whether *text* has a letter repeated three or more times in a row.

    The run may sit anywhere inside a word ("helllo"); it does not have to
    form a whole word by itself ("aaa"). Only letters count: runs of digits
    or underscores such as "1000" or "___" are ignored.

    Args:
        text: The subtitle text to inspect.

    Returns:
        True if some letter occurs at least three times consecutively,
        otherwise False.
    """
    # [^\W\d_] means "a word character that is neither a digit nor an
    # underscore", i.e. a letter in any script; \1{2,} then demands two or
    # more further copies of that same character. No \b anchors are used,
    # because they would restrict the match to tokens consisting solely of
    # the repeated character.
    return re.search(r'([^\W\d_])\1{2,}', text) is not None
# Function to detect a single letter (except 'و')
def one_letter(text):
    """Tell whether *text* contains a stand-alone one-character word.

    A match is a single word character with a word boundary on both sides.
    The Arabic letter 'و' is exempt and never triggers the check on its own.

    Returns:
        True if such a one-character word is present, otherwise False.
    """
    # The negative lookahead rejects 'و' before the single character is taken.
    single_char_word = r'\b(?![و])\w\b'
    found = re.search(single_char_word, text)
    return found is not None
# Function to detect certain keywords related to numbers
def maybe_numbers_missing(text):
    """Tell whether *text* mentions a keyword that usually accompanies a number.

    The keywords are the Arabic words in the pattern below (roughly:
    "number", "count", "address", "pound", "riyal"). Each must appear as a
    whole word; the same letters embedded in a longer word do not count.

    Returns:
        True if any keyword is present as a whole word, otherwise False.
    """
    number_hint_words = r'\b(رقم|عدد|عنوان|جنيه|ريال)\b'
    hit = re.search(number_hint_words, text)
    return hit is not None
# Function to detect repetitions within a phrase
def repetition(text):
    """Tell whether *text* contains a word or phrase immediately said twice.

    The pattern captures one or more whitespace-separated tokens starting at
    a word boundary, then requires whitespace followed by the exact same
    captured text, ending at a word boundary. The comparison is
    case-sensitive and literal.

    Returns:
        True if such a back-to-back repeat exists, otherwise False.
    """
    doubled_phrase = r'\b(\S+(\s+\S+)*)\s+\1\b'
    return re.search(doubled_phrase, text) is not None
def detect_alerts(subs):
    """Run every text check against every subtitle and collect the hits.

    Args:
        subs: Iterable of subtitle items; each must expose ``text``,
            ``start`` and ``end`` attributes.

    Returns:
        A list of dicts with the keys 'Alert Type', 'Phrase' and 'Timecode',
        one per (subtitle, check) pair that fired. Entries are ordered by
        subtitle first and, within one subtitle, by the order of the checks
        listed below. A subtitle may contribute several entries.
    """
    # (label written to the report, predicate applied to the subtitle text)
    checks = (
        ('Word too long', word_too_long),
        ('One word', one_word),
        ('Three successive letters', three_successive_letters),
        ('One letter', one_letter),
        ('Maybe numbers are missing', maybe_numbers_missing),
        ('Repetition', repetition),
    )
    return [
        {
            'Alert Type': label,
            'Phrase': sub.text,
            'Timecode': f"{sub.start} --> {sub.end}",
        }
        for sub in subs
        for label, check in checks
        if check(sub.text)
    ]
def generate_excel(alerts):
    """Write *alerts* to the spreadsheet file ``alerts.xlsx``.

    The active sheet of a new workbook receives a header row followed by one
    row per alert, with the columns 'Alert Type', 'Phrase' and 'Timecode'.
    The file name is relative, so it resolves against the process's current
    working directory. A confirmation line is printed after saving.

    Args:
        alerts: Iterable of dicts, each holding the three column keys above.
    """
    columns = ['Alert Type', 'Phrase', 'Timecode']
    excel_file_name = "alerts.xlsx"

    workbook = Workbook()
    sheet = workbook.active
    sheet.append(columns)
    for entry in alerts:
        # Pull the values out in header order so rows line up with columns.
        sheet.append([entry[column] for column in columns])

    workbook.save(excel_file_name)
    print(f"Alerts have been saved to '{excel_file_name}'.")
def main():
    """Check one SRT subtitle file and export the resulting alerts to Excel.

    The input path is the placeholder assigned to ``file_path`` below; edit
    it to point at a real file. Problems are reported on standard output
    rather than raised: a missing file, a file with no subtitles, and any
    other error each produce their own message.
    """
    # Raw string on purpose: in a normal literal the "\t" in this Windows
    # style path is read as a TAB character (and "\y" / "\d" are invalid
    # escape sequences), so the path would never match the intended file.
    file_path = r'path\to\youre\data.srt'
    try:
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file '{file_path}' could not be found.")
        subs = pysrt.open(file_path, encoding='utf-8')
        if subs:
            alerts = detect_alerts(subs)
            generate_excel(alerts)
        else:
            print(f"Error: No subtitles found in '{file_path}'.")
    except FileNotFoundError as e:
        print(f"Error: {e}")
    except Exception as e:
        # Top-level boundary of the script: report anything unexpected
        # instead of ending with a traceback.
        print(f"An error occurred: {e}")
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()