Skip to content

Commit 1b816cc

Browse files
committed
Simpler type hints! and simplifying requirements
1 parent d597b3d commit 1b816cc

3 files changed

Lines changed: 22 additions & 24 deletions

File tree

ioc_finder/ioc_finder.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,18 @@
22

33
import json
44
import urllib.parse as urlparse
5-
from typing import Callable, Dict, Iterable, List, Mapping, Union
5+
from collections.abc import Callable, Iterable, Mapping
66

77
import click
88
import ioc_fanger
9-
from d8s_strings import string_remove_from_end
109
from pyparsing import ParseResults
1110

1211
from ioc_finder import ioc_grammars
1312

14-
IndicatorList = List[str]
15-
IndicatorDict = Dict[str, IndicatorList]
13+
IndicatorList = list[str]
14+
IndicatorDict = dict[str, IndicatorList]
1615
# using `Mapping` b/c it is covariant (https://mypy.readthedocs.io/en/stable/generics.html#variance-of-generic-types)
17-
IndicatorData = Mapping[str, Union[IndicatorList, IndicatorDict]]
16+
IndicatorData = Mapping[str, IndicatorList | IndicatorDict]
1817

1918
DEFAULT_IOC_TYPES = [
2019
"asns",
@@ -49,17 +48,17 @@
4948
]
5049

5150

52-
def _deduplicate(indicator_list: Iterable) -> List:
51+
def _deduplicate(indicator_list: Iterable) -> list:
5352
"""Deduplicate the list of observables."""
5453
return list(set(indicator_list))
5554

5655

57-
def _listify(indicator_list: ParseResults) -> List:
56+
def _listify(indicator_list: ParseResults) -> list:
5857
"""Convert the multi-dimensional list into a one-dimensional list with empty entries and duplicates removed."""
5958
return _deduplicate([indicator[0] for indicator in indicator_list if indicator[0]])
6059

6160

62-
def _remove_items(items: List[str], text: str) -> str:
61+
def _remove_items(items: list[str], text: str) -> str:
6362
"""Remove each item from the text."""
6463
for item in items:
6564
text = text.replace(item, " ")
@@ -98,13 +97,13 @@ def _clean_url(url: str) -> str:
9897
url = url.rstrip('"').rstrip("'")
9998

10099
# remove `'/>` and `"/>` from the end of a URL (these character strings occur at the end of an HTML tag)
101-
url = string_remove_from_end(url, "'/>")
102-
url = string_remove_from_end(url, '"/>')
100+
url = url.removesuffix("'/>")
101+
url = url.removesuffix('"/>')
103102

104103
return url
105104

106105

107-
def parse_urls(text: str, *, parse_urls_without_scheme: bool = True) -> List:
106+
def parse_urls(text: str, *, parse_urls_without_scheme: bool = True) -> list:
108107
"""."""
109108
if parse_urls_without_scheme:
110109
url_parse_results = ioc_grammars.scheme_less_url.searchString(text)
@@ -118,15 +117,15 @@ def parse_urls(text: str, *, parse_urls_without_scheme: bool = True) -> List:
118117
return _deduplicate(clean_urls)
119118

120119

121-
def _remove_url_domain_name(urls: List, text) -> str:
120+
def _remove_url_domain_name(urls: list, text: str) -> str:
122121
"""Remove the domain name of each url from the text."""
123122
for url in urls:
124123
parsed_url = ioc_grammars.scheme_less_url.parseString(url)
125124
text = text.replace(parsed_url.url_authority, " ")
126125
return text
127126

128127

129-
def _remove_url_paths(urls: List, text: str) -> str:
128+
def _remove_url_paths(urls: list, text: str) -> str:
130129
"""Remove the path of each url from the text."""
131130
for url in urls:
132131
parsed_url = ioc_grammars.scheme_less_url.parseString(url)
@@ -139,7 +138,7 @@ def _remove_url_paths(urls: List, text: str) -> str:
139138
return text
140139

141140

142-
def _percent_decode_url(urls: List, text: str) -> str:
141+
def _percent_decode_url(urls: list, text: str) -> str:
143142
for url in urls:
144143
text = text.replace(url, urlparse.unquote_plus(url))
145144
return text
@@ -163,20 +162,20 @@ def parse_ipv6_addresses(text):
163162
return _listify(addresses)
164163

165164

166-
def parse_complete_email_addresses(text: str) -> List:
165+
def parse_complete_email_addresses(text: str) -> list:
167166
"""."""
168167
email_addresses = ioc_grammars.complete_email_address.searchString(text)
169168
return _listify(email_addresses)
170169

171170

172-
def parse_email_addresses(text: str) -> List:
171+
def parse_email_addresses(text: str) -> list:
173172
"""."""
174173
email_addresses = ioc_grammars.email_address.searchString(text)
175174
return _listify(email_addresses)
176175

177176

178177
# there is a trailing underscore on this function to differentiate it from the argument with the same name
179-
def parse_imphashes_(text: str) -> List:
178+
def parse_imphashes_(text: str) -> list:
180179
"""."""
181180
full_imphash_instances = _listify(ioc_grammars.imphash.searchString(text.lower()))
182181

@@ -189,7 +188,7 @@ def parse_imphashes_(text: str) -> List:
189188

190189

191190
# there is a trailing underscore on this function to differentiate it from the argument with the same name
192-
def parse_authentihashes_(text: str) -> List:
191+
def parse_authentihashes_(text: str) -> list:
193192
"""."""
194193
full_authentihash_instances = _listify(ioc_grammars.authentihash.searchString(text.lower()))
195194

@@ -243,7 +242,7 @@ def parse_cves(text):
243242
return _listify(cves)
244243

245244

246-
def parse_ipv4_cidrs(text: str) -> List:
245+
def parse_ipv4_cidrs(text: str) -> list:
247246
"""."""
248247
cidrs = ioc_grammars.ipv4_cidr.searchString(text)
249248
return _listify(cidrs)
@@ -294,13 +293,13 @@ def parse_monero_addresses(text):
294293
return _listify(monero_addresses)
295294

296295

297-
def parse_xmpp_addresses(text: str) -> List:
296+
def parse_xmpp_addresses(text: str) -> list:
298297
"""."""
299298
xmpp_addresses = ioc_grammars.xmpp_address.searchString(text)
300299
return _listify(xmpp_addresses)
301300

302301

303-
def _remove_xmpp_local_part(xmpp_addresses: List, text: str) -> str:
302+
def _remove_xmpp_local_part(xmpp_addresses: list, text: str) -> str:
304303
"""Remove the local part of each xmpp_address from the text."""
305304
for address in xmpp_addresses:
306305
text = text.replace(address.split("@")[0] + "@", " ")
@@ -452,7 +451,7 @@ def find_iocs( # noqa: CCR001 pylint: disable=R0912,R0915
452451
parse_urls_without_scheme: bool = True,
453452
parse_imphashes: bool = True,
454453
parse_authentihashes: bool = True,
455-
included_ioc_types: List[str] = DEFAULT_IOC_TYPES,
454+
included_ioc_types: list[str] = DEFAULT_IOC_TYPES,
456455
) -> IndicatorData:
457456
"""Find observables (a.k.a. indicators of compromise) in the given text."""
458457
iocs = {}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ max-line-length = 120
1010
disable = "C0114, R1705, C0103"
1111

1212
[tool.pytest.ini_options]
13-
addopts = "-n auto -vv --cov=. --cov-report term-missing --cov-fail-under 95"
13+
addopts = "-n auto -vv --cov=. --cov-report term-missing --cov-fail-under 100"
1414
python_files = "tests/test_*.py"
1515

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
click>=7.1.2,<9.0
22
ioc-fanger>=4.2.1,<4.3
33
pyparsing>=3.0,<=3.1.1
4-
d8s-strings>=0.5.0,<1.0

0 commit comments

Comments
 (0)