Skip to content

Commit 3fa2825

Browse files
fhightower and claude authored
Simplify DEFAULT_IOC_TYPES to common indicators (#340)
* Simplify DEFAULT_IOC_TYPES to common indicators

  Reduce DEFAULT_IOC_TYPES from 30 entries down to 9 common indicator types (domains, urls, ipv4s, ipv6s, email_addresses, md5s, sha1s, sha256s, cves) to speed parsing in the typical case. Add a new SUPPORTED_IOC_TYPES constant exposing the full list of parseable types so callers can opt back into the obscure types via included_ioc_types.

  This is a breaking change in the find_iocs() default behavior.

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* Improve cli and fix lint errors

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent a5fe54b commit 3fa2825

8 files changed

Lines changed: 65 additions & 16 deletions

File tree

ioc_finder/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/env python3
22

33
from .ioc_finder import (
4+
DEFAULT_IOC_TYPES,
5+
SUPPORTED_IOC_TYPES,
46
find_iocs,
57
parse_asns,
68
parse_authentihashes_,
@@ -37,6 +39,8 @@
3739
)
3840

3941
__all__ = [
42+
"DEFAULT_IOC_TYPES",
43+
"SUPPORTED_IOC_TYPES",
4044
"find_iocs",
4145
"parse_asns",
4246
"parse_authentihashes_",

ioc_finder/ioc_finder.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@
126126
# using `Mapping` b/c it is covariant (https://mypy.readthedocs.io/en/stable/generics.html#variance-of-generic-types)
127127
IndicatorData = Mapping[str, IndicatorList | IndicatorDict]
128128

129-
DEFAULT_IOC_TYPES = [
129+
SUPPORTED_IOC_TYPES = [
130130
"asns",
131131
"attack_mitigations",
132132
"attack_tactics",
@@ -159,6 +159,18 @@
159159
"xmpp_addresses",
160160
]
161161

162+
DEFAULT_IOC_TYPES = [
163+
"cves",
164+
"domains",
165+
"email_addresses",
166+
"ipv4s",
167+
"ipv6s",
168+
"md5s",
169+
"sha1s",
170+
"sha256s",
171+
"urls",
172+
]
173+
162174

163175
def _deduplicate(indicator_list: Iterable) -> list:
164176
"""Deduplicate the list of observables."""
@@ -555,6 +567,12 @@ def parse_tlp_labels(text):
555567
help="Using this flag will parse URLs with and without a scheme (default is True)",
556568
default=True,
557569
)
570+
@click.option(
571+
"--all",
572+
"parse_all",
573+
is_flag=True,
574+
help="Parse every supported indicator type instead of the common defaults.",
575+
)
558576
def cli_find_iocs(
559577
text,
560578
no_url_domain_parsing,
@@ -563,6 +581,7 @@ def cli_find_iocs(
563581
no_cidr_address_parsing,
564582
no_xmpp_addr_domain_parsing,
565583
parse_urls_without_scheme,
584+
parse_all,
566585
):
567586
"""CLI interface for parsing observables."""
568587
stdin_text = click.get_text_stream("stdin")
@@ -572,6 +591,7 @@ def cli_find_iocs(
572591
text = "\n".join(stdin_text)
573592
# text = '\n'.join([line for line in stdin_text])
574593

594+
included_ioc_types = list(SUPPORTED_IOC_TYPES if parse_all else DEFAULT_IOC_TYPES)
575595
iocs = find_iocs(
576596
text,
577597
parse_domain_from_url=not no_url_domain_parsing,
@@ -580,6 +600,7 @@ def cli_find_iocs(
580600
parse_address_from_cidr=not no_cidr_address_parsing,
581601
parse_domain_name_from_xmpp_address=not no_xmpp_addr_domain_parsing,
582602
parse_urls_without_scheme=parse_urls_without_scheme,
603+
included_ioc_types=included_ioc_types,
583604
)
584605
ioc_string = json.dumps(iocs, indent=4, sort_keys=True)
585606
print(ioc_string)
@@ -613,9 +634,10 @@ def find_iocs(
613634
addresses. Only applicable when ``"domains"`` is in ``included_ioc_types``.
614635
parse_urls_without_scheme: Whether to parse URLs without a scheme. Only applicable
615636
when ``"urls"`` or ``"urls_complete"`` is in ``included_ioc_types``.
616-
included_ioc_types: Collection of IOC type names to parse. If ``None``, all
617-
default types are parsed. See ``DEFAULT_IOC_TYPES`` for valid values.
618-
When specified, the boolean options above only take effect if their
637+
included_ioc_types: Collection of IOC type names to parse. If ``None``,
638+
the common default types are parsed (see ``DEFAULT_IOC_TYPES``). For
639+
the full list of parseable types, see ``SUPPORTED_IOC_TYPES``. When
640+
specified, the boolean options above only take effect if their
619641
corresponding IOC type is included.
620642
"""
621643
if included_ioc_types is None:

tests/find_iocs_cases/feature__included_ioc_types.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
from pytest import param
77

8-
from ioc_finder.ioc_finder import DEFAULT_IOC_TYPES
8+
from ioc_finder.ioc_finder import SUPPORTED_IOC_TYPES
99

1010
IOC_EXAMPLES = {
1111
"domains": ["abc.py", "bar.com", "example.com", "foo.com", "swissjabber.de"],
@@ -67,7 +67,7 @@
6767

6868
individual_included_ioc_types_tests = []
6969

70-
for type_ in DEFAULT_IOC_TYPES:
70+
for type_ in SUPPORTED_IOC_TYPES:
7171
individual_included_ioc_types_tests.append(
7272
param(
7373
all_ioc_text,

tests/test_cli.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def test_cli_without_domain_from_url_parsing():
3131
runner = CliRunner()
3232
result = runner.invoke(
3333
ioc_finder.cli_find_iocs,
34-
["This is just an example.com https://example.org/test/bingo.php", "--no_url_domain_parsing"],
34+
["This is just an example.com https://example.org/test/bingo.php", "--no_url_domain_parsing", "--all"],
3535
)
3636
assert result.exit_code == 0
3737
print(result.output.strip())
@@ -115,7 +115,7 @@ def test_cli_disabling_parsing_urls_without_scheme():
115115

116116
def test_cli_parses_imphashes_by_default():
117117
runner = CliRunner()
118-
result = runner.invoke(ioc_finder.cli_find_iocs, ["imphash 18ddf28a71089acdbab5038f58044c0a"])
118+
result = runner.invoke(ioc_finder.cli_find_iocs, ["imphash 18ddf28a71089acdbab5038f58044c0a", "--all"])
119119
assert result.exit_code == 0
120120
json_results = json.loads(result.output.strip())
121121
assert json_results["imphashes"] == ["18ddf28a71089acdbab5038f58044c0a"]

tests/test_edge_cases.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,13 @@
22

33
import pytest
44

5-
from ioc_finder import find_iocs
6-
from ioc_finder.ioc_finder import DEFAULT_IOC_TYPES
5+
from ioc_finder import find_iocs as _find_iocs
6+
from ioc_finder.ioc_finder import SUPPORTED_IOC_TYPES
7+
8+
9+
def find_iocs(*args, **kwargs):
10+
kwargs.setdefault("included_ioc_types", SUPPORTED_IOC_TYPES)
11+
return _find_iocs(*args, **kwargs)
712

813

914
@pytest.fixture
@@ -490,14 +495,14 @@ def test_google_casing_deduplication():
490495
def test_excluding_imphashes_from_included_ioc_types():
491496
"""Omitting 'imphashes' from included_ioc_types disables imphash parsing."""
492497
s = "imphash 18ddf28a71089acdbab5038f58044c0a"
493-
types_without_imphashes = [t for t in DEFAULT_IOC_TYPES if t != "imphashes"]
498+
types_without_imphashes = [t for t in SUPPORTED_IOC_TYPES if t != "imphashes"]
494499
iocs = find_iocs(s, included_ioc_types=types_without_imphashes)
495500
assert "imphashes" not in iocs
496501

497502

498503
def test_excluding_authentihashes_from_included_ioc_types():
499504
s = "authentihash 3f1b149d07e7e8636636b8b7f7043c40ed64a10b28986181fb046c498432c2d4"
500-
types_without_authentihashes = [t for t in DEFAULT_IOC_TYPES if t != "authentihashes"]
505+
types_without_authentihashes = [t for t in SUPPORTED_IOC_TYPES if t != "authentihashes"]
501506
iocs = find_iocs(s, included_ioc_types=types_without_authentihashes)
502507
assert "authentihashes" not in iocs
503508

tests/test_find_iocs.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,17 @@
22

33
import pytest
44

5-
from ioc_finder import find_iocs
6-
from ioc_finder.ioc_finder import IndicatorDict, IndicatorList
5+
from ioc_finder import find_iocs as _find_iocs
6+
from ioc_finder.ioc_finder import SUPPORTED_IOC_TYPES, IndicatorDict, IndicatorList
77

88
from .find_iocs_cases import ALL_TESTS
99

1010

11+
def find_iocs(*args, **kwargs):
12+
kwargs.setdefault("included_ioc_types", SUPPORTED_IOC_TYPES)
13+
return _find_iocs(*args, **kwargs)
14+
15+
1116
@pytest.mark.parametrize("text, results, args", ALL_TESTS)
1217
def test_find_iocs(text: str, results: Dict, args: Dict) -> None:
1318
# Parse input

tests/test_ioc_finder.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,10 @@
1-
from ioc_finder import find_iocs
1+
from ioc_finder import find_iocs as _find_iocs
2+
from ioc_finder.ioc_finder import SUPPORTED_IOC_TYPES
3+
4+
5+
def find_iocs(*args, **kwargs):
6+
kwargs.setdefault("included_ioc_types", SUPPORTED_IOC_TYPES)
7+
return _find_iocs(*args, **kwargs)
28

39

410
def test_tlp_labels():

tests/test_urls.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,14 @@
22

33
from d8s_lists import iterables_have_same_items
44

5-
from ioc_finder import find_iocs
5+
from ioc_finder import find_iocs as _find_iocs
6+
from ioc_finder.ioc_finder import SUPPORTED_IOC_TYPES
7+
8+
9+
def find_iocs(*args, **kwargs):
10+
kwargs.setdefault("included_ioc_types", SUPPORTED_IOC_TYPES)
11+
return _find_iocs(*args, **kwargs)
12+
613

714
# VALID_URLS = [
815
# 'http://foo.com/blah_blah',

0 commit comments

Comments
 (0)