Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### New Features

- Add "loop" static characteristic in Extractors

### Breaking Changes

### New Rules (1)
Expand Down
13 changes: 13 additions & 0 deletions capa/features/extractors/binexport2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@
from pefile import PE
from elftools.elf.elffile import ELFFile

from capa.features.address import AbsoluteVirtualAddress
import capa.features.common
import capa.features.extractors.common
import capa.features.extractors.binexport2.helpers
from capa.features.extractors import loops
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -415,6 +417,17 @@ class FunctionContext:
os: set[str]
arch: set[str]

def __post_init__(self):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we haven't used post_init yet in this codebase so i wonder if you could do this in a way that's more consistent.

perhaps via a constructor or from_foo(...) classmethod.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe this could be placed into AnalysisContext (as a dict keyed by vertex)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think putting it into AnalysisContext is a good idea for now, as AnalysisContext is more about global data that can be used repeatedly, whereas this data is only used within a single function's scope. Depending on the program's complexity, the data may not be small if we keep all the loop information across the entire scan lifespan.

Keeping it inside FunctionContext means the data stays in RAM only while a single function's features are being generated, and it will be GC-ed right after.

Although I feel post_init is a fairly standard approach for dataclasses, I can also just put the code inside get_functions of BinExport2FeatureExtractor and make it part of the initializer list.

How about that?

flow_graph = self.ctx.be2.flow_graph[self.flow_graph_index]
edges: list[tuple[int, int]] = []
for edge in flow_graph.edge:
edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
looping_indices = loops.get_loop_vertices(edges)
self.ctx.cyclic_loop = {
AbsoluteVirtualAddress(self.ctx.idx.get_basic_block_address(idx_val))
for idx_val in looping_indices
}


@dataclass
class BasicBlockContext:
Expand Down
11 changes: 10 additions & 1 deletion capa/features/extractors/binexport2/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[F
yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)


def extract_bb_inside_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """yield the "inside loop" characteristic when this basic block is recorded as looping

    args:
      fh: function handle; `fh.inner` is read as the FunctionContext
      bbh: basic block handle; `bbh.inner` is read as the BasicBlockContext

    yields:
      (Characteristic("inside loop"), address of the basic block), at most once
    """
    fhi: FunctionContext = fh.inner
    bbi: BasicBlockContext = bbh.inner

    # FunctionContext.__post_init__ stores the looping blocks as a set of
    # addresses on `ctx.cyclic_loop`; no visible code assigns a
    # `looping_vertices` attribute, so test membership in that set instead.
    # the empty default keeps this handler silent if the set was never computed.
    looping_addresses = getattr(fhi.ctx, "cyclic_loop", ())

    basic_block_address: int = fhi.ctx.idx.get_basic_block_address(bbi.basic_block_index)
    address = AbsoluteVirtualAddress(basic_block_address)
    if address in looping_addresses:
        yield Characteristic("inside loop"), address


def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract basic block features"""
for bb_handler in BASIC_BLOCK_HANDLERS:
Expand All @@ -44,4 +53,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
yield BasicBlock(), bbh.address


BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop, extract_bb_inside_loop)
18 changes: 1 addition & 17 deletions capa/features/extractors/binexport2/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,6 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Add
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)


def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    """yield the "loop" characteristic at the function address when `loops.has_loop` reports a loop

    args:
      fh: function handle; `fh.inner` is read as the FunctionContext

    yields:
      (Characteristic("loop"), fh.address), at most once
    """
    context: FunctionContext = fh.inner
    program: BinExport2 = context.ctx.be2
    graph: BinExport2.FlowGraph = program.flow_graph[context.flow_graph_index]

    # reduce each flow graph edge to a (source, target) pair of basic block indices
    block_pairs: list[tuple[int, int]] = [
        (link.source_basic_block_index, link.target_basic_block_index) for link in graph.edge
    ]

    if not loops.has_loop(block_pairs):
        return

    yield Characteristic("loop"), fh.address


def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
fhi: FunctionContext = fh.inner

Expand All @@ -76,4 +60,4 @@ def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
yield feature, addr


FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_function_name)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_name)
8 changes: 8 additions & 0 deletions capa/features/extractors/binexport2/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,13 @@ def extract_function_indirect_call_characteristic_features(
)


def extract_insn_loop(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
    """extract loop characteristic feature at the instruction scope if inside a cycle

    args:
      fh: function handle; `fh.inner` is read as the FunctionContext
      bbh: handle of the basic block containing the instruction
      ih: handle of the instruction being inspected

    yields:
      (Characteristic("loop"), ih.address) when `bbh.address` is in the
      recorded set of looping basic block addresses; otherwise nothing
    """
    fhi: FunctionContext = fh.inner

    # the analysis context is used through attributes elsewhere (`ctx.be2`,
    # `ctx.idx`) and the looping addresses are stored as the attribute
    # `ctx.cyclic_loop`, so read it with getattr rather than mapping-style
    # `in` / `[]` access on the context object.
    looping_addresses = getattr(fhi.ctx, "cyclic_loop", None)
    if looping_addresses is not None and bbh.address in looping_addresses:
        yield Characteristic("loop"), ih.address


def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
"""extract instruction features"""
for inst_handler in INSTRUCTION_HANDLERS:
Expand All @@ -258,4 +265,5 @@ def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iter
extract_insn_mnemonic_features,
extract_function_calls_from,
extract_function_indirect_call_characteristic_features,
extract_insn_loop,
)
23 changes: 23 additions & 0 deletions capa/features/extractors/loops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,26 @@ def has_loop(edges, threshold=2):
g = networkx.DiGraph()
g.add_edges_from(edges)
return any(len(comp) >= threshold for comp in strongly_connected_components(g))


def get_loop_vertices(edges, threshold=2):
    """find vertices that are part of a cycle in a directed graph

    args:
      edges: iterable of (source, target) pairs representing a directed graph i.e. [(1, 2), (2, 1)]
      threshold: min number of nodes contained in loop

    returns:
      set of vertex IDs: every member of a strongly connected component with
      at least `threshold` vertices, plus every vertex that has an edge to
      itself (included regardless of `threshold`)
    """
    g = networkx.DiGraph()
    g.add_edges_from(edges)

    loop_vertices = set()
    for comp in strongly_connected_components(g):
        if len(comp) >= threshold:
            loop_vertices.update(comp)

    # Also include any vertices with self-loops (for tight loops).
    # query the graph rather than walking `edges` a second time, so a one-shot
    # iterable (e.g. a generator) already consumed above is still handled.
    loop_vertices.update(networkx.nodes_with_selfloops(g))
    return loop_vertices
2 changes: 1 addition & 1 deletion capa/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def from_dict(cls, scopes: dict[str, str]) -> "Scopes":
capa.features.basicblock.BasicBlock,
capa.features.common.Characteristic("calls from"),
capa.features.common.Characteristic("calls to"),
capa.features.common.Characteristic("loop"),
capa.features.common.Characteristic("recursive call"),
# plus basic block scope features, see below
},
Expand All @@ -235,6 +234,7 @@ def from_dict(cls, scopes: dict[str, str]) -> "Scopes":
capa.features.insn.Mnemonic,
capa.features.insn.OperandNumber,
capa.features.insn.OperandOffset,
capa.features.common.Characteristic("loop"),
capa.features.common.Characteristic("nzxor"),
capa.features.common.Characteristic("peb access"),
capa.features.common.Characteristic("fs access"),
Expand Down
Loading