Skip to content

Commit b1a172d

Browse files
authored
Merge pull request #6 from WISPR-lab/extended-manifests
Extended manifests
2 parents 5e134f3 + 5e1a102 commit b1a172d

23 files changed

Lines changed: 1301 additions & 39 deletions

manifests/google.yaml

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
manifest_version: 2
2+
id: google
3+
ecs_version: "9.4.0-dev"
4+
target: "Google"
5+
last_updated: "2025-03-20"
6+
7+
files:
8+
# Security & Account Data
9+
- id: "ggl_subscriber_info"
10+
path: "Google Account/bob.researcher24.SubscriberInfo.html"
11+
parser:
12+
format: "html_table"
13+
14+
15+
- id: "ggl_change_history"
16+
path: "Google Account/bob.researcher24.ChangeHistory.html"
17+
parser:
18+
format: "html_table"
19+
20+
# - id: "ggl_search_activity"
21+
# path: "My Activity/Search/MyActivity.html"
22+
# parser:
23+
# format: "html"
24+
# - id: "ggl_gmail_activity"
25+
# path: "My Activity/Gmail/MyActivity.html"
26+
# parser:
27+
# format: "html"
28+
29+
# - id: "ggl_drive_activity"
30+
# path: "My Activity/Drive/MyActivity.html"
31+
# parser:
32+
# format: "html"
33+
34+
# - id: "ggl_discover_activity"
35+
# path: "My Activity/Discover/MyActivity.html"
36+
# parser:
37+
# format: "html"
38+
39+
- id: "ggl_takeout_activity"
40+
path: "My Activity/Takeout/MyActivity.html"
41+
parser:
42+
format: "html_ggl_myactivity"
43+
44+
45+
# - id: "ggl_access_log_activity"
46+
# path: "Access Log Activity/Activities - A list of Google services*"
47+
# parser:
48+
# format: "csv"
49+
# parse_dates: ["Last Activity Time"]
50+
51+
52+
# Devices & Sessions
53+
- id: "ggl_access_log_devices"
54+
path: "Access Log Activity/Devices - A list of devices (i.e. Nest, Pixel, iPh.csv"
55+
parser:
56+
format: "csv"
57+
parse_dates: ["Last Activity Time"]
58+
59+
# - id: "ggl_timeline_settings"
60+
# path: "Timeline/Settings.json"
61+
# parser:
62+
# format: "json"
63+
# json_root: "deviceSettings[]"
64+
65+
# - id: "ggl_profile"
66+
# path: "Profile/Profile.json"
67+
# parser:
68+
# format: "json"
69+
70+
views:
71+
72+
# -------------------------- ACCOUNT PROFILE --------------------------
73+
# - file:
74+
# id: "ggl_subscriber_info"
75+
# static:
76+
# event.kind: "asset"
77+
# entity.type: "user_profile"
78+
# fields:
79+
# - {target: "user.full_name", source: "name", type: "string"}
80+
# - {target: "user.email", source: "email", type: "string"}
81+
# - {target: "user.profile.created_date", source: "created_on", type: "datetime"}
82+
# - {target: "user.profile.status", source: "status", type: "string"}
83+
84+
# ------------------------ AUTH EVENTS ------------------------
85+
86+
- file:
87+
id: "ggl_subscriber_info"
88+
where:
89+
{source: "Activity Type", op: "==", value: "Login"}
90+
static:
91+
event.action: "user_login_success"
92+
event.kind: "event"
93+
event.category: ["authentication", "session"]
94+
event.type: ["start"]
95+
event.outcome: "success"
96+
fields:
97+
- {target: "@timestamp", source: "Timestamp", type: "datetime"}
98+
- {target: "client.ip", source: "IP Address", type: "string"}
99+
- {target: "client.geo.name", source: "Geo", type: "string"}
100+
- {target: "user_agent.original", source: "Raw User Agents", type: "string"}
101+
102+
# ------------------------ PASSWORD/EMAIL EVENTS ------------------------
103+
- file:
104+
id: "ggl_change_history"
105+
where:
106+
{source: "Change Type", op: "==", value: "PASSWORD"}
107+
static:
108+
event.action: "user_password_change"
109+
event.kind: "event"
110+
event.category: ["configuration"]
111+
event.type: ["change"]
112+
fields:
113+
- {target: "@timestamp", source: "Timestamp", type: "datetime"}
114+
- {target: "client.ip", source: "IP Address", type: "string"}
115+
116+
- file:
117+
id: "ggl_change_history"
118+
where:
119+
logic: "any"
120+
conditions:
121+
- {source: "Change Type", op: "==", value: "SECONDARY_EMAIL_ADDED"}
122+
static:
123+
event.action: "email_addition"
124+
event.kind: "event"
125+
event.category: ["configuration"]
126+
event.type: ["change"]
127+
fields:
128+
- {target: "@timestamp", source: "Timestamp", type: "datetime"}
129+
- {target: "client.ip", source: "IP Address", type: "string"}
130+
- {target: "user.email.old", source: "Old Value", type: "string"}
131+
- {target: "user.email.new", source: "New Value", type: "string"}
132+
133+
134+
# ------------------------ ACCESS ACTIVITY ------------------------
135+
136+
137+
# ------------------------ SEARCH ACTIVITY ------------------------
138+
# - file:
139+
# id: "ggl_search_activity"
140+
# static:
141+
# event.action: "web_search"
142+
# event.kind: "event"
143+
# event.category: ["web"]
144+
# event.type: ["access"]
145+
# fields: "" # TODO parser
146+
147+
# # ------------------------ GMAIL ACTIVITY ------------------------
148+
# - file:
149+
# id: "ggl_gmail_activity"
150+
# static:
151+
# event.action: "email_send"
152+
# event.kind: "event"
153+
# event.category: ["email"]
154+
# event.type: ["info"]
155+
# fields:
156+
# - {target: "@timestamp", source: "timestamp", type: "datetime"}
157+
# - {target: "email.sender", source: "from", type: "string"}
158+
# - {target: "email.to", source: "to", type: "string"}
159+
# - {target: "email.subject", source: "subject", type: "string"}
160+
161+
# # ------------------------ DRIVE ACTIVITY ------------------------
162+
# - file:
163+
# id: "ggl_drive_activity"
164+
# static:
165+
# event.action: "cloud_storage_access"
166+
# event.kind: "event"
167+
# event.category: ["web"]
168+
# event.type: ["access"]
169+
# fields:
170+
# - {target: "@timestamp", source: "timestamp", type: "datetime"}
171+
# - {target: "file.name", source: "file_name", type: "string"}
172+
# - {target: "event.action_type", source: "activity_type", type: "string"}
173+
174+
# ------------------------ AUTH DEVICES ------------------------
175+
- file:
176+
id: "ggl_access_log_devices"
177+
static:
178+
event.kind: "asset"
179+
event.category: ["authentication", "session"]
180+
event.type: ["info"]
181+
entity.type: "authenticated_device"
182+
fields:
183+
# - {target: "entity.last_seen_timestamp", source: "Last Activity Time", type: "datetime"} # TODO
184+
- {target: "device.manufacturer", source: "Brand Name", type: "string"}
185+
- {target: "device.model.name", source: "Marketing Name", type: "string"}
186+
- {target: "device.model.identifier", source: "Device Model", type: "string"}
187+
- {target: "user_agent.os.name", source: "OS", type: "string"}
188+
- {target: "user_agent.os.version", source: "OS Version", type: "string"}
189+
190+
# - file:
191+
# id: "ggl_timeline_settings"
192+
# static:
193+
# event.kind: "asset"
194+
# event.category: ["authentication", "session"]
195+
# event.type: ["info"]
196+
# entity.type: "authenticated_device"
197+
# fields:
198+
# - {target: "entity.first_seen_timestamp", source: "deviceCreationTime", type: "datetime"}
199+
# - {target: "device.model.identifier", source: "device", type: "string"}
200+
# - {target: "user_agent.os.name", source: "platformType", type: "string"}
201+
# - {target: "user_agent.os.version", source: "iosVersion", type: "string"}
202+
# - {target: "device.name", source: "devicePrettyName", type: "string"}
203+
# - {target: "device.id.google", source: "deviceTag", type: "string"}
204+
205+
# ------------------------ USER PROFILE ------------------------
206+
# - file:
207+
# id: "ggl_profile"
208+
# static:
209+
# event.kind: "asset"
210+
# entity.type: "user_profile"
211+
# fields:
212+
# - {target: "user.full_name", source: "displayName", type: "string"}
213+
# - {target: "user.first_name", source: "name.givenName", type: "string"}
214+
# - {target: "user.email", source: "emails[0].value", type: "string"}
215+
# - {target: "user.profile.birthday", source: "birthday", type: "date"}
216+
# - {target: "user.profile.gender", source: "gender.type", type: "string"}
217+
218+
# # ------------------------ SECURITY ALERTS ------------------------
219+
# - file:
220+
# id: "ggl_alerts"
221+
# static:
222+
# event.kind: "event"
223+
# event.category: ["security"]
224+
# event.type: ["info"]
225+
# fields:
226+
# - {target: "@timestamp", source: "timestamp", type: "datetime"}
227+
# - {target: "event.reason", source: "alert_type", type: "string"}
228+
# - {target: "message", source: "alert_message", type: "string"}
229+

python_core/extractors/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,18 @@
33
from .csv_ import CSVParser
44
from .csv_multi import CSVMultiParser
55
from .json_label_values import JSONLabelValuesParser
6+
from .html_table import HTMLTableParser
7+
from .html_ggl_myactivity import HTMLMyActvityParser
68

79
REGISTRY = {
810
'json': JSONParser,
911
'jsonl': JSONLParser,
1012
'csv': CSVParser,
1113
'csv_multi': CSVMultiParser,
1214
'json_label_values': JSONLabelValuesParser,
15+
'html': HTMLTableParser,
16+
'html_table': HTMLTableParser,
17+
'html_ggl_myactivity': HTMLMyActvityParser,
1318
}
1419

1520
def get_parser(fmt: str):

python_core/extractors/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def _is_trivial(cls, x):
1616

1717

1818
@abstractmethod
19-
def extract(self, content: str, config: Optional[Dict] = None) -> List[Dict[str, Any]]:
19+
def extract(self, content: str, config: Optional[Dict] = None, filepath: str = None) -> List[Dict[str, Any]]:
2020
"""
2121
Parses content into a flat list of dicts.
2222
config: The 'parser' section from the YAML manifest (e.g., {'json_root': '...'})

python_core/extractors/csv_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
class CSVParser(BaseParser):
99

1010
@classmethod
11-
def extract(cls, content: str, config: Optional[Dict] = None) -> List[Dict[str, Any]]:
11+
def extract(cls, content: str, config: Optional[Dict] = None, filepath: str = None) -> List[Dict[str, Any]]:
1212
config = config or {}
1313
if not content or not content.strip():
1414
raise FileLevelError("Empty CSV input")

python_core/extractors/csv_multi.py

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
import pandas as pd
2+
import io
3+
import csv
4+
import re
5+
from typing import List, Dict, Any, Optional
16
from .base import BaseParser
27
from python_core.errors import FileLevelError
3-
import re
48

59

610

@@ -15,14 +19,43 @@
1519
class CSVMultiParser(BaseParser):
1620

1721
@classmethod
18-
def parse(cls, s: str, filename: str, cfg, default="", **kwargs):
19-
"""Placeholder: CSV multi-section parser not yet implemented"""
20-
result = ParseResult()
21-
result.add_error(FileLevelError(
22-
"CSV multi-section parser not yet implemented",
23-
context={'filename': filename}
24-
))
25-
return result
22+
def extract(cls, content: str, config: Optional[Dict] = None, filepath: str = None) -> List[Dict[str, Any]]:
    """
    Parse CSV text that may hold several CSV sections concatenated together.

    content:  Raw text of the file.
    config:   The 'parser' section from the YAML manifest. Accepted for
              interface parity with the other parsers; not read here.
    filepath: Optional path of the source file, forwarded to
              `_is_concatenated` as an extra detection hint.

    Returns:
        * Single CSV: a list of row dicts (empty list when there are no rows).
        * Concatenated CSV: a dict mapping each section's first line (its
          header/title line) to that section's list of row dicts.
          NOTE(review): this shape differs from the annotated List[Dict]
          return type -- confirm that callers handle the mapping.

    Raises:
        FileLevelError: on empty input, or wrapping any unexpected failure
        raised while parsing.
    """
    if not content or not content.strip():
        raise FileLevelError("Empty CSV input")

    try:
        if not cls._is_concatenated(content, filepath):
            # Not concatenated: parse the whole payload as one CSV.
            print("[CSVMultiParser] No concatenated sections detected. Parsing as single CSV.")
            df, _bad_lines = cls.str_to_df(content)
            if df.empty:
                return []
            return df.to_dict(orient='records')

        # Sections are separated by two blank lines (three newlines in a row).
        header_records_map = {}
        for segment in re.split("\n\n\n", content):
            lines = [line.strip() for line in segment.split("\n") if line.strip()]
            if len(lines) < 2:
                # A section needs a title line plus at least one CSV line.
                continue

            header = lines[0]
            # str_to_df returns a (dataframe, bad_lines) pair -- unpack it
            # directly, exactly as the single-CSV branch does. Indexing the
            # result with [0] before unpacking broke this branch.
            df, _bad_lines = cls.str_to_df("\n".join(lines[1:]))
            if df.empty:
                header_records_map[header] = []
                continue
            # Use a dedicated local instead of rebinding the `content` argument.
            header_records_map[header] = df.fillna('').to_dict(orient='records')
        return header_records_map

    # TODO deal with error handling
    except FileLevelError:
        raise
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise FileLevelError(f"CSV extraction failed: {e}", context={'error_type': type(e).__name__}) from e
57+
58+
2659

2760
@classmethod
2861
def _is_concatenated(cls, s: str, path: str):
@@ -31,10 +64,8 @@ def _is_concatenated(cls, s: str, path: str):
3164
match = pattern.search(s)
3265
if match:
3366
return True
34-
if path is not None:
35-
if "iCloudUsageData" in path:
67+
if path is not None and isinstance(path, str):
68+
if "iCloudUsageData" in path: # match did not pick up on file, but likely has concatenated contents
3669
return True
37-
return False
70+
return False
3871

39-
40-
""" TODO: Implement multi-section CSV parsing """

0 commit comments

Comments
 (0)