Skip to content

Commit 62b52c8

Browse files
authored
Merge branch 'dev' into ASB-33010_sqlite_tutorial
2 parents fcc990d + 573d438 commit 62b52c8

8 files changed

Lines changed: 114 additions & 58 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
### Deprecated
1111

1212
### Fixed
13+
[PR # 34](https://github.com/spacetelescope/mast_contributor_tools/pull/34)
14+
- Fixed a bug where file names with more than 9 fields failed silently and were not added to the results file.
1315

1416
### Removed
1517

docs/filename_check_readme.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,11 @@ The results are organized by field, and the fields must appear in a particular o
134134

135135
See the HLSP [File Naming Convention](https://outerspace.stsci.edu/display/MASTDOCS/File+Naming+Convention) for detailed rules. The results of the filename evaluation are stored in an SQLite3 database. Each recognized field is evaluated on the following criteria:
136136

137-
- Capitalization: the filename must be all lower case.
138-
- Character Length: each field has a maximum character length.
139-
- Format: checks overall format and special characters: for example, a period `.` is allowed in the `<version>` field but not in the `<proj-id>`. Certain fields allow hyphen-separated elements. Most fields must begin and end with an ASCII alpha-numeric character.
140-
- Value: In some cases, the contents of each field are validated against known values to the extent possible.
137+
- Capitalization (`capitalization_score`): the filename must be all lower case.
138+
- Character Length (`length_score`): each field has a maximum character length.
139+
- Format (`format_score`): checks overall format and special characters: for example, a period `.` is allowed in the `<version>` field but not in the `<proj-id>`. Certain fields allow hyphen-separated elements. Most fields must begin and end with an ASCII alpha-numeric character.
140+
- Value (`value_score`): In some cases, the contents of each field are validated against known values to the extent possible.
141+
- Field Number (`nfield_score`): The file name must contain 9 fields or fewer (including the file extension), separated by underscores.
141142

142143
The evaluation scores for individual fields and the overall file names are one of `PASS`, `NEEDS REVIEW` or `FAIL`. A verdict of `NEEDS REVIEW` is usually the result of an unrecognized value. This is often necessary and correct, e.g. for new product types or instruments whose data we haven't ingested before. Please consult with MAST staff for review.
143144

mast_contributor_tools/filename_check/fc_app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,14 +203,15 @@ def check_single_filename(file_name: str, hlsp_name: str = "") -> None:
203203
"length_score": "Character length for this field is too long.",
204204
"format_score": "Forbidden characters detected. Value should be alphanumeric with hyphens, although some special characters are allowed in the 'target_name' or 'version' fields.",
205205
"value_score": "Unrecognized value or combination. These are often necessary and good, but require review by MAST staff.",
206+
"nfield_score": "File name contains more than 9 fields; underscores cannot be used within a field.",
206207
}
207208

208209
# Display results
209210
for e in elements:
210211
logger_msg = "Individual Field evaluations: \n"
211212
for p, v in e.items():
212213
logger_msg += f" {p}: '{v}' \n"
213-
if (v.lower() in ["needs review", "fail"]) and (p in suggested_solutions.keys()):
214+
if (str(v).lower() in ["needs review", "fail"]) and (p in suggested_solutions.keys()):
214215
# Wrap text to the same indent level
215216
logger_msg += textwrap.fill(
216217
f"\tHINT: {suggested_solutions[p]}",

mast_contributor_tools/filename_check/fc_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ SemanticTypes:
105105
- sr
106106
- stack
107107
- tds
108+
- tp
108109
- tpf
109110
- warp
110111
- wave

mast_contributor_tools/filename_check/fc_db.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,27 @@
2020
file_ref TEXT NOT NULL,
2121
name TEXT NOT NULL,
2222
value TEXT NOT NULL,
23+
nfield INTEGER,
2324
capitalization_score TEXT NOT NULL DEFAULT 'fail' CHECK("capitalization_score" IN ('pass', 'fail')),
2425
length_score TEXT NOT NULL DEFAULT 'fail' CHECK("length_score" IN ('pass', 'fail')),
2526
format_score TEXT NOT NULL DEFAULT 'fail' CHECK("format_score" IN ('pass', 'fail')),
2627
value_score TEXT NOT NULL DEFAULT 'fail' CHECK("value_score" IN ('pass', 'fail', 'needs review')),
28+
nfield_score TEXT NOT NULL DEFAULT 'fail' CHECK("nfield_score" IN ('pass', 'fail', 'needs review')),
2729
field_verdict TEXT NOT NULL DEFAULT 'FAIL' CHECK("field_verdict" IN ('PASS', 'FAIL', 'NEEDS REVIEW')),
2830
FOREIGN KEY(file_ref) REFERENCES filename_db(filename)
2931
);
3032
"""
3133
PROBLEMS_VIEW = """
3234
CREATE VIEW IF NOT EXISTS potential_problems as
33-
select fn.path, fn.filename, fn.n_elements, fl.name, fl.value, fl.capitalization_score, fl.length_score,
34-
fl.value_score, fl.field_verdict
35+
select fn.path, fn.filename, fn.n_elements, fl.name, fl.value, fl.nfield, fl.capitalization_score, fl.length_score,
36+
fl.format_score, fl.value_score, fl.nfield_score, fl.field_verdict
3537
from filename as fn, fields as fl
3638
where fn.filename = fl.file_ref
3739
AND fl.field_verdict != 'PASS';
3840
"""
3941

4042
INSERT_FILE_RECORD = """INSERT INTO filename VALUES(:path,:filename,:final_verdict,:n_elements)"""
41-
INSERT_FIELD_RECORD = """INSERT INTO fields VALUES(:file_ref,:name,:value,:capitalization_score,:length_score,:format_score,:value_score,:field_verdict)"""
43+
INSERT_FIELD_RECORD = """INSERT INTO fields VALUES(:file_ref,:name,:value,:nfield,:capitalization_score,:length_score,:format_score,:value_score,:nfield_score,:field_verdict)"""
4244

4345

4446
class Hlsp_SQLiteDb:

mast_contributor_tools/filename_check/hlsp_filename.py

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
import yaml
99

10+
from mast_contributor_tools.utils.logger_config import setup_logger
11+
12+
logger = setup_logger(__name__)
13+
1014
# ==========================================
1115
# Setup some configurations for this module
1216
# ==========================================
@@ -111,6 +115,11 @@ def capitalization(value: str) -> str:
111115
Returns 'pass' or 'fail' based on results."""
112116
return SCORE[value.islower()]
113117

118+
def nfields(field_index: int) -> str:
119+
"""Tests that the field index is 9 or less (i.e. less than 10);
120+
Returns 'pass' or 'fail' based on results."""
121+
return SCORE[field_index < 10]
122+
114123
def match_pattern(value: str, regex_expr: re.Pattern) -> str:
115124
"""Test that the field contains no forbidden characters.
116125
Returns 'pass' or 'fail' based on results."""
@@ -168,10 +177,11 @@ class FilenameFieldAB(ABC):
168177
Value of the field (i.e. text of the field in the filename)
169178
"""
170179

171-
def __init__(self, field_name: str, field_value: str) -> None:
180+
def __init__(self, field_name: str, field_value: str, field_indx: int) -> None:
172181
self.name = field_name
173182
self.value = field_value
174183
self.max_len = fieldLengthPolicy[field_name]
184+
self.field_indx = field_indx + 1 # index from 1 instead of 0
175185

176186
# Set regex pattern based on field name
177187
if self.name == "hlsp_name":
@@ -193,6 +203,8 @@ def __init__(self, field_name: str, field_value: str) -> None:
193203
self.format_eval = False
194204
# Value Evaluation (recognized entries for telescope, filter, etc.)
195205
self.value_eval = False
206+
# Field number index evaluation (1-based index must be 9 or less)
207+
self.nfield_eval = False
196208
# Final Verdict
197209
self.field_verdict = "fail"
198210

@@ -202,30 +214,34 @@ def evaluate(self):
202214
self.cap_eval = FieldRule.capitalization(self.value)
203215
self.len_eval = FieldRule.length(self.value, self.max_len)
204216
self.format_eval = FieldRule.match_pattern(self.value, self.regex_pattern)
217+
self.nfield_eval = FieldRule.nfields(self.field_indx)
205218

206219
def get_scores(self):
207220
"""Return final scores"""
208221
# Determine the final verdict as the worst of the individual scores
209-
all_scores = [self.cap_eval, self.len_eval, self.format_eval, self.value_eval]
222+
all_scores = [self.cap_eval, self.len_eval, self.format_eval, self.value_eval, self.nfield_eval]
210223
self.field_verdict = FieldRule.field_verdict(all_scores)
211224
return {
212225
# Name of Field: for example 'mission' or 'product_type'
213226
"name": self.name,
214227
# value of the field: for example 'jwst' or 'spec'
215228
"value": self.value,
229+
# Index of the field: location in file name
230+
"nfield": self.field_indx,
216231
# Results from each validation check
217232
"capitalization_score": self.cap_eval,
218233
"length_score": self.len_eval,
219234
"format_score": self.format_eval,
220235
"value_score": self.value_eval,
236+
"nfield_score": self.nfield_eval,
221237
# Final Score
222238
"field_verdict": self.field_verdict,
223239
}
224240

225241

226242
class ExtensionField(FilenameFieldAB):
227-
def __init__(self, value: str) -> None:
228-
super().__init__("extension", value)
243+
def __init__(self, value: str, field_indx: int = 8) -> None:
244+
super().__init__("extension", value, field_indx)
229245

230246
def evaluate(self):
231247
super().evaluate()
@@ -235,8 +251,8 @@ def evaluate(self):
235251
class FilterField(FilenameFieldAB):
236252
"""A container for attributes of the filename Filtername field."""
237253

238-
def __init__(self, value: str) -> None:
239-
super().__init__("filter", value)
254+
def __init__(self, value: str, field_indx: int = 5) -> None:
255+
super().__init__("filter", value, field_indx)
240256

241257
def evaluate(self):
242258
super().evaluate()
@@ -246,8 +262,8 @@ def evaluate(self):
246262
class HlspField(FilenameFieldAB):
247263
"""A container for attributes of the literal 'hlsp' prefix field."""
248264

249-
def __init__(self, value: str) -> None:
250-
super().__init__("hlsp_str", value)
265+
def __init__(self, value: str, field_indx: int = 0) -> None:
266+
super().__init__("hlsp_str", value, field_indx)
251267

252268
def evaluate(self):
253269
super().evaluate()
@@ -257,8 +273,8 @@ def evaluate(self):
257273
class HlspNameField(FilenameFieldAB):
258274
"""A container for attributes of the HLSP name field."""
259275

260-
def __init__(self, value: str, ref_name: str) -> None:
261-
super().__init__("hlsp_name", value)
276+
def __init__(self, value: str, ref_name: str, field_indx: int = 1) -> None:
277+
super().__init__("hlsp_name", value, field_indx)
262278
self.hlsp_ref_name = ref_name.lower()
263279

264280
def evaluate(self):
@@ -270,8 +286,8 @@ def evaluate(self):
270286
class InstrumentField(FilenameFieldAB):
271287
"""A container for attributes of the filename Instrument field."""
272288

273-
def __init__(self, value: str) -> None:
274-
super().__init__("instrument", value)
289+
def __init__(self, value: str, field_indx: int = 3) -> None:
290+
super().__init__("instrument", value, field_indx)
275291

276292
def evaluate(self):
277293
super().evaluate()
@@ -281,8 +297,8 @@ def evaluate(self):
281297
class MissionField(FilenameFieldAB):
282298
"""A container for attributes of the filename Mission (or observatory) field."""
283299

284-
def __init__(self, value: str) -> None:
285-
super().__init__("mission", value)
300+
def __init__(self, value: str, field_indx: int = 2) -> None:
301+
super().__init__("mission", value, field_indx)
286302

287303
def evaluate(self):
288304
super().evaluate()
@@ -292,8 +308,8 @@ def evaluate(self):
292308
class ProductField(FilenameFieldAB):
293309
"""A container for attributes of the filename ProductType field."""
294310

295-
def __init__(self, value: str) -> None:
296-
super().__init__("product_type", value)
311+
def __init__(self, value: str, field_indx: int = 7) -> None:
312+
super().__init__("product_type", value, field_indx)
297313

298314
def evaluate(self):
299315
super().evaluate()
@@ -303,8 +319,8 @@ def evaluate(self):
303319
class TargetField(FilenameFieldAB):
304320
"""A container for attributes of the filename TargetName field."""
305321

306-
def __init__(self, value: str) -> None:
307-
super().__init__("target_name", value)
322+
def __init__(self, value: str, field_indx: int = 4) -> None:
323+
super().__init__("target_name", value, field_indx)
308324

309325
def evaluate(self):
310326
super().evaluate()
@@ -317,8 +333,8 @@ def evaluate(self):
317333
class VersionField(FilenameFieldAB):
318334
"""A container for attributes of the filename Version field."""
319335

320-
def __init__(self, value: str) -> None:
321-
super().__init__("version_id", value)
336+
def __init__(self, value: str, field_indx: int = 6) -> None:
337+
super().__init__("version_id", value, field_indx)
322338

323339
def evaluate(self):
324340
super().evaluate()
@@ -333,10 +349,10 @@ class GenericField(FilenameFieldAB):
333349
validated for length and capitalization, but not for value.
334350
"""
335351

336-
def __init__(self, value: str, id: int) -> None:
337-
super().__init__("generic" + str(id), value)
352+
def __init__(self, value: str, id: int, field_indx: int) -> None:
353+
super().__init__("generic" + str(id), value, field_indx)
338354

339-
def evaluate(self):
355+
def evaluate(self) -> None:
340356
super().evaluate()
341357
# No restriction on generic field values
342358
self.value_eval = "pass"
@@ -399,34 +415,52 @@ def partition(self) -> None:
399415
if self.nFields < 4:
400416
raise ValueError(f"Filename {self.name} has less than 4 fields")
401417
elif self.nFields > 9:
402-
raise ValueError(f"Filename {self.name} has more than 9 fields")
418+
# Don't raise a ValueError here: the individual fields can still be checked
419+
# but filename will be added to the results as a FAIL
420+
logger.error(
421+
(
422+
f"Filename '{self.name}' contains more than 9 fields (total {self.nFields}). "
423+
"Individual fields will still be evaluated, "
424+
"but the final verdict will be 'FAIL'"
425+
)
426+
)
403427

404428
def create_fields(self) -> None:
405429
"""Create Field objects for each field in the filename."""
406430
nf = self.nFields
407431
# The first two fields are: 'hlsp' and the acronym of the collection
408-
self.fields.append(HlspField(self.fieldvals[0]))
409-
self.fields.append(HlspNameField(self.fieldvals[1], self.hlspName))
432+
self.fields.append(HlspField(self.fieldvals[0], 0))
433+
self.fields.append(HlspNameField(self.fieldvals[1], self.hlspName, 1))
410434

411435
# If there are 9 fields, assume the rest of the fields are present in order
412436
if nf == 9:
413-
self.fields.append(MissionField(self.fieldvals[2]))
414-
self.fields.append(InstrumentField(self.fieldvals[3]))
415-
self.fields.append(TargetField(self.fieldvals[4]))
416-
self.fields.append(FilterField(self.fieldvals[5]))
437+
self.fields.append(MissionField(self.fieldvals[2], 2))
438+
self.fields.append(InstrumentField(self.fieldvals[3], 3))
439+
self.fields.append(TargetField(self.fieldvals[4], 4))
440+
self.fields.append(FilterField(self.fieldvals[5], 5))
417441

418442
# If there are 5 < nFields < 9, the other fields are treated as generic
419443
elif 5 < nf < 9:
420444
for i in range(2, nf - 3):
421-
self.fields.append(GenericField(self.fieldvals[i], i - 1))
445+
self.fields.append(GenericField(self.fieldvals[i], i - 1, i))
446+
447+
# If there are more than 9 fields, treat the extra fields as generic
448+
# The check will fail at the filename level, but the fields can still be tested
449+
elif nf > 9:
450+
self.fields.append(MissionField(self.fieldvals[2], 2))
451+
self.fields.append(InstrumentField(self.fieldvals[3], 3))
452+
self.fields.append(TargetField(self.fieldvals[4], 4))
453+
self.fields.append(FilterField(self.fieldvals[5], 5))
454+
for i in range(6, nf - 3):
455+
self.fields.append(GenericField(self.fieldvals[i], i - 5, i))
422456

423457
# Files should have a version field unless the product_type is readme
424458
if self.fieldvals[nf - 2].lower() not in ["readme"]:
425-
self.fields.append(VersionField(self.fieldvals[nf - 3]))
459+
self.fields.append(VersionField(self.fieldvals[nf - 3], nf - 3))
426460

427461
# The last two fields are: the file semantic type and the extension
428-
self.fields.append(ProductField(self.fieldvals[nf - 2]))
429-
self.fields.append(ExtensionField(self.fieldvals[nf - 1]))
462+
self.fields.append(ProductField(self.fieldvals[nf - 2], nf - 2))
463+
self.fields.append(ExtensionField(self.fieldvals[nf - 1], nf - 1))
430464

431465
def evaluate_fields(self):
432466
"""Evaluate attributes of each field
@@ -451,13 +485,22 @@ def evaluate_filename(self):
451485
dict[str, Any]
452486
Dictionary of file name attributes
453487
"""
488+
# The final verdict is determined as the worst of the individual field verdicts
454489
field_verdicts = [f.field_verdict for f in self.fields]
455490
if "FAIL" in field_verdicts:
456491
final_verdict = "fail"
457492
elif "NEEDS REVIEW" in field_verdicts:
458493
final_verdict = "needs review"
459494
else:
460495
final_verdict = "pass"
496+
497+
# Additional last-minute checks based on the number of fields
498+
if self.nFields > 9: # more than 9 fields
499+
final_verdict = "fail"
500+
elif self.nFields < 5: # less than 5 fields
501+
final_verdict = "fail"
502+
503+
# Final result for this filename
461504
attr = {
462505
"path": self.path,
463506
"filename": self.name,

0 commit comments

Comments
 (0)