Skip to content

Commit f1fa2e5

Browse files
better keyboard escape
1 parent a8a0302 commit f1fa2e5

5 files changed

Lines changed: 327 additions & 259 deletions

File tree

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,17 @@ Extract MS2 format files (compatible with MS-GF+, Comet, etc.):
1919
ms2-extractor /path/to/sample.d
2020

2121
# shorthand
22-
ms2-ex /path/to/sample.d
22+
ms2-ex
2323
ms2-ex /path/to/sample.d --output custom_output.ms2 --min-intensity 100 --min-charge 2
2424
ms2-ex /path/to/directory_with_multiple_d_folders --output /path/to/output_directory
2525
```
2626

2727
### MGF Extraction
28-
Extract MGF format files (compatible with Mascot, MaxQuant, etc.):
28+
Extract MGF format files:
2929

3030
```bash
3131
mgf-extractor /path/to/sample.d
32+
3233
# shorthand
3334
mgf-ex
3435
mgf-ex /path/to/sample.d --casanovo # Optimized for Casanovo de novo sequencing

src/tdfextractor/cli_args.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ def add_common_args(parser: argparse.ArgumentParser) -> None:
171171
help="Number of decimal places for intensity values (default: 0)",
172172
)
173173

174+
parser.add_argument(
175+
"--keep-empty-spectra",
176+
action="store_true",
177+
help="Keep spectra with no peaks (default: False)",
178+
)
179+
174180
parser.add_argument(
175181
"-v", "--verbose", action="store_true", help="Enable verbose logging"
176182
)
@@ -259,59 +265,60 @@ def log_common_args(logger, args: argparse.Namespace, extractor_type: str) -> No
259265
logger.info(f" Precursor Peak Width: {args.precursor_peak_width} Da")
260266
logger.info(f" Batch Size: {args.batch_size}")
261267
logger.info(
262-
f" Top N Peaks: {args.top_n_peaks if args.top_n_peaks else 'All'}"
268+
f" Top N Peaks: {args.top_n_peaks if args.top_n_peaks is not None else 'All'}"
263269
)
264270
logger.info(
265-
f" Min Spectra Intensity: {args.min_spectra_intensity if args.min_spectra_intensity else 'None'}"
271+
f" Min Spectra Intensity: {args.min_spectra_intensity if args.min_spectra_intensity is not None else 'None'}"
266272
)
267273
logger.info(
268-
f" Max Spectra Intensity: {args.max_spectra_intensity if args.max_spectra_intensity else 'None'}"
274+
f" Max Spectra Intensity: {args.max_spectra_intensity if args.max_spectra_intensity is not None else 'None'}"
269275
)
270276
logger.info(
271-
f" Min Spectra m/z: {args.min_spectra_mz if args.min_spectra_mz else 'None'}"
277+
f" Min Spectra m/z: {args.min_spectra_mz if args.min_spectra_mz is not None else 'None'}"
272278
)
273279
logger.info(
274-
f" Max Spectra m/z: {args.max_spectra_mz if args.max_spectra_mz else 'None'}"
280+
f" Max Spectra m/z: {args.max_spectra_mz if args.max_spectra_mz is not None else 'None'}"
275281
)
276282
logger.info(
277-
f" Min Precursor Intensity: {args.min_precursor_intensity if args.min_precursor_intensity else 'None'}"
283+
f" Min Precursor Intensity: {args.min_precursor_intensity if args.min_precursor_intensity is not None else 'None'}"
278284
)
279285
logger.info(
280-
f" Max Precursor Intensity: {args.max_precursor_intensity if args.max_precursor_intensity else 'None'}"
286+
f" Max Precursor Intensity: {args.max_precursor_intensity if args.max_precursor_intensity is not None else 'None'}"
281287
)
282288
logger.info(
283-
f" Min Precursor Charge: {args.min_precursor_charge if args.min_precursor_charge else 'None'}"
289+
f" Min Precursor Charge: {args.min_precursor_charge if args.min_precursor_charge is not None else 'None'}"
284290
)
285291
logger.info(
286-
f" Max Precursor Charge: {args.max_precursor_charge if args.max_precursor_charge else 'None'}"
292+
f" Max Precursor Charge: {args.max_precursor_charge if args.max_precursor_charge is not None else 'None'}"
287293
)
288294
logger.info(
289-
f" Min Precursor m/z: {args.min_precursor_mz if args.min_precursor_mz else 'None'}"
295+
f" Min Precursor m/z: {args.min_precursor_mz if args.min_precursor_mz is not None else 'None'}"
290296
)
291297
logger.info(
292-
f" Max Precursor m/z: {args.max_precursor_mz if args.max_precursor_mz else 'None'}"
298+
f" Max Precursor m/z: {args.max_precursor_mz if args.max_precursor_mz is not None else 'None'}"
293299
)
294300
logger.info(
295-
f" Min Precursor RT: {args.min_precursor_rt if args.min_precursor_rt else 'None'} seconds"
301+
f" Min Precursor RT: {args.min_precursor_rt if args.min_precursor_rt is not None else 'None'} seconds"
296302
)
297303
logger.info(
298-
f" Max Precursor RT: {args.max_precursor_rt if args.max_precursor_rt else 'None'} seconds"
304+
f" Max Precursor RT: {args.max_precursor_rt if args.max_precursor_rt is not None else 'None'} seconds"
299305
)
300306
logger.info(
301-
f" Min Precursor CCS: {args.min_precursor_ccs if args.min_precursor_ccs else 'None'}"
307+
f" Min Precursor CCS: {args.min_precursor_ccs if args.min_precursor_ccs is not None else 'None'}"
302308
)
303309
logger.info(
304-
f" Max Precursor CCS: {args.max_precursor_ccs if args.max_precursor_ccs else 'None'}"
310+
f" Max Precursor CCS: {args.max_precursor_ccs if args.max_precursor_ccs is not None else 'None'}"
305311
)
306312
logger.info(
307-
f" Min Precursor Neutral Mass: {args.min_precursor_neutral_mass if args.min_precursor_neutral_mass else 'None'}"
313+
f" Min Precursor Neutral Mass: {args.min_precursor_neutral_mass if args.min_precursor_neutral_mass is not None else 'None'}"
308314
)
309315
logger.info(
310-
f" Max Precursor Neutral Mass: {args.max_precursor_neutral_mass if args.max_precursor_neutral_mass else 'None'}"
316+
f" Max Precursor Neutral Mass: {args.max_precursor_neutral_mass if args.max_precursor_neutral_mass is not None else 'None'}"
311317
)
312318
logger.info(f" Overwrite Existing Output: {args.overwrite}")
313319
logger.info(f" m/z Precision: {args.mz_precision} decimal places")
314320
logger.info(f" Intensity Precision: {args.intensity_precision} decimal places")
321+
logger.info(f" Keep Empty Spectra: {args.keep_empty_spectra}")
315322
logger.info(f" Verbose Logging: {args.verbose}")
316323

317324
# Log preset-specific settings

src/tdfextractor/mgf_exctractor.py

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import logging
66
import os
7+
import signal
8+
import sys
79
import time
810
import threading
911
import queue
@@ -12,12 +14,11 @@
1214

1315
from tqdm import tqdm
1416

15-
from .utils import get_ms2_dda_content
17+
from .utils import get_ms2_dda_content, get_tdf_df
1618
from .cli_args import create_mgf_parser, apply_preset_settings, log_common_args
1719

1820
logger = logging.getLogger(__name__)
1921

20-
2122
def write_mgf_file(
2223
analysis_dir: str,
2324
output_file: Optional[str] = None,
@@ -43,6 +44,7 @@ def write_mgf_file(
4344
max_precursor_neutral_mass: Optional[float] = None,
4445
mz_precision: int = 5,
4546
intensity_precision: int = 0,
47+
keep_empty_spectra: bool = False,
4648
):
4749

4850
start_time = time.time()
@@ -53,10 +55,27 @@ def write_mgf_file(
5355
logger.info("Generating Ms2 Spectra (producer-consumer mode)")
5456
spectra_queue = queue.Queue(maxsize=100)
5557

58+
merged_df = get_tdf_df(
59+
analysis_dir,
60+
min_precursor_intensity,
61+
max_precursor_intensity,
62+
min_precursor_charge,
63+
max_precursor_charge,
64+
min_precursor_mz,
65+
max_precursor_mz,
66+
min_precursor_rt,
67+
max_precursor_rt,
68+
min_precursor_ccs,
69+
max_precursor_ccs,
70+
min_precursor_neutral_mass,
71+
max_precursor_neutral_mass,
72+
)
73+
5674
def producer():
5775
try:
5876
ms2_spectra = get_ms2_dda_content(
5977
analysis_dir=analysis_dir,
78+
merged_df=merged_df,
6079
remove_precursor=remove_precursor,
6180
precursor_peak_width=precursor_peak_width,
6281
batch_size=batch_size,
@@ -65,18 +84,6 @@ def producer():
6584
max_spectra_intensity=max_spectra_intensity,
6685
min_spectra_mz=min_spectra_mz,
6786
max_spectra_mz=max_spectra_mz,
68-
min_precursor_intensity=min_precursor_intensity,
69-
max_precursor_intensity=max_precursor_intensity,
70-
min_precursor_charge=min_precursor_charge,
71-
max_precursor_charge=max_precursor_charge,
72-
min_precursor_mz=min_precursor_mz,
73-
max_precursor_mz=max_precursor_mz,
74-
min_precursor_rt=min_precursor_rt,
75-
max_precursor_rt=max_precursor_rt,
76-
min_precursor_ccs=min_precursor_ccs,
77-
max_precursor_ccs=max_precursor_ccs,
78-
min_precursor_neutral_mass=min_precursor_neutral_mass,
79-
max_precursor_neutral_mass=max_precursor_neutral_mass,
8087
)
8188
for spectrum in ms2_spectra:
8289
spectra_queue.put(spectrum)
@@ -86,7 +93,9 @@ def producer():
8693
def consumer():
8794
logger.info("Writing Contents To File")
8895
with open(output_file, "w", encoding="UTF-8") as file:
89-
with tqdm(desc="Writing MGF File", unit="spectra") as pbar:
96+
with tqdm(
97+
desc="Writing MGF File", unit="spectra", total=len(merged_df)
98+
) as pbar:
9099
# https://www.matrixscience.com/help/data_file_help.html
91100
header_lines = []
92101
header_lines.append(f"INSTRUMENT=TimsTOF")
@@ -96,23 +105,35 @@ def consumer():
96105
spectrum = spectra_queue.get()
97106
if spectrum is None:
98107
break
108+
109+
pbar.update(1)
110+
111+
if len(spectrum.mz_spectra) == 0 and keep_empty_spectra is False:
112+
continue
113+
99114
mgf_lines = []
100115
mgf_lines.append("BEGIN IONS")
101116
mgf_lines.append(
102117
f"TITLE={Path(analysis_dir).stem}.{spectrum.low_scan}.{spectrum.high_scan}.{spectrum.charge} "
103118
f'File="{Path(analysis_dir).stem}", NativeID="merged={spectrum.precursor_id} frame={spectrum.parent_id} '
104119
f'scanStart={spectrum.scan_begin} scanEnd={spectrum.scan_end} scan={spectrum.low_scan}"'
105120
)
106-
mgf_lines.append(f"RTINSECONDS={spectrum.rt:.2f}")
121+
mgf_lines.append(f"RTINSECONDS={spectrum.rt:.2f}")
107122
# Pepmass is actually mz? huh?
108-
mgf_lines.append(f"PEPMASS={spectrum.mz:.6f} {spectrum.prec_intensity:.{intensity_precision}f}")
123+
mgf_lines.append(
124+
f"PEPMASS={spectrum.mz:.6f} {spectrum.prec_intensity:.{intensity_precision}f}"
125+
)
109126
mgf_lines.append(f"CHARGE={spectrum.charge}+")
110-
for mz, intensity in zip(spectrum.mz_spectra, spectrum.intensity_spectra):
111-
mgf_lines.append(f"{mz:.{mz_precision}f} {intensity:.{intensity_precision}f}")
127+
for mz, intensity in zip(
128+
spectrum.mz_spectra, spectrum.intensity_spectra
129+
):
130+
mgf_lines.append(
131+
f"{mz:.{mz_precision}f} {intensity:.{intensity_precision}f}"
132+
)
112133
mgf_lines.append("END IONS")
113134
file.write("\n".join(mgf_lines) + "\n\n")
114-
pbar.update(1)
115135

136+
116137
producer_thread = threading.Thread(target=producer)
117138
consumer_thread = threading.Thread(target=consumer)
118139

@@ -129,6 +150,7 @@ def main():
129150
"""
130151
Command-line interface for MGF extraction from TimsTOF data.
131152
"""
153+
132154
parser = create_mgf_parser()
133155
args = parser.parse_args()
134156

@@ -224,7 +246,6 @@ def main():
224246
continue
225247

226248
try:
227-
228249
write_mgf_file(
229250
analysis_dir=str(d_folder),
230251
output_file=output,
@@ -248,13 +269,15 @@ def main():
248269
max_precursor_ccs=args.max_precursor_ccs,
249270
min_precursor_neutral_mass=args.min_precursor_neutral_mass,
250271
max_precursor_neutral_mass=args.max_precursor_neutral_mass,
272+
keep_empty_spectra=args.keep_empty_spectra,
251273
)
252274
logger.info("MGF extraction completed successfully!")
253275
except Exception as e:
254276
logger.error(f"Error during MGF extraction: {e}... skipping {d_folder}")
277+
continue
255278
except KeyboardInterrupt:
256-
logger.info("Extraction interrupted by user.")
257-
return 0
279+
logger.info("\nExtraction interrupted by user.")
280+
os._exit(0)
258281

259282

260283
if __name__ == "__main__":

0 commit comments

Comments
 (0)