44
55import logging
66import os
7+ import signal
8+ import sys
79import time
810import threading
911import queue
1214
1315from tqdm import tqdm
1416
15- from .utils import get_ms2_dda_content
17+ from .utils import get_ms2_dda_content , get_tdf_df
1618from .cli_args import create_mgf_parser , apply_preset_settings , log_common_args
1719
1820logger = logging .getLogger (__name__ )
1921
20-
2122def write_mgf_file (
2223 analysis_dir : str ,
2324 output_file : Optional [str ] = None ,
@@ -43,6 +44,7 @@ def write_mgf_file(
4344 max_precursor_neutral_mass : Optional [float ] = None ,
4445 mz_precision : int = 5 ,
4546 intensity_precision : int = 0 ,
47+ keep_empty_spectra : bool = False ,
4648):
4749
4850 start_time = time .time ()
@@ -53,10 +55,27 @@ def write_mgf_file(
5355 logger .info ("Generating Ms2 Spectra (producer-consumer mode)" )
5456 spectra_queue = queue .Queue (maxsize = 100 )
5557
58+ merged_df = get_tdf_df (
59+ analysis_dir ,
60+ min_precursor_intensity ,
61+ max_precursor_intensity ,
62+ min_precursor_charge ,
63+ max_precursor_charge ,
64+ min_precursor_mz ,
65+ max_precursor_mz ,
66+ min_precursor_rt ,
67+ max_precursor_rt ,
68+ min_precursor_ccs ,
69+ max_precursor_ccs ,
70+ min_precursor_neutral_mass ,
71+ max_precursor_neutral_mass ,
72+ )
73+
5674 def producer ():
5775 try :
5876 ms2_spectra = get_ms2_dda_content (
5977 analysis_dir = analysis_dir ,
78+ merged_df = merged_df ,
6079 remove_precursor = remove_precursor ,
6180 precursor_peak_width = precursor_peak_width ,
6281 batch_size = batch_size ,
@@ -65,18 +84,6 @@ def producer():
6584 max_spectra_intensity = max_spectra_intensity ,
6685 min_spectra_mz = min_spectra_mz ,
6786 max_spectra_mz = max_spectra_mz ,
68- min_precursor_intensity = min_precursor_intensity ,
69- max_precursor_intensity = max_precursor_intensity ,
70- min_precursor_charge = min_precursor_charge ,
71- max_precursor_charge = max_precursor_charge ,
72- min_precursor_mz = min_precursor_mz ,
73- max_precursor_mz = max_precursor_mz ,
74- min_precursor_rt = min_precursor_rt ,
75- max_precursor_rt = max_precursor_rt ,
76- min_precursor_ccs = min_precursor_ccs ,
77- max_precursor_ccs = max_precursor_ccs ,
78- min_precursor_neutral_mass = min_precursor_neutral_mass ,
79- max_precursor_neutral_mass = max_precursor_neutral_mass ,
8087 )
8188 for spectrum in ms2_spectra :
8289 spectra_queue .put (spectrum )
@@ -86,7 +93,9 @@ def producer():
8693 def consumer ():
8794 logger .info ("Writing Contents To File" )
8895 with open (output_file , "w" , encoding = "UTF-8" ) as file :
89- with tqdm (desc = "Writing MGF File" , unit = "spectra" ) as pbar :
96+ with tqdm (
97+ desc = "Writing MGF File" , unit = "spectra" , total = len (merged_df )
98+ ) as pbar :
9099 # https://www.matrixscience.com/help/data_file_help.html
91100 header_lines = []
92101 header_lines .append (f"INSTRUMENT=TimsTOF" )
@@ -96,23 +105,35 @@ def consumer():
96105 spectrum = spectra_queue .get ()
97106 if spectrum is None :
98107 break
108+
109+ pbar .update (1 )
110+
111+ if len (spectrum .mz_spectra ) == 0 and keep_empty_spectra is False :
112+ continue
113+
99114 mgf_lines = []
100115 mgf_lines .append ("BEGIN IONS" )
101116 mgf_lines .append (
102117 f"TITLE={ Path (analysis_dir ).stem } .{ spectrum .low_scan } .{ spectrum .high_scan } .{ spectrum .charge } "
103118 f'File="{ Path (analysis_dir ).stem } ", NativeID="merged={ spectrum .precursor_id } frame={ spectrum .parent_id } '
104119 f'scanStart={ spectrum .scan_begin } scanEnd={ spectrum .scan_end } scan={ spectrum .low_scan } "'
105120 )
106- mgf_lines .append (f"RTINSECONDS={ spectrum .rt :.2f} " )
121+ mgf_lines .append (f"RTINSECONDS={ spectrum .rt :.2f} " )
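107122 # NOTE: despite its name, the MGF PEPMASS field holds the precursor m/z (not a neutral mass), optionally followed by the precursor intensity.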
108- mgf_lines .append (f"PEPMASS={ spectrum .mz :.6f} { spectrum .prec_intensity :.{intensity_precision }f} " )
123+ mgf_lines .append (
124+ f"PEPMASS={ spectrum .mz :.6f} { spectrum .prec_intensity :.{intensity_precision }f} "
125+ )
109126 mgf_lines .append (f"CHARGE={ spectrum .charge } +" )
110- for mz , intensity in zip (spectrum .mz_spectra , spectrum .intensity_spectra ):
111- mgf_lines .append (f"{ mz :.{mz_precision }f} { intensity :.{intensity_precision }f} " )
127+ for mz , intensity in zip (
128+ spectrum .mz_spectra , spectrum .intensity_spectra
129+ ):
130+ mgf_lines .append (
131+ f"{ mz :.{mz_precision }f} { intensity :.{intensity_precision }f} "
132+ )
112133 mgf_lines .append ("END IONS" )
113134 file .write ("\n " .join (mgf_lines ) + "\n \n " )
114- pbar .update (1 )
115135
136+
116137 producer_thread = threading .Thread (target = producer )
117138 consumer_thread = threading .Thread (target = consumer )
118139
@@ -129,6 +150,7 @@ def main():
129150 """
130151 Command-line interface for MGF extraction from TimsTOF data.
131152 """
153+
132154 parser = create_mgf_parser ()
133155 args = parser .parse_args ()
134156
@@ -224,7 +246,6 @@ def main():
224246 continue
225247
226248 try :
227-
228249 write_mgf_file (
229250 analysis_dir = str (d_folder ),
230251 output_file = output ,
@@ -248,13 +269,15 @@ def main():
248269 max_precursor_ccs = args .max_precursor_ccs ,
249270 min_precursor_neutral_mass = args .min_precursor_neutral_mass ,
250271 max_precursor_neutral_mass = args .max_precursor_neutral_mass ,
272+ keep_empty_spectra = args .keep_empty_spectra ,
251273 )
252274 logger .info ("MGF extraction completed successfully!" )
253275 except Exception as e :
254276 logger .error (f"Error during MGF extraction: { e } ... skipping { d_folder } " )
277+ continue
255278 except KeyboardInterrupt :
256- logger .info ("Extraction interrupted by user." )
257- return 0
279+ logger .info ("\n Extraction interrupted by user." )
280+ os . _exit ( 0 )
258281
259282
260283if __name__ == "__main__" :
0 commit comments