33"""
44
55import logging
6+ import os
67import time
78from pathlib import Path
8- from typing import Generator , List , Optional
9+ from typing import Optional
910import argparse
1011
11- import pandas as pd
12- from tdfpy import timsdata
13- from tdfpy .pandas_tdf import PandasTdf
14- from serenipy .ms2 import Ms2Spectra
1512from tqdm import tqdm
1613
17- from tqdm import tqdm
18- from .utils import calculate_mass , get_ms2_dda_content , map_precursor_to_ip2_scan_number
19- import numpy as np
14+ from .utils import get_ms2_dda_content
2015
2116logger = logging .getLogger (__name__ )
2217
@@ -183,6 +178,12 @@ def main():
183178 help = "Remove precursor peaks from MS/MS spectra" ,
184179 )
185180
181+ parser .add_argument (
182+ "--overwrite" ,
183+ action = "store_true" ,
184+ help = "Overwrite existing output file if it exists" ,
185+ )
186+
186187 args = parser .parse_args ()
187188
188189 # if casanovo update params to be: --top-n-spectra 150 --min-intensity 0.01 --min-charge 2 --max-charge 5 --min-mz 50 --max-mz 2500
@@ -218,6 +219,7 @@ def main():
218219 logger .info (f" Max RT: { args .max_rt if args .max_rt else 'None' } seconds" )
219220 logger .info (f" Min CCS: { args .min_ccs if args .min_ccs else 'None' } " )
220221 logger .info (f" Max CCS: { args .max_ccs if args .max_ccs else 'None' } " )
222+ logger .info (f" Overwrite Existing Output: { args .overwrite } " )
221223 logger .info (f" Verbose Logging: { args .verbose } " )
222224
223225 # Validate input directory
@@ -243,9 +245,36 @@ def main():
243245 logger .error (f"No .d folders found in: { args .analysis_dir } " )
244246 return 1
245247
246- output = args .output
247- if len (d_folders ) > 1 :
248- output = None
248+ output_dir = None
249+ output_name = None
250+
 251+ # resolve the output directory and file name from args.output
252+ if args .output is None :
 253+ # output will be within d folder
254+ output_dir = None
255+ output_name = Path (args .analysis_dir ).stem + ".mgf"
256+
257+ elif args .output .endswith (".mgf" ):
258+ if len (d_folders ) > 1 :
259+ raise ValueError (
260+ "Output file specified but multiple .d folders found." )
261+ output_dir = Path (args .output ).parent
262+ output_name = Path (args .output ).name
263+
264+ else :
265+ # path is a dir
266+ output_dir = Path (args .output )
267+
268+ # make dir if it does not exist
269+ if not output_dir .exists ():
270+ try :
271+ output_dir .mkdir (parents = True , exist_ok = True )
272+ logger .info (f"Created output directory: { output_dir } " )
273+ except Exception as e :
274+ logger .error (f"Failed to create output directory: { e } " )
275+ return 1
276+
277+ output_name = None
249278
250279 for d_folder in d_folders :
251280 if not d_folder .is_dir ():
@@ -258,9 +287,21 @@ def main():
258287 if not (d_folder / "analysis.tdf_bin" ).exists ():
259288 logger .error (f"Required file not found in { d_folder } : analysis.tdf_bin" )
260289 return 1
290+
261291 logger .info (f"Processing { d_folder } ..." )
262292
293+ _output_dir = output_dir if output_dir is not None else d_folder
294+ _output_name = output_name if output_name is not None else Path (d_folder ).stem + ".mgf"
295+
296+ output = os .path .join (_output_dir , _output_name )
297+ logger .info (f"Output file: { output } " )
298+
299+ if not args .overwrite and Path (output ).exists ():
300+ logger .warning (f"Output file { output } already exists. Skipping..." )
301+ continue
302+
263303 try :
304+
264305 write_mgf_file (
265306 analysis_dir = str (d_folder ),
266307 output_file = output ,
@@ -280,8 +321,10 @@ def main():
280321 )
281322 logger .info ("MGF extraction completed successfully!" )
282323 except Exception as e :
283- logger .error (f"Error during MGF extraction: { e } " )
284-
324+ logger .error (f"Error during MGF extraction: { e } ... skipping { d_folder } " )
325+ except KeyboardInterrupt :
326+ logger .info ("Extraction interrupted by user." )
327+ return 0
285328
286329if __name__ == "__main__" :
287330 exit (main ())
0 commit comments