Skip to content

Commit 16ba4cf

Browse files
update
1 parent 4c2fa83 commit 16ba4cf

2 files changed

Lines changed: 115 additions & 21 deletions

File tree

src/tdfextractor/mgf_exctractor.py

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,15 @@
33
"""
44

55
import logging
6+
import os
67
import time
78
from pathlib import Path
8-
from typing import Generator, List, Optional
9+
from typing import Optional
910
import argparse
1011

11-
import pandas as pd
12-
from tdfpy import timsdata
13-
from tdfpy.pandas_tdf import PandasTdf
14-
from serenipy.ms2 import Ms2Spectra
1512
from tqdm import tqdm
1613

17-
from tqdm import tqdm
18-
from .utils import calculate_mass, get_ms2_dda_content, map_precursor_to_ip2_scan_number
19-
import numpy as np
14+
from .utils import get_ms2_dda_content
2015

2116
logger = logging.getLogger(__name__)
2217

@@ -183,6 +178,12 @@ def main():
183178
help="Remove precursor peaks from MS/MS spectra",
184179
)
185180

181+
parser.add_argument(
182+
"--overwrite",
183+
action="store_true",
184+
help="Overwrite existing output file if it exists",
185+
)
186+
186187
args = parser.parse_args()
187188

188189
# if casanovo update params to be: --top-n-spectra 150 --min-intensity 0.01 --min-charge 2 --max-charge 5 --min-mz 50 --max-mz 2500
@@ -218,6 +219,7 @@ def main():
218219
logger.info(f" Max RT: {args.max_rt if args.max_rt else 'None'} seconds")
219220
logger.info(f" Min CCS: {args.min_ccs if args.min_ccs else 'None'}")
220221
logger.info(f" Max CCS: {args.max_ccs if args.max_ccs else 'None'}")
222+
logger.info(f" Overwrite Existing Output: {args.overwrite}")
221223
logger.info(f" Verbose Logging: {args.verbose}")
222224

223225
# Validate input directory
@@ -243,9 +245,36 @@ def main():
243245
logger.error(f"No .d folders found in: {args.analysis_dir}")
244246
return 1
245247

246-
output = args.output
247-
if len(d_folders) > 1:
248-
output = None
248+
output_dir = None
249+
output_name = None
250+
251+
# resolve the output directory and file name from args.output
252+
if args.output is None:
253+
# output will be within d folder
254+
output_dir = None
255+
output_name = Path(args.analysis_dir).stem + ".mgf"
256+
257+
elif args.output.endswith(".mgf"):
258+
if len(d_folders) > 1:
259+
raise ValueError(
260+
"Output file specified but multiple .d folders found.")
261+
output_dir = Path(args.output).parent
262+
output_name = Path(args.output).name
263+
264+
else:
265+
# path is a dir
266+
output_dir = Path(args.output)
267+
268+
# make dir if it does not exist
269+
if not output_dir.exists():
270+
try:
271+
output_dir.mkdir(parents=True, exist_ok=True)
272+
logger.info(f"Created output directory: {output_dir}")
273+
except Exception as e:
274+
logger.error(f"Failed to create output directory: {e}")
275+
return 1
276+
277+
output_name = None
249278

250279
for d_folder in d_folders:
251280
if not d_folder.is_dir():
@@ -258,9 +287,21 @@ def main():
258287
if not (d_folder / "analysis.tdf_bin").exists():
259288
logger.error(f"Required file not found in {d_folder}: analysis.tdf_bin")
260289
return 1
290+
261291
logger.info(f"Processing {d_folder}...")
262292

293+
_output_dir = output_dir if output_dir is not None else d_folder
294+
_output_name = output_name if output_name is not None else Path(d_folder).stem + ".mgf"
295+
296+
output = os.path.join(_output_dir, _output_name)
297+
logger.info(f"Output file: {output}")
298+
299+
if not args.overwrite and Path(output).exists():
300+
logger.warning(f"Output file {output} already exists. Skipping...")
301+
continue
302+
263303
try:
304+
264305
write_mgf_file(
265306
analysis_dir=str(d_folder),
266307
output_file=output,
@@ -280,8 +321,10 @@ def main():
280321
)
281322
logger.info("MGF extraction completed successfully!")
282323
except Exception as e:
283-
logger.error(f"Error during MGF extraction: {e}")
284-
324+
logger.error(f"Error during MGF extraction: {e}... skipping {d_folder}")
325+
except KeyboardInterrupt:
326+
logger.info("Extraction interrupted by user.")
327+
return 0
285328

286329
if __name__ == "__main__":
287330
exit(main())

src/tdfextractor/ms2_extractor.py

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import logging
6+
import os
67
import time
78
from datetime import datetime
89
from pathlib import Path
@@ -19,7 +20,8 @@
1920
from .utils import calculate_mass, get_ms2_dda_content, map_precursor_to_ip2_scan_number
2021

2122
logger = logging.getLogger(__name__)
22-
23+
# make debug
24+
logger.setLevel(logging.DEBUG)
2325

2426
def get_ms2_content(
2527
analysis_dir: str,
@@ -468,6 +470,12 @@ def main():
468470
"-v", "--verbose", action="store_true", help="Enable verbose logging"
469471
)
470472

473+
parser.add_argument(
474+
"--overwrite",
475+
action="store_true",
476+
help="Overwrite existing output file if it exists",
477+
)
478+
471479
args = parser.parse_args()
472480

473481
# Set up logging
@@ -493,6 +501,7 @@ def main():
493501
logger.info(f" Max RT: {args.max_rt if args.max_rt else 'None'} seconds")
494502
logger.info(f" Min CCS: {args.min_ccs if args.min_ccs else 'None'}")
495503
logger.info(f" Max CCS: {args.max_ccs if args.max_ccs else 'None'}")
504+
logger.info(f" Overwrite Existing Output: {args.overwrite}")
496505
logger.info(f" Verbose Logging: {args.verbose}")
497506

498507
# Validate input directory
@@ -514,13 +523,42 @@ def main():
514523
else:
515524
d_folders = list(analysis_path.glob("*.d"))
516525
logger.info(f"Found {len(d_folders)} .d folders in: {args.analysis_dir}")
526+
logger.debug(f"Found .d folders: {d_folders}")
527+
517528
if not d_folders:
518529
logger.error(f"No .d folders found in: {args.analysis_dir}")
519530
return 1
520-
521-
output = args.output
522-
if len(d_folders) > 1:
523-
output = None
531+
532+
output_dir = None
533+
output_name = None
534+
535+
# resolve the output directory and file name from args.output
536+
if args.output is None:
537+
# output will be within d folder
538+
output_dir = None
539+
output_name = Path(args.analysis_dir).stem + ".ms2"
540+
541+
elif args.output.endswith(".ms2"):
542+
if len(d_folders) > 1:
543+
raise ValueError(
544+
"Output file specified but multiple .d folders found.")
545+
output_dir = Path(args.output).parent
546+
output_name = Path(args.output).name
547+
548+
else:
549+
# path is a dir
550+
output_dir = Path(args.output)
551+
552+
# make dir if it does not exist
553+
if not output_dir.exists():
554+
try:
555+
output_dir.mkdir(parents=True, exist_ok=True)
556+
logger.info(f"Created output directory: {output_dir}")
557+
except Exception as e:
558+
logger.error(f"Failed to create output directory: {e}")
559+
return 1
560+
561+
output_name = None
524562

525563
for d_folder in d_folders:
526564
if not d_folder.is_dir():
@@ -533,10 +571,21 @@ def main():
533571
if not (d_folder / "analysis.tdf_bin").exists():
534572
logger.error(f"Required file not found in {d_folder}: analysis.tdf_bin")
535573
return 1
574+
536575
logger.info(f"Processing {d_folder}...")
537576

577+
_output_dir = output_dir if output_dir is not None else d_folder
578+
_output_name = output_name if output_name is not None else Path(d_folder).stem + ".ms2"
579+
580+
output = os.path.join(_output_dir, _output_name)
581+
logger.info(f"Output file: {output}")
582+
583+
if not args.overwrite and Path(output).exists():
584+
logger.warning(f"Output file {output} already exists. Skipping...")
585+
continue
538586

539587
try:
588+
540589
write_ms2_file(
541590
analysis_dir=str(d_folder),
542591
output_file=output,
@@ -556,8 +605,10 @@ def main():
556605
)
557606
logger.info("MS2 extraction completed successfully!")
558607
except Exception as e:
559-
logger.error(f"Error during MS2 extraction: {e}")
560-
561-
608+
logger.error(f"Error during Ms2 extraction: {e}... skipping {d_folder}")
609+
except KeyboardInterrupt:
610+
logger.info("Extraction interrupted by user.")
611+
return 0
612+
562613
if __name__ == "__main__":
563614
exit(main())

0 commit comments

Comments
 (0)