From fa2c144e5d51781310eeb1cb30ce58d8e51c85fd Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 4 Oct 2024 17:29:37 +0100 Subject: [PATCH 01/12] fixed docs reqs --- docs/requirements.txt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 5c21307f..6a9202cc 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,10 +1,13 @@ -mkdocs -mkdocs-material -mkdocstrings[python] -mkdocs-jupyter -mkdocs-exclude -mkdocs-autorefs +mkdocs==1.5.3 +mkdocs-material==9.4.1 +mkdocstrings[python]==0.22.0 +mkdocs-jupyter==0.24.2 +mkdocs-exclude==1.0.2 +mkdocs-autorefs==0.4.1 mike pandas tabulate ipython-genutils +griffe==0.29.1 +mkdocs-material-extensions==1.2 +mkdocstrings-python==1.1.2 From de5dbaca4f5b23f56902a474d2319233c121e46a Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Thu, 27 Mar 2025 17:31:06 +0300 Subject: [PATCH 02/12] luna25 --- amid/luna25.py | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 amid/luna25.py diff --git a/amid/luna25.py b/amid/luna25.py new file mode 100644 index 00000000..3cb28fa9 --- /dev/null +++ b/amid/luna25.py @@ -0,0 +1,132 @@ +import datetime +from functools import cached_property +from typing import Dict, NamedTuple, Sequence + +import numpy as np +import pandas as pd +import SimpleITK as sitk +from dpipe.im.box import limit_box +from dpipe.itertools import collect + +from .internals import Dataset, field, licenses, register + + +class NoduleBlock(NamedTuple): + image: np.ndarray + metadata: Dict + + +class LUNA25Nodule(NamedTuple): + coords: Sequence[float] + lesion_id: int + annotation_id: str + nodule_id: str + malignancy: bool + center_voxel: Sequence[float] + bbox: np.ndarray + + +@register( + body_region='Chest', + license=licenses.CC_BY_40, + link='https://luna25.grand-challenge.org/', + modality='CT', + prep_data_size=None, + raw_data_size=None, + task='Lung nodule malignancy risk estimation', +) +class LUNA25(Dataset): + """ + The LUNA25 Challenge dataset is a comprehensive collection designed to support + the development and validation of AI algorithms for lung nodule malignancy risk + estimation using low-dose chest CT scans. In total, it contains 2120 patients + and 4069 low-dose chest CT scans, with 555 annotated malignant nodules and + 5608 benign nodules (3762 unique nodules, 348 of them are malignant). + The dataset was acquired in participants who enrolled in the + National Lung Cancer Screening Trial (NLST) between 2002 and 2004 in + one of the 33 centers in the United States. + + Parameters + ---------- + root : str, Path, optional + path to the folder containing the raw downloaded archives. + If not provided, the cache is assumed to be already populated. + + """ + + @property + def ids(self): + return [file.name[: -len('.mha')] for file in (self.root / 'luna25_images').iterdir()] + + def _image(self, i): + return sitk.ReadImage(self.root / f'luna25_images/{i}.mha') + + @field + def image(self, i): + return sitk.GetArrayFromImage(self._image(i)) + + @field + def spacing(self, i): + return self._image(i).GetSpacing()[::-1] + + @field + def origin(self, i): + return self._image(i).GetOrigin()[::-1] + + @cached_property + def _data(self): + return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv') + + def _data_rows(self, i): + return self._data[self._data['SeriesInstanceUID'] == i] + + def _data_column_value(self, i, column_name): + values = self._data_rows(i).get(column_name).unique() + assert len(values) == 1 + value = values[0] + assert not pd.isnull(value) + return value + + @field + def patient_id(self, i): + return self._data_column_value(i, 'PatientID') + + @field + def study_date(self, i): + study_date = self._data_column_value(i, 'StudyDate') + return datetime.strptime(study_date, "%Y%m%d").date() + + @field + def age(self, i): + return self._data_column_value(i, 'Age_at_StudyDate') + + @field + def sex(self, i): + return self._data_column_value(i, 'Gender') + + @field + @collect + def nodules(self, i): + for row in self._data_rows(i).itertuples(): + coords = np.array([row.CoordX, row.CoordY, row.CoordZ]) + nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID) + assert np.all(nodule_block_metadata['spacing'] == self.spacing(i)) + center_voxel = (coords[::-1] - self.origin(i)) / self.spacing(i) + bbox_start_point = (nodule_block_metadata['origin'] - self.origin(i)) / self.spacing(i) + yield LUNA25Nodule( + coords=coords, + lesion_id=row.LesionID, + annotation_id=row.AnnotationID, + nodule_id=row.NoduleID, + malignancy=row.label, + center_voxel=np.round(center_voxel), + bbox=limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape), + ) + + def nodule_block_image(self, annotation_id): + return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy') + + def nodule_block_metadata(self, annotation_id): + metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True) + assert metadata.shape == () + return metadata.item() From 9c3830ddd177a4772e4c8ee09d886da7e4f9ebed Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Thu, 27 Mar 2025 18:27:29 +0300 Subject: [PATCH 03/12] fixes --- amid/luna25.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index 3cb28fa9..f4c8344a 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -1,4 +1,4 @@ -import datetime +from datetime import datetime from functools import cached_property from typing import Dict, NamedTuple, Sequence @@ -89,11 +89,11 @@ def _data_column_value(self, i, column_name): @field def patient_id(self, i): - return self._data_column_value(i, 'PatientID') + return str(self._data_column_value(i, 'PatientID')) @field def study_date(self, i): - study_date = self._data_column_value(i, 'StudyDate') + study_date = str(self._data_column_value(i, 'StudyDate')) return datetime.strptime(study_date, "%Y%m%d").date() @field @@ -113,14 +113,20 @@ def nodules(self, i): assert np.all(nodule_block_metadata['spacing'] == self.spacing(i)) center_voxel = (coords[::-1] - self.origin(i)) / self.spacing(i) bbox_start_point = (nodule_block_metadata['origin'] - self.origin(i)) / self.spacing(i) + if np.any(center_voxel < 0) or np.any(bbox_start_point < 0): + center_voxel = None + bbox=None + else: + center_voxel = np.round(center_voxel) + bbox = limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape) yield LUNA25Nodule( coords=coords, lesion_id=row.LesionID, - annotation_id=row.AnnotationID, - nodule_id=row.NoduleID, + annotation_id=str(row.AnnotationID), + nodule_id=str(row.NoduleID), malignancy=row.label, - center_voxel=np.round(center_voxel), - bbox=limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape), + center_voxel=center_voxel, + bbox=bbox, ) def nodule_block_image(self, annotation_id): From 66529fa50507f5c4d75154fbbc3e0e01c241b857 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 31 Mar 2025 18:10:11 +0300 Subject: [PATCH 04/12] added direction --- amid/luna25.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index f4c8344a..18378d6b 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -69,10 +69,12 @@ def image(self, i): def spacing(self, i): return self._image(i).GetSpacing()[::-1] - @field - def origin(self, i): + def _image_origin(self, i): return self._image(i).GetOrigin()[::-1] + def _direction(self, i): + return self._image(i).GetDirection()[::-1] + @cached_property def _data(self): return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv') @@ -111,22 +113,19 @@ def nodules(self, i): coords = np.array([row.CoordX, row.CoordY, row.CoordZ]) nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID) assert np.all(nodule_block_metadata['spacing'] == self.spacing(i)) - center_voxel = (coords[::-1] - self.origin(i)) / self.spacing(i) - bbox_start_point = (nodule_block_metadata['origin'] - self.origin(i)) / self.spacing(i) - if np.any(center_voxel < 0) or np.any(bbox_start_point < 0): - center_voxel = None - bbox=None - else: - center_voxel = np.round(center_voxel) - bbox = limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape) + image_origin = self._image_origin(i) + direction = np.array(self._direction(i)[::4]) + center_voxel = ((coords[::-1] - image_origin) / self.spacing(i)) * direction + bbox_start_point = ((nodule_block_metadata['origin'] - image_origin) / self.spacing(i)) * direction + bbox = limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape) yield LUNA25Nodule( coords=coords, lesion_id=row.LesionID, annotation_id=str(row.AnnotationID), nodule_id=str(row.NoduleID), malignancy=row.label, - center_voxel=center_voxel, - bbox=bbox, + center_voxel=np.round(center_voxel).astype(int), + bbox=np.round(bbox).astype(int) ) def nodule_block_image(self, annotation_id): From 438ba49ba67f4a83399e1388745f20b0a4c42bcb Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Fri, 4 Apr 2025 17:46:48 +0300 Subject: [PATCH 05/12] lint --- amid/luna25.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amid/luna25.py b/amid/luna25.py index 18378d6b..78fdbd7c 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -125,7 +125,7 @@ def nodules(self, i): nodule_id=str(row.NoduleID), malignancy=row.label, center_voxel=np.round(center_voxel).astype(int), - bbox=np.round(bbox).astype(int) + bbox=np.round(bbox).astype(int), ) def nodule_block_image(self, annotation_id): From 1a2b981b65ca36513ee9de753118bd094b84f56b Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 12:06:32 +0300 Subject: [PATCH 06/12] update version --- amid/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amid/__version__.py b/amid/__version__.py index ef919940..092052c1 100644 --- a/amid/__version__.py +++ b/amid/__version__.py @@ -1 +1 @@ -__version__ = '0.14.0' +__version__ = '0.14.1' From 3fcfd6a25c010f83d47fe216c5a04b6b7946c50d Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 12:10:34 +0300 Subject: [PATCH 07/12] added to init --- amid/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/amid/__init__.py b/amid/__init__.py index b26e169f..fe135844 100644 --- a/amid/__init__.py +++ b/amid/__init__.py @@ -19,6 +19,7 @@ from .lidc import LIDC from .lits import LiTS from .liver_medseg import LiverMedseg +from .luna25 import LUNA25 from .medseg9 import Medseg9 from .midrc import MIDRC from .mood import MOOD From 817481c07dac2660b02fdc6f9ebafe5478561c54 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 12:30:22 +0300 Subject: [PATCH 08/12] got rid of dpipe --- amid/luna25.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index 78fdbd7c..9794d5db 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -5,8 +5,6 @@ import numpy as np import pandas as pd import SimpleITK as sitk -from dpipe.im.box import limit_box -from dpipe.itertools import collect from .internals import Dataset, field, licenses, register @@ -107,8 +105,8 @@ def sex(self, i): return self._data_column_value(i, 'Gender') @field - @collect def nodules(self, i): + nodules = [] for row in self._data_rows(i).itertuples(): coords = np.array([row.CoordX, row.CoordY, row.CoordZ]) nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID) @@ -117,8 +115,8 @@ def nodules(self, i): direction = np.array(self._direction(i)[::4]) center_voxel = ((coords[::-1] - image_origin) / self.spacing(i)) * direction bbox_start_point = ((nodule_block_metadata['origin'] - image_origin) / self.spacing(i)) * direction - bbox = limit_box([bbox_start_point, bbox_start_point + np.array([64, 128, 128])], self.image(i).shape) - yield LUNA25Nodule( + bbox = [bbox_start_point, np.minimum(bbox_start_point + np.array([64, 128, 128]), self.image(i).shape)] + nodules.append(LUNA25Nodule( coords=coords, lesion_id=row.LesionID, annotation_id=str(row.AnnotationID), @@ -126,7 +124,8 @@ def nodules(self, i): malignancy=row.label, center_voxel=np.round(center_voxel).astype(int), bbox=np.round(bbox).astype(int), - ) + )) + return nodules def nodule_block_image(self, annotation_id): return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy') From c3f874f3ab1adf6dbd45ba0ec96a85549d4863d3 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 12:42:04 +0300 Subject: [PATCH 09/12] lint --- amid/luna25.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index 9794d5db..c7f07c25 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -116,15 +116,17 @@ def nodules(self, i): center_voxel = ((coords[::-1] - image_origin) / self.spacing(i)) * direction bbox_start_point = ((nodule_block_metadata['origin'] - image_origin) / self.spacing(i)) * direction bbox = [bbox_start_point, np.minimum(bbox_start_point + np.array([64, 128, 128]), self.image(i).shape)] - nodules.append(LUNA25Nodule( - coords=coords, - lesion_id=row.LesionID, - annotation_id=str(row.AnnotationID), - nodule_id=str(row.NoduleID), - malignancy=row.label, - center_voxel=np.round(center_voxel).astype(int), - bbox=np.round(bbox).astype(int), - )) + nodules.append( + LUNA25Nodule( + coords=coords, + lesion_id=row.LesionID, + annotation_id=str(row.AnnotationID), + nodule_id=str(row.NoduleID), + malignancy=row.label, + center_voxel=np.round(center_voxel).astype(int), + bbox=np.round(bbox).astype(int), + ) + ) return nodules def nodule_block_image(self, annotation_id): From ce319a503e58568f3d80552dbc6ff741f06cf2d0 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 16:44:02 +0300 Subject: [PATCH 10/12] account for comments --- amid/__version__.py | 2 +- amid/luna25.py | 50 +++++++++++++++++++++------------------------ 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/amid/__version__.py b/amid/__version__.py index 092052c1..a842d05a 100644 --- a/amid/__version__.py +++ b/amid/__version__.py @@ -1 +1 @@ -__version__ = '0.14.1' +__version__ = '0.15.0' diff --git a/amid/luna25.py b/amid/luna25.py index c7f07c25..3cf1810d 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -9,11 +9,6 @@ from .internals import Dataset, field, licenses, register -class NoduleBlock(NamedTuple): - image: np.ndarray - metadata: Dict - - class LUNA25Nodule(NamedTuple): coords: Sequence[float] lesion_id: int @@ -29,8 +24,8 @@ class LUNA25Nodule(NamedTuple): license=licenses.CC_BY_40, link='https://luna25.grand-challenge.org/', modality='CT', - prep_data_size=None, - raw_data_size=None, + prep_data_size='214G', + raw_data_size='205G', task='Lung nodule malignancy risk estimation', ) class LUNA25(Dataset): @@ -47,31 +42,31 @@ class LUNA25(Dataset): Parameters ---------- root : str, Path, optional - path to the folder containing the raw downloaded archives. + path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and + `LUNA25_Public_Training_Development_Data.csv` file obtained by the instruction at + https://luna25.grand-challenge.org/datasets/. If not provided, the cache is assumed to be already populated. + Notes + ----- + Join the challenge at https://luna25.grand-challenge.org/. + Then follow the download and extraction instructions at https://luna25.grand-challenge.org/datasets/. """ @property def ids(self): return [file.name[: -len('.mha')] for file in (self.root / 'luna25_images').iterdir()] - def _image(self, i): + def _sitk_image(self, i): return sitk.ReadImage(self.root / f'luna25_images/{i}.mha') @field def image(self, i): - return sitk.GetArrayFromImage(self._image(i)) + return sitk.GetArrayFromImage(self._sitk_image(i)) @field def spacing(self, i): - return self._image(i).GetSpacing()[::-1] - - def _image_origin(self, i): - return self._image(i).GetOrigin()[::-1] - - def _direction(self, i): - return self._image(i).GetDirection()[::-1] + return self._sitk_image(i).GetSpacing()[::-1] @cached_property def _data(self): @@ -101,21 +96,22 @@ def age(self, i): return self._data_column_value(i, 'Age_at_StudyDate') @field - def sex(self, i): + def gender(self, i): return self._data_column_value(i, 'Gender') @field def nodules(self, i): nodules = [] + sitk_image = self._sitk_image(i) + shape = self.image(i).shape + bbox_size = np.array([64, 128, 128]) # all nodule blocks in LUNA25 are of the same size for row in self._data_rows(i).itertuples(): - coords = np.array([row.CoordX, row.CoordY, row.CoordZ]) + coords = (row.CoordX, row.CoordY, row.CoordZ) + center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1] + nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID) - assert np.all(nodule_block_metadata['spacing'] == self.spacing(i)) - image_origin = self._image_origin(i) - direction = np.array(self._direction(i)[::4]) - center_voxel = ((coords[::-1] - image_origin) / self.spacing(i)) * direction - bbox_start_point = ((nodule_block_metadata['origin'] - image_origin) / self.spacing(i)) * direction - bbox = [bbox_start_point, np.minimum(bbox_start_point + np.array([64, 128, 128]), self.image(i).shape)] + bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_metadata['origin'][::-1]))[::-1] + bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)]) nodules.append( LUNA25Nodule( coords=coords, @@ -123,8 +119,8 @@ def nodules(self, i): annotation_id=str(row.AnnotationID), nodule_id=str(row.NoduleID), malignancy=row.label, - center_voxel=np.round(center_voxel).astype(int), - bbox=np.round(bbox).astype(int), + center_voxel=center_voxel, + bbox=bbox, ) ) return nodules From 236c6ae9709be80d974f27690c8f913302294171 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Mon, 7 Apr 2025 17:47:33 +0300 Subject: [PATCH 11/12] lint --- amid/luna25.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index 3cf1810d..e768864d 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -1,6 +1,6 @@ from datetime import datetime from functools import cached_property -from typing import Dict, NamedTuple, Sequence +from typing import NamedTuple, Sequence import numpy as np import pandas as pd @@ -42,14 +42,14 @@ class LUNA25(Dataset): Parameters ---------- root : str, Path, optional - path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and + path to the folder containing `luna25_images` and `luna25_nodule_blocks` folders and `LUNA25_Public_Training_Development_Data.csv` file obtained by the instruction at https://luna25.grand-challenge.org/datasets/. If not provided, the cache is assumed to be already populated. Notes ----- - Join the challenge at https://luna25.grand-challenge.org/. + Join the challenge at https://luna25.grand-challenge.org/. Then follow the download and extraction instructions at https://luna25.grand-challenge.org/datasets/. """ @@ -109,8 +109,8 @@ def nodules(self, i): coords = (row.CoordX, row.CoordY, row.CoordZ) center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1] - nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID) - bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_metadata['origin'][::-1]))[::-1] + nodule_block_origin = self.nodule_block_metadata(row.AnnotationID)['origin'][::-1] + bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_origin))[::-1] bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)]) nodules.append( LUNA25Nodule( From 45bcf6f6ffbf531586868b2b54f98a72dfc226e6 Mon Sep 17 00:00:00 2001 From: evgenyasoboleva Date: Wed, 9 Apr 2025 23:44:23 +0300 Subject: [PATCH 12/12] account for comment --- amid/luna25.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amid/luna25.py b/amid/luna25.py index e768864d..b8942e1d 100644 --- a/amid/luna25.py +++ b/amid/luna25.py @@ -109,7 +109,7 @@ def nodules(self, i): coords = (row.CoordX, row.CoordY, row.CoordZ) center_voxel = sitk_image.TransformPhysicalPointToIndex(map(int, coords))[::-1] - nodule_block_origin = self.nodule_block_metadata(row.AnnotationID)['origin'][::-1] + nodule_block_origin = self.get_nodule_block_metadata(row.AnnotationID)['origin'][::-1] bbox_start_point = sitk_image.TransformPhysicalPointToIndex(map(int, nodule_block_origin))[::-1] bbox = np.array([bbox_start_point, np.minimum(bbox_start_point + bbox_size, shape)]) nodules.append( @@ -125,10 +125,10 @@ def nodules(self, i): ) return nodules - def nodule_block_image(self, annotation_id): + def get_nodule_block_image(self, annotation_id): return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy') - def nodule_block_metadata(self, annotation_id): + def get_nodule_block_metadata(self, annotation_id): metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True) assert metadata.shape == () return metadata.item()