Merge pull request #1 from davidprueser/mpe

tomsch420 · web-flow · commit d5c5742eea23 · 2023-08-24T11:41:18.000+02:00
made return values from mpe and _mpe consistent
diff --git a/doc/source/mlflow_integration.rst b/doc/source/mlflow_integration.rst
@@ -4,7 +4,17 @@ MLFlow Integration
 JPTs can be integrated with MLFlow to manage the full model lifecycle.
 Among other things, hyperparameters and the model itself can be logged using a Python wrapper.
 
-A full tutorial is be found here
+In order to integrate JPTs with MLFlow, MLFlow needs to be installed. This can be done either by installing the
+``pyjpt`` package with MLFlow added as an extra requirement
+
+.. code:: bash
+
+    $ pip install pyjpt[mlflow]
+
+or by installing the MLFlow package manually.
+
+
+A full tutorial on the MLFlow integration can be found here
 
 .. toctree::
 
diff --git a/setup.py b/setup.py
@@ -114,4 +114,5 @@ def requirements():
     long_description=__description__,
     package_data={'jpt': ['.version']},
     include_package_data=True,
+    extras_require={'mlflow': ['mlflow >= 2.5.0']}
 )
diff --git a/src/jpt/distributions/univariate/integer.py b/src/jpt/distributions/univariate/integer.py
@@ -250,20 +250,19 @@ def _variance(self) -> float:
         e = self._expectation()
         return sum((v - e) ** 2 * p for v, p in zip(self.values.values(), self.probabilities))
 
-    def mpe(self) -> (float, Set[int]):
-        p_max, lbls = self._mpe()
-        return p_max, self.value2label(lbls)
+    def mpe(self) -> (Set[int], float):
+        lbls, p_max = self._mpe()
+        return self.value2label(lbls), p_max
 
-    def _mpe(self) -> (float, Set[int]):
+    def _mpe(self) -> (Set[int], float):
         p_max = max(self.probabilities)
         return (
-            p_max,
             {
                 l for l, p in zip(
                     self.values.values(),
                     self.probabilities
                 ) if p == p_max
-             }
+             }, p_max
         )
 
     mode = mpe
diff --git a/src/jpt/distributions/univariate/multinomial.py b/src/jpt/distributions/univariate/multinomial.py
@@ -279,7 +279,7 @@ def _sample_one(self) -> Symbol:
     @deprecated('Expectation is undefined in symbolic domains. Use Multinomial._mode() instead.')
     def _expectation(self) -> Set[int]:
         '''Returns the value with the highest probability for this variable'''
-        return self._mpe()[1]
+        return self._mpe()[0]
 
     @deprecated('Expectation is undefined in symbolic domains. Use Multinomial._mode() instead.')
     def expectation(self) -> Set[Symbol]:
@@ -291,25 +291,24 @@ def expectation(self) -> Set[Symbol]:
             self._expectation()
         )
 
-    def mpe(self) -> Tuple[float, Set[Symbol]]:
-        p_max, values = self._mpe()
-        return p_max, self.value2label(values)
+    def mpe(self) -> Tuple[Set[Symbol], float]:
+        values, p_max = self._mpe()
+        return self.value2label(values), p_max
 
-    def _mpe(self) -> Tuple[float, Set[int]]:
+    def _mpe(self) -> Tuple[Set[int], float]:
         """
         Calculate the most probable configuration of this distribution in value space.
 
-        :return: The likelihood of the mpe as float and the mpe itself as Set
+        :return: The mpe itself as Set and the likelihood of the mpe as float
         """
         _max = max(self.probabilities)
         return (
-            _max,
             set(
                 [label for label, p in zip(
                     self.values.values(),
                     self.probabilities
                 ) if p == _max]
-            )
+            ), _max
         )
 
     mode = mpe
diff --git a/src/jpt/distributions/univariate/numeric.py b/src/jpt/distributions/univariate/numeric.py
@@ -20,6 +20,7 @@
     from ...base.functions import __module__
 except ModuleNotFoundError:
     import pyximport
+
     pyximport.install()
 finally:
     from ...base.intervals import R, ContinuousSet, RealSet, NumberSet
@@ -112,10 +113,10 @@ def label2value(
     @classmethod
     def equiv(cls, other):
         return (
-            issubclass(other, Numeric) and
-            cls.__name__ == other.__name__ and
-            cls.values == other.values and
-            cls.labels == other.labels
+                issubclass(other, Numeric) and
+                cls.__name__ == other.__name__ and
+                cls.values == other.values and
+                cls.labels == other.labels
         )
 
     @property
@@ -175,18 +176,17 @@ def is_dirac_impulse(self) -> bool:
         """Checks if this distribution is a dirac impulse."""
         return len(self._quantile.cdf.intervals) == 2
 
-    def mpe(self) -> (float, RealSet):
+    def mpe(self) -> (RealSet, float):
         return self._mpe(self.value2label)
 
-    def _mpe(self, value_transform: Optional[Callable] = None) -> (float, NumberSet):
+    def _mpe(self, value_transform: Optional[Callable] = None) -> (NumberSet, float):
         """
         Calculate the most probable configuration of this quantile distribution.
-        :return: The likelihood of the mpe as float and the mpe itself as RealSet
+        :return: The mpe itself as RealSet and the likelihood of the mpe as float
         """
         value_transform = ifnone(value_transform, lambda _: _)
         _max = max(f.value for f in self.pdf.functions)
         return (
-            _max,
             value_transform(
                 RealSet(
                     [
@@ -196,7 +196,7 @@ def _mpe(self, value_transform: Optional[Callable] = None) -> (float, NumberSet)
                         if function.value == _max
                     ]
                 ).simplify()
-            )
+            ), _max
         )
 
     def _fit(
@@ -236,8 +236,8 @@ def _p(self, value: Union[numbers.Number, NumberSet]) -> numbers.Real:
         if probspace.isdisjoint(value):
             return 0
         probmass = (
-            (self.cdf.eval(value.upper) if value.upper != np.PINF else 1.) -
-            (self.cdf.eval(value.lower) if value.lower != np.NINF else 0.)
+                (self.cdf.eval(value.upper) if value.upper != np.PINF else 1.) -
+                (self.cdf.eval(value.lower) if value.lower != np.NINF else 0.)
         )
         if not probmass:
             return probspace in value
@@ -524,10 +524,10 @@ def _moment(
             # We have to "stretch" the pdf value over the interval in label space:
             function_value = function.value * interval.width / interval_.width
             result += (
-                (
-                    pow(interval_.upper - center, order + 1)
-                    - pow(interval_.lower - center, order + 1)
-                ) * function_value / (order + 1)
+                    (
+                            pow(interval_.upper - center, order + 1)
+                            - pow(interval_.lower - center, order + 1)
+                    ) * function_value / (order + 1)
             )
         return result
 
diff --git a/src/jpt/trees.py b/src/jpt/trees.py
@@ -616,7 +616,7 @@ def conditional_leaf(self, evidence: VariableAssignment) -> 'Leaf':
 
         return result
 
-    def mpe(self, minimal_distances: VariableMap) -> (float, VariableMap):
+    def mpe(self, minimal_distances: VariableMap) -> (VariableMap, float):
         """
         Calculate the most probable explanation of this leaf as a fully factorized distribution.
         :return: the configuration as a VariableMap and the likelihood of the maximum as a float
@@ -630,7 +630,7 @@ def mpe(self, minimal_distances: VariableMap) -> (float, VariableMap):
         for variable, distribution in self.distributions.items():
 
             # calculate mpe of that distribution
-            likelihood, explanation = distribution.mpe()
+            explanation, likelihood = distribution.mpe()
 
             # apply upper cap for infinities
             likelihood = minimal_distances[variable] if likelihood == float("inf") else likelihood
@@ -642,7 +642,7 @@ def mpe(self, minimal_distances: VariableMap) -> (float, VariableMap):
             maximum[variable] = explanation
 
         # create mpe result
-        return result_likelihood, LabelAssignment(maximum.items())
+        return LabelAssignment(maximum.items()), result_likelihood
 
     def number_of_parameters(self) -> int:
         """
@@ -1235,13 +1235,13 @@ def mpe(
         maxima = [leaf.mpe(self.minimal_distances) for leaf in conditional_jpt.leaves.values()]
 
         # get the maximum of those maxima
-        highest_likelihood = max([m[0] for m in maxima])
+        highest_likelihood = max([m[1] for m in maxima])
 
         # create a list for all possible maximal occurrences
         results = []
 
         # for every leaf and its mpe
-        for leaf, (likelihood, mpe) in zip(conditional_jpt.leaves.values(), maxima):
+        for leaf, (mpe, likelihood) in zip(conditional_jpt.leaves.values(), maxima):
 
             if likelihood == highest_likelihood:
                 # append the argmax to the results
diff --git a/test/test_distributions.py b/test/test_distributions.py
@@ -169,8 +169,8 @@ def test_mpe(self):
         result_uniform = abc.mpe()
 
         # Assert
-        self.assertEqual((1 / 2, {'A'}), result_unique)
-        self.assertEqual((1 / 3, {'A', 'B', 'C'}), result_uniform)
+        self.assertEqual(({'A'}, 1 / 2), result_unique)
+        self.assertEqual(({'A', 'B', 'C'}, 1 / 3), result_uniform)
 
     def test_expectation(self):
         # Arrange
@@ -713,7 +713,7 @@ def test_mpe(self):
         )
 
         # Act
-        likelihood, mpe_state = dist.mpe()
+        mpe_state, likelihood = dist.mpe()
 
         # Assert
         self.assertEqual(
@@ -884,10 +884,10 @@ def test_mpe(self):
         biased_dice.set([0 / 6, 1 / 6, 2 / 6, 1 / 6, 1 / 6, 1 / 6])
 
         # Act
-        p_fair, fair_mpe = fair_dice.mpe()
-        _p_fair, _fair_mpe = fair_dice._mpe()
-        p_biased, biased_mpe = biased_dice.mpe()
-        _p_biased, _biased_mpe = biased_dice._mpe()
+        fair_mpe, p_fair = fair_dice.mpe()
+        _fair_mpe, _p_fair = fair_dice._mpe()
+        biased_mpe, p_biased = biased_dice.mpe()
+        _biased_mpe, _p_biased = biased_dice._mpe()
 
         # Assert
         self.assertEqual(set(range(1, 7)), fair_mpe)

Original file line number	Diff line number	Diff line change
`@@ -114,4 +114,5 @@ def requirements():`
`114`	`114`	`long_description=__description__,`
`115`	`115`	`package_data={'jpt': ['.version']},`
`116`	`116`	`include_package_data=True,`
	`117`	`+ extras_require={'mlflow': ['mlflow >= 2.5.0']}`
`117`	`118`	`)`