Commit 517e548

it wasn't rendering correctly because of WHITESPACES
1 parent df4dfa7 commit 517e548

42 files changed

Lines changed: 303 additions & 110 deletions


docs/source/FAQ.rst

Lines changed: 1 addition & 1 deletion
@@ -204,7 +204,7 @@ How to save/serialize a modular optimizer?
 ============================================
 Please refer to pytorch docs https://pytorch.org/tutorials/beginner/saving_loading_models.html.
 
-Like pytorch optimizers, torchzero modular optimizers and modules support :code:`opt.state_dict()` and :code:`opt.load_state_dict()`, which saves and loads state dicts of all modules, including nested ones.
+Like pytorch optimizers, torchzero modular optimizers support :code:`opt.state_dict()` and :code:`opt.load_state_dict()`, which saves and loads state dicts of all modules, including nested ones.
 
 So you can use the standard code for saving and loading:
 
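For reference, the "standard code" the FAQ points to is the usual PyTorch state-dict pattern. A minimal sketch (the model, module configuration, and file name below are illustrative, not part of the FAQ):

import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)
opt = tz.Modular(model.parameters(), tz.m.Adam(), tz.m.LR(1e-2))

# saving: state_dict() gathers the state of every module, including nested ones
torch.save(opt.state_dict(), "optimizer.pt")

# loading: rebuild the same optimizer, then restore its state
opt.load_state_dict(torch.load("optimizer.pt"))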

torchzero/modules/clipping/clipping.py

Lines changed: 19 additions & 0 deletions
@@ -152,8 +152,11 @@ class ClipValue(Transform):
         target (str): refer to :ref:`target argument` in documentation.
 
     Examples:
+
         Gradient clipping:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.ClipValue(1),
@@ -162,13 +165,16 @@ class ClipValue(Transform):
             )
 
         Update clipping:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.Adam(),
                 tz.m.ClipValue(1),
                 tz.m.LR(1e-2),
             )
+
     """
     def __init__(self, value: float, target: Target = 'update'):
         defaults = dict(value=value)
@@ -198,8 +204,11 @@ class ClipNorm(Transform):
             what this affects.
 
     Examples:
+
         Gradient norm clipping:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.ClipNorm(1),
@@ -208,7 +217,9 @@ class ClipNorm(Transform):
             )
 
         Update norm clipping:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.Adam(),
@@ -263,8 +274,11 @@ class Normalize(Transform):
             what this affects.
 
     Examples:
+
         Gradient normalization:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.Normalize(1),
@@ -273,7 +287,9 @@ class Normalize(Transform):
             )
 
         Update normalization:
+
         .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.Adam(),
@@ -363,8 +379,11 @@ class Centralize(Transform):
             minimal size of a dimension to normalize along it. Defaults to 1.
 
     Examples:
+
         Standard gradient centralization:
+
        .. code-block:: python
+
             opt = tz.Modular(
                 model.parameters(),
                 tz.m.Centralize(dim=0),
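The ordering in the examples above is the whole distinction: a clipping or normalization module placed before tz.m.Adam() acts on raw gradients, while one placed after it acts on Adam's update. A minimal training-step sketch, assuming tz.Modular follows the standard torch.optim step()/zero_grad() interface (the model and data are illustrative):

import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)

# update norm clipping: Adam's update is clipped before the learning rate is applied
opt = tz.Modular(
    model.parameters(),
    tz.m.Adam(),
    tz.m.ClipNorm(1),
    tz.m.LR(1e-2),
)

inputs, targets = torch.randn(32, 10), torch.randn(32, 1)
loss = torch.nn.functional.mse_loss(model(inputs), targets)
loss.backward()
opt.step()       # assumed standard optimizer interface
opt.zero_grad()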

torchzero/modules/experimental/absoap.py

Lines changed: 4 additions & 1 deletion
@@ -24,7 +24,10 @@ def update_absoap_covariances_(
 
 Source=Literal['p','g','s','y', 'gy', 'sy', 'sn', 'yn', 'gys', 'sys']
 class ABSOAP(Transform):
-    """SOAP but with some extra options for testing. Please note that this is experimental and isn't guaranteed to work.
+    """SOAP but with some extra options for testing.
+
+    .. warning::
+        This module is just for testing my stupid ideas.
 
     Args:
         scale_by_s - whether to scale y by s

torchzero/modules/experimental/adadam.py

Lines changed: 5 additions & 1 deletion
@@ -50,7 +50,11 @@ def adadam_(
     return None
 
 class Adadam(Module):
-    """Adam with a diagonally preconditioned preconditioner. Please note that this is experimental and isn't guaranteed to work."""
+    """Adam with a diagonally preconditioned preconditioner.
+
+    .. warning::
+        Experimental.
+    """
     def __init__(
         self,
         beta1: float = 0.9,

torchzero/modules/experimental/adamY.py

Lines changed: 5 additions & 1 deletion
@@ -62,7 +62,11 @@ def adamy_(
     return None
 
 class AdamY(Module):
-    """Adam but uses scaled gradient differences for second momentum. Please note that this is experimental and isn't guaranteed to work."""
+    """Adam but uses scaled gradient differences for second momentum.
+
+    .. warning::
+        Experimental.
+    """
     def __init__(
         self,
         beta1: float = 0.9,

torchzero/modules/experimental/adasoap.py

Lines changed: 4 additions & 1 deletion
@@ -33,7 +33,10 @@ def update_adasoap_covariances_(
 
 
 class AdaSOAP(Transform):
-    """SOAP with diagonally preconditioned GG^Ts. Please note that this is experimental and isn't guaranteed to work.
+    """SOAP with diagonally preconditioned GG^Ts.
+
+    .. warning::
+        Experimental.
 
     precond_beta - beta for GG^T squares
     """

torchzero/modules/experimental/eigendescent.py

Lines changed: 4 additions & 1 deletion
@@ -23,7 +23,10 @@ def _cosine_similarity(x, y):
 
 class EigenDescent(Module):
     """
-    Uses eigenvectors corresponding to certain eigenvalues. Please note that this is experimental and isn't guaranteed to work.
+    Uses eigenvectors corresponding to certain eigenvalues.
+
+    .. warning::
+        Experimental.
 
     Args:
         mode (str, optional):

torchzero/modules/experimental/etf.py

Lines changed: 16 additions & 3 deletions
@@ -8,7 +8,11 @@
 
 
 class ExponentialTrajectoryFit(Module):
-    """A method. Please note that this is experimental and isn't guaranteed to work."""
+    """A method.
+
+    .. warning::
+        Experimental.
+    """
     def __init__(self, step_size=1e-3):
         defaults = dict(step_size = step_size)
         super().__init__(defaults)
@@ -67,7 +71,12 @@ def step(self, var):
 
 
 class ExponentialTrajectoryFitV2(Module):
-    """Should be better than one above, except it isn't. Please note that this is experimental and isn't guaranteed to work."""
+    """Should be better than one above, except it isn't.
+
+    .. warning::
+        Experimental.
+
+    """
     def __init__(self, step_size=1e-3, num_steps: int= 4):
         defaults = dict(step_size = step_size, num_steps=num_steps)
         super().__init__(defaults)
@@ -132,7 +141,11 @@ def _fit_exponential(y0, y1, y2):
     return A, B, r
 
 class PointwiseExponential(Module):
-    """A stupid method (for my youtube channel). Please note that this is experimental and isn't guaranteed to work."""
+    """A stupid method (for my youtube channel).
+
+    .. warning::
+        Experimental.
+    """
     def __init__(self, step_size: float = 1e-3, reg: float = 1e-2, steps = 10000):
         defaults = dict(reg=reg, steps=steps, step_size=step_size)
         super().__init__(defaults)

torchzero/modules/experimental/higher_order_adagrad.py

Lines changed: 4 additions & 4 deletions
@@ -20,13 +20,13 @@
 
 class HigherOrderAdagrad(Module):
     """
-    .. note::
-        Conceptual.
+    .. warning::
+        Experimental.
 
-    .. note::
+    .. warning::
         Extremely expensive.
 
-    .. note::
+    .. warning::
         Doesn't work.
     """
     def __init__(

torchzero/modules/experimental/reduce_outward_lr.py

Lines changed: 3 additions & 4 deletions
@@ -4,13 +4,12 @@
 from ...utils import TensorList, unpack_states, unpack_dicts
 
 class ReduceOutwardLR(Transform):
-    """
-    When update sign matches weight sign, the learning rate for that weight is multiplied by `mul`.
+    """When update sign matches weight sign, the learning rate for that weight is multiplied by `mul`.
 
     This means updates that move weights towards zero have higher learning rates.
 
-    .. note::
-        this sounded good, but it sucks.
+    .. warning::
+        This sounded good but after testing turns out it sucks.
     """
     def __init__(self, mul = 0.5, use_grad=False, invert=False, target: Target = 'update'):
         defaults = dict(mul=mul, use_grad=use_grad, invert=invert)
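For intuition, the rule this docstring describes can be sketched as an elementwise operation (an illustration of the stated rule only, not the module's implementation; the use_grad, invert, and target options are ignored):

import torch

def reduce_outward_lr(update: torch.Tensor, weight: torch.Tensor, mul: float = 0.5) -> torch.Tensor:
    # wherever the update's sign matches the weight's sign, that entry's
    # effective learning rate is multiplied by `mul`; other entries keep full size
    same_sign = torch.sign(update) == torch.sign(weight)
    return torch.where(same_sign, update * mul, update)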
