Skip to content

API reference

High-level API

iteryne.MAML

Bases: Module

Model-agnostic meta-learning wrapper for any nn.Module.

Parameters:

Name Type Description Default
module Module

The model to meta-train. Its parameters are the meta-parameters theta optimized by the outer loop.

required
inner_lr float

Inner-loop step size alpha.

0.01
inner_steps int

Default number of inner gradient steps used by Learner.adapt_on and by iteryne.trainer.MetaTrainer.

1
first_order bool

If True, use the first-order approximation (FOMAML): drop second derivatives in the meta-gradient.

False
adapt_names Iterable[str] | None

Optional subset of parameter names to adapt in the inner loop. Defaults to all parameters. See the iteryne.anil module for adapting only a head.

None
allow_unused bool

Forwarded to the inner gradient computation.

True
Source code in src/iteryne/maml.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
class MAML(nn.Module):
    """Model-agnostic meta-learning wrapper for any ``nn.Module``.

    Parameters
    ----------
    module:
        The model to meta-train. Its parameters are the meta-parameters
        ``theta`` optimized by the outer loop.
    inner_lr:
        Inner-loop step size ``alpha``.
    inner_steps:
        Default number of inner gradient steps used by :meth:`Learner.adapt_on`
        and by :class:`~iteryne.trainer.MetaTrainer`.
    first_order:
        If ``True``, use the first-order approximation (FOMAML): drop second
        derivatives in the meta-gradient.
    adapt_names:
        Optional subset of parameter names to adapt in the inner loop. Defaults
        to all parameters. A single name may be passed as a plain string. See
        :mod:`iteryne.anil` for adapting only a head.
    allow_unused:
        Forwarded to the inner gradient computation.
    """

    def __init__(
        self,
        module: nn.Module,
        *,
        inner_lr: float = 0.01,
        inner_steps: int = 1,
        first_order: bool = False,
        adapt_names: Iterable[str] | None = None,
        allow_unused: bool = True,
    ) -> None:
        super().__init__()
        self.module = module
        self.inner_lr = inner_lr
        self.inner_steps = inner_steps
        self.first_order = first_order
        if isinstance(adapt_names, str):
            # A bare string is itself an iterable of characters, so
            # ``tuple("head.weight")`` would yield one "name" per character.
            # Treat it as a single parameter name instead.
            adapt_names = (adapt_names,)
        # Materialize once so a one-shot iterator can be reused by every clone.
        self.adapt_names = tuple(adapt_names) if adapt_names is not None else None
        self.allow_unused = allow_unused

    def _inner_lr(self) -> LearningRate:
        """Return the learning rate passed to inner steps.

        Subclasses (e.g. Meta-SGD) override this to supply learnable, per-parameter
        rates.
        """
        return self.inner_lr

    def clone(self) -> Learner:
        """Create a fresh :class:`Learner` starting from the current ``theta``.

        The learner references the wrapped module's live parameter leaves, so a
        ``backward()`` through an adapted learner accumulates meta-gradients into
        ``self.parameters()``.
        """
        return Learner(
            self.module,
            named_params(self.module),
            named_buffers(self.module),
            lr=self._inner_lr(),
            inner_steps=self.inner_steps,
            first_order=self.first_order,
            adapt_names=self.adapt_names,
            allow_unused=self.allow_unused,
        )

    def forward(self, *args: Any, **kwargs: Any) -> Any:
        """Forward pass through the unadapted module (the meta-parameters)."""
        return self.module(*args, **kwargs)

clone()

Create a fresh Learner starting from the current theta.

The learner references the wrapped module's live parameter leaves, so a backward() through an adapted learner accumulates meta-gradients into self.parameters().

Source code in src/iteryne/maml.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def clone(self) -> Learner:
    """Build a new :class:`Learner` that starts at the current ``theta``.

    The learner is handed the wrapped module's live parameter leaves, so calling
    ``backward()`` through an adapted learner accumulates meta-gradients into
    ``self.parameters()``.
    """
    wrapped = self.module
    theta = named_params(wrapped)
    state = named_buffers(wrapped)
    # ``_inner_lr`` is a hook: subclasses may return per-parameter rates.
    step_size = self._inner_lr()
    return Learner(
        wrapped,
        theta,
        state,
        lr=step_size,
        inner_steps=self.inner_steps,
        first_order=self.first_order,
        adapt_names=self.adapt_names,
        allow_unused=self.allow_unused,
    )

forward(*args, **kwargs)

Forward pass through the unadapted module (the meta-parameters).

Source code in src/iteryne/maml.py
188
189
190
def forward(self, *args: Any, **kwargs: Any) -> Any:
    """Run the wrapped module with its current, unadapted meta-parameters."""
    wrapped = self.module
    return wrapped(*args, **kwargs)

iteryne.Learner

A single adaptable copy of a model's parameters.

A Learner is created by MAML.clone. It starts from the wrapped module's live parameters and updates an internal parameter mapping with each adapt call. Calling the learner runs a stateless forward pass with the current (possibly adapted) parameters.

Source code in src/iteryne/maml.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
class Learner:
    """One adaptable view of a model's parameters.

    Instances come from :meth:`MAML.clone`. A learner begins with the wrapped
    module's live parameters and swaps in a new parameter mapping on every
    :meth:`adapt` / :meth:`adapt_on` call. Calling it performs a stateless
    forward pass with whatever mapping it currently holds.
    """

    def __init__(
        self,
        module: nn.Module,
        params: dict[str, Tensor],
        buffers: dict[str, Tensor],
        *,
        lr: LearningRate,
        inner_steps: int,
        first_order: bool,
        adapt_names: Iterable[str] | None,
        allow_unused: bool,
    ) -> None:
        # Model and its current tensors.
        self.module = module
        self.params = params
        self.buffers = buffers
        # Inner-loop configuration.
        self.lr = lr
        self.inner_steps = inner_steps
        self.first_order = first_order
        self.allow_unused = allow_unused
        # Freeze the names into a tuple so a one-shot iterable can be reused.
        self.adapt_names = None if adapt_names is None else tuple(adapt_names)

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Run the module with the learner's current parameters and buffers."""
        current = self.params
        return functional_forward(self.module, current, *args, buffers=self.buffers, **kwargs)

    forward = __call__

    def adapt(self, support_loss: Tensor) -> Learner:
        """Apply a single differentiable inner-loop step driven by ``support_loss``.

        Replaces :attr:`params` with the mapping returned by the inner step and
        returns ``self`` so calls can be chained.
        """
        stepped = inner_step(
            self.params,
            support_loss,
            self.lr,
            first_order=self.first_order,
            adapt_names=self.adapt_names,
            allow_unused=self.allow_unused,
        )
        self.params = stepped
        return self

    def adapt_on(
        self,
        loss_fn: Callable[[Any, Any], Tensor],
        x: Any,
        y: Any,
        *,
        steps: int | None = None,
    ) -> Learner:
        """Take ``steps`` inner steps on one ``(x, y)`` support batch.

        The support loss is recomputed for every step. When ``steps`` is
        ``None`` the learner's configured ``inner_steps`` is used.
        """
        if steps is None:
            n = self.inner_steps
        else:
            n = steps

        def support_loss(candidate: Any) -> Tensor:
            # Evaluate the support batch under the candidate parameters.
            prediction = functional_forward(self.module, candidate, x, buffers=self.buffers)
            return loss_fn(prediction, y)

        self.params = adapt(
            self.module,
            self.params,
            support_loss,
            lr=self.lr,
            inner_steps=n,
            first_order=self.first_order,
            adapt_names=self.adapt_names,
            allow_unused=self.allow_unused,
        )
        return self

__call__(*args, **kwargs)

Forward pass using the learner's current parameters.

Source code in src/iteryne/maml.py
72
73
74
def __call__(self, *args: Any, **kwargs: Any) -> Any:
    """Run the module with the learner's current parameters and buffers."""
    current = self.params
    return functional_forward(self.module, current, *args, buffers=self.buffers, **kwargs)

adapt(support_loss)

Take one differentiable inner-loop step from support_loss.

Replaces the learner's params mapping with the updated parameters and returns self so calls can be chained.

Source code in src/iteryne/maml.py
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def adapt(self, support_loss: Tensor) -> Learner:
    """Apply a single differentiable inner-loop step driven by ``support_loss``.

    Replaces :attr:`params` with the mapping returned by the inner step and
    returns ``self`` so calls can be chained.
    """
    stepped = inner_step(
        self.params,
        support_loss,
        self.lr,
        first_order=self.first_order,
        adapt_names=self.adapt_names,
        allow_unused=self.allow_unused,
    )
    self.params = stepped
    return self

adapt_on(loss_fn, x, y, *, steps=None)

Run steps inner steps on a single (x, y) support batch.

Convenience wrapper that recomputes the support loss at each step. If steps is None, uses the learner's configured inner_steps.

Source code in src/iteryne/maml.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def adapt_on(
    self,
    loss_fn: Callable[[Any, Any], Tensor],
    x: Any,
    y: Any,
    *,
    steps: int | None = None,
) -> Learner:
    """Take ``steps`` inner steps on one ``(x, y)`` support batch.

    The support loss is recomputed for every step. When ``steps`` is ``None``
    the learner's configured ``inner_steps`` is used.
    """
    if steps is None:
        n = self.inner_steps
    else:
        n = steps

    def support_loss(candidate: Any) -> Tensor:
        # Evaluate the support batch under the candidate parameters.
        prediction = functional_forward(self.module, candidate, x, buffers=self.buffers)
        return loss_fn(prediction, y)

    self.params = adapt(
        self.module,
        self.params,
        support_loss,
        lr=self.lr,
        inner_steps=n,
        first_order=self.first_order,
        adapt_names=self.adapt_names,
        allow_unused=self.allow_unused,
    )
    return self

iteryne.MetaSGD

Bases: MAML

MAML variant with learnable, per-parameter inner learning rates.

Parameters:

Name Type Description Default
module Module

Model to meta-train.

required
inner_lr float

Initial value for every per-parameter learning rate.

0.01
inner_steps int

Number of inner gradient steps.

1
first_order bool

Defaults to False because the learning rates are usually learned through the second-order meta-gradient.

False
adapt_names Iterable[str] | None

Optional subset of parameter names to adapt (and to learn rates for).

None
allow_unused bool

Forwarded to the inner gradient computation.

True
Source code in src/iteryne/metasgd.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class MetaSGD(MAML):
    """MAML variant whose inner learning rates are learnable, one per parameter entry.

    Parameters
    ----------
    module:
        Model to meta-train.
    inner_lr:
        Initial value for every per-parameter learning rate.
    inner_steps:
        Number of inner gradient steps.
    first_order:
        Defaults to ``False`` because the learning rates are usually learned
        through the second-order meta-gradient.
    adapt_names:
        Optional subset of parameter names to adapt (and to learn rates for).
    allow_unused:
        Forwarded to the inner gradient computation.
    """

    def __init__(
        self,
        module: nn.Module,
        *,
        inner_lr: float = 0.01,
        inner_steps: int = 1,
        first_order: bool = False,
        adapt_names: Iterable[str] | None = None,
        allow_unused: bool = True,
    ) -> None:
        super().__init__(
            module,
            inner_lr=inner_lr,
            inner_steps=inner_steps,
            first_order=first_order,
            adapt_names=adapt_names,
            allow_unused=allow_unused,
        )
        # ``None`` means "learn a rate for every parameter".
        selected = None if self.adapt_names is None else set(self.adapt_names)
        self.lrs = nn.ParameterDict()
        # Map sanitized key -> original parameter name, to rebuild the lr dict.
        self._lr_names: dict[str, str] = {}
        for name, param in module.named_parameters():
            if selected is None or name in selected:
                key = _sanitize(name)
                # One rate tensor shaped like the parameter, filled with ``inner_lr``.
                self.lrs[key] = nn.Parameter(torch.full_like(param, float(inner_lr)))
                self._lr_names[key] = name

    def _inner_lr(self) -> LearningRate:
        """Return the learnable rates keyed by original parameter name."""
        return {original: self.lrs[key] for key, original in self._lr_names.items()}

iteryne.ANIL

Bases: MAML

MAML that adapts only the parameters of a head submodule.

Parameters:

Name Type Description Default
module Module

The full model to meta-train.

required
head Module

A submodule of module whose parameters are adapted in the inner loop. The remaining (body) parameters are trained only by the meta-optimizer.

required
**kwargs object

Forwarded to MAML (inner_lr, inner_steps, first_order, allow_unused).

{}
Source code in src/iteryne/anil.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class ANIL(MAML):
    """MAML whose inner loop adapts only the parameters of a ``head`` submodule.

    Parameters
    ----------
    module:
        The full model to meta-train.
    head:
        A submodule of ``module`` whose parameters are adapted in the inner loop.
        The remaining (body) parameters are trained only by the meta-optimizer.
    **kwargs:
        Forwarded to :class:`MAML` (``inner_lr``, ``inner_steps``,
        ``first_order``, ``allow_unused``).
    """

    def __init__(self, module: nn.Module, head: nn.Module, **kwargs: object) -> None:
        # Resolve the head's parameters to their names as seen from ``module``.
        adaptable = head_names(module, head)
        super().__init__(module, adapt_names=adaptable, **kwargs)  # type: ignore[arg-type]

iteryne.head_names(module, head)

Return the names (as seen from module) of head's parameters.

Parameters are matched by identity, so head must be a submodule of module (or otherwise share the same parameter tensors).

Source code in src/iteryne/anil.py
21
22
23
24
25
26
27
28
29
30
31
def head_names(module: nn.Module, head: nn.Module) -> list[str]:
    """List ``head``'s parameters under the names ``module`` knows them by.

    Matching is by object identity, so ``head`` has to be a submodule of
    ``module`` (or otherwise hold the very same parameter tensors).

    Raises
    ------
    ValueError
        If no parameter of ``module`` is also a parameter of ``head``.
    """
    wanted = {id(param) for param in head.parameters()}
    matched: list[str] = []
    for name, param in module.named_parameters():
        if id(param) in wanted:
            matched.append(name)
    if matched:
        return matched
    raise ValueError("`head` shares no parameters with `module`; ANIL has nothing to adapt.")

Training

iteryne.MetaTrainer

Drive MAML meta-training over batches of tasks.

Parameters:

Name Type Description Default
maml MAML

The MAML wrapper (or any subclass, e.g. MetaSGD/ANIL).

required
meta_optimizer Optimizer

Optimizer over maml.parameters() (the meta-step beta).

required
loss_fn LossFn

loss_fn(predictions, targets) -> scalar used for both the inner and the outer loss.

required
task_sampler TaskSampler

Source of training tasks.

required
Source code in src/iteryne/trainer.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class MetaTrainer:
    """Drive MAML meta-training over batches of tasks.

    Parameters
    ----------
    maml:
        The MAML wrapper (or any subclass, e.g. ``MetaSGD``/``ANIL``).
    meta_optimizer:
        Optimizer over ``maml.parameters()`` (the meta-step ``beta``).
    loss_fn:
        ``loss_fn(predictions, targets) -> scalar`` used for both the inner and
        the outer loss.
    task_sampler:
        Source of training tasks.
    """

    def __init__(
        self,
        maml: MAML,
        meta_optimizer: torch.optim.Optimizer,
        loss_fn: LossFn,
        task_sampler: TaskSampler,
    ) -> None:
        self.maml = maml
        self.meta_optimizer = meta_optimizer
        self.loss_fn = loss_fn
        self.task_sampler = task_sampler

    def meta_step(self, tasks: Sequence[Task]) -> float:
        """Run one meta-update over ``tasks`` and return the mean query loss.

        Raises
        ------
        ValueError
            If ``tasks`` is empty.
        """
        num_tasks = len(tasks)
        if num_tasks == 0:
            # Reject an empty batch before touching the optimizer; otherwise
            # zero_grad()/step() would run and the mean would divide by zero.
            raise ValueError("meta_step requires at least one task")
        self.meta_optimizer.zero_grad()
        total = 0.0
        for task in tasks:
            learner = self.maml.clone()
            learner.adapt_on(self.loss_fn, task.support_x, task.support_y)
            query_loss = self.loss_fn(learner(task.query_x), task.query_y)
            # Average over tasks so the meta-gradient matches the mean objective.
            (query_loss / num_tasks).backward()  # type: ignore[no-untyped-call]
            total += float(query_loss.detach())
        self.meta_optimizer.step()
        return total / num_tasks

    def fit(
        self,
        num_iterations: int,
        meta_batch_size: int,
        *,
        eval_every: int | None = None,
        eval_tasks: Sequence[Task] | None = None,
        callback: Callable[[int, float], None] | None = None,
    ) -> History:
        """Meta-train for ``num_iterations`` outer steps.

        Parameters
        ----------
        num_iterations:
            Number of meta-updates.
        meta_batch_size:
            Number of tasks sampled per meta-update.
        eval_every:
            If set, run :meth:`evaluate` on ``eval_tasks`` every ``eval_every``
            iterations and record the result.
        eval_tasks:
            Held-out tasks for evaluation (required if ``eval_every`` is set).
        callback:
            Optional ``callback(iteration, meta_loss)`` called after each step.

        Returns
        -------
        History
            Per-iteration meta-loss and any recorded evaluation losses.

        Raises
        ------
        ValueError
            If an evaluation would be reached during the run but ``eval_tasks``
            is ``None``. This is raised before any training happens.
        """
        # Fail fast: an evaluation point is hit iff the run is at least
        # ``eval_every`` iterations long, so check up front rather than after
        # training up to the first evaluation.
        will_evaluate = bool(eval_every) and num_iterations >= abs(eval_every)
        if will_evaluate and eval_tasks is None:
            raise ValueError("eval_every set but eval_tasks is None")
        history = History()
        for it in range(num_iterations):
            tasks = self.task_sampler.sample(meta_batch_size)
            loss = self.meta_step(tasks)
            history.meta_loss.append(loss)
            if callback is not None:
                callback(it, loss)
            if eval_tasks is not None and eval_every and (it + 1) % eval_every == 0:
                # Recorded with the zero-based index of the step just completed.
                history.eval_loss.append((it, self.evaluate(eval_tasks)))
        return history

    @torch.no_grad()
    def _query_loss_no_grad(self, learner: Any, task: Task) -> float:
        """Return the learner's query loss on ``task`` as a float, without autograd."""
        return float(self.loss_fn(learner(task.query_x), task.query_y).detach())

    def evaluate(self, tasks: Sequence[Task]) -> float:
        """Adapt to each task and return the mean post-adaptation query loss.

        Adaptation still requires gradients (the inner loop), but the reported
        query loss is detached and no meta-update is performed.

        Raises
        ------
        ValueError
            If ``tasks`` is empty.
        """
        num_tasks = len(tasks)
        if num_tasks == 0:
            # A mean over zero tasks is undefined; say so explicitly.
            raise ValueError("evaluate requires at least one task")
        total = 0.0
        for task in tasks:
            learner = self.maml.clone()
            learner.adapt_on(self.loss_fn, task.support_x, task.support_y)
            total += self._query_loss_no_grad(learner, task)
        return total / num_tasks

meta_step(tasks)

Run one meta-update over tasks and return the mean query loss.

Source code in src/iteryne/trainer.py
62
63
64
65
66
67
68
69
70
71
72
73
74
def meta_step(self, tasks: Sequence[Task]) -> float:
    """Run one meta-update over ``tasks`` and return the mean query loss.

    Raises
    ------
    ValueError
        If ``tasks`` is empty.
    """
    num_tasks = len(tasks)
    if num_tasks == 0:
        # Reject an empty batch before touching the optimizer; otherwise
        # zero_grad()/step() would run and the mean would divide by zero.
        raise ValueError("meta_step requires at least one task")
    self.meta_optimizer.zero_grad()
    total = 0.0
    for task in tasks:
        learner = self.maml.clone()
        learner.adapt_on(self.loss_fn, task.support_x, task.support_y)
        query_loss = self.loss_fn(learner(task.query_x), task.query_y)
        # Average over tasks so the meta-gradient matches the mean objective.
        (query_loss / num_tasks).backward()  # type: ignore[no-untyped-call]
        total += float(query_loss.detach())
    self.meta_optimizer.step()
    return total / num_tasks

fit(num_iterations, meta_batch_size, *, eval_every=None, eval_tasks=None, callback=None)

Meta-train for num_iterations outer steps.

Parameters:

Name Type Description Default
num_iterations int

Number of meta-updates.

required
meta_batch_size int

Number of tasks sampled per meta-update.

required
eval_every int | None

If set, run :meth:evaluate on eval_tasks every eval_every iterations and record the result.

None
eval_tasks Sequence[Task] | None

Held-out tasks for evaluation (required if eval_every is set).

None
callback Callable[[int, float], None] | None

Optional callback(iteration, meta_loss) called after each step.

None

Returns:

Type Description
History

Per-iteration meta-loss and any recorded evaluation losses.

Source code in src/iteryne/trainer.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def fit(
    self,
    num_iterations: int,
    meta_batch_size: int,
    *,
    eval_every: int | None = None,
    eval_tasks: Sequence[Task] | None = None,
    callback: Callable[[int, float], None] | None = None,
) -> History:
    """Meta-train for ``num_iterations`` outer steps.

    Parameters
    ----------
    num_iterations:
        Number of meta-updates.
    meta_batch_size:
        Number of tasks sampled per meta-update.
    eval_every:
        If set, run :meth:`evaluate` on ``eval_tasks`` every ``eval_every``
        iterations and record the result.
    eval_tasks:
        Held-out tasks for evaluation (required if ``eval_every`` is set).
    callback:
        Optional ``callback(iteration, meta_loss)`` called after each step.

    Returns
    -------
    History
        Per-iteration meta-loss and any recorded evaluation losses.

    Raises
    ------
    ValueError
        If an evaluation would be reached during the run but ``eval_tasks`` is
        ``None``. This is raised before any training happens.
    """
    # Fail fast: an evaluation point is hit iff the run is at least
    # ``eval_every`` iterations long, so check up front rather than after
    # training up to the first evaluation.
    will_evaluate = bool(eval_every) and num_iterations >= abs(eval_every)
    if will_evaluate and eval_tasks is None:
        raise ValueError("eval_every set but eval_tasks is None")
    history = History()
    for it in range(num_iterations):
        tasks = self.task_sampler.sample(meta_batch_size)
        loss = self.meta_step(tasks)
        history.meta_loss.append(loss)
        if callback is not None:
            callback(it, loss)
        if eval_tasks is not None and eval_every and (it + 1) % eval_every == 0:
            # Recorded with the zero-based index of the step just completed.
            history.eval_loss.append((it, self.evaluate(eval_tasks)))
    return history

evaluate(tasks)

Adapt to each task and return the mean post-adaptation query loss.

Adaptation still requires gradients (the inner loop), but the reported query loss is detached and no meta-update is performed.

Source code in src/iteryne/trainer.py
123
124
125
126
127
128
129
130
131
132
133
134
def evaluate(self, tasks: Sequence[Task]) -> float:
    """Adapt to each task and return the mean post-adaptation query loss.

    Adaptation still requires gradients (the inner loop), but the reported
    query loss is detached and no meta-update is performed.

    Raises
    ------
    ValueError
        If ``tasks`` is empty.
    """
    num_tasks = len(tasks)
    if num_tasks == 0:
        # A mean over zero tasks is undefined; say so explicitly.
        raise ValueError("evaluate requires at least one task")
    total = 0.0
    for task in tasks:
        learner = self.maml.clone()
        learner.adapt_on(self.loss_fn, task.support_x, task.support_y)
        total += self._query_loss_no_grad(learner, task)
    return total / num_tasks

iteryne.History dataclass

Records produced by :meth:MetaTrainer.fit.

Source code in src/iteryne/trainer.py
26
27
28
29
30
31
@dataclass
class History:
    """Records produced by :meth:`MetaTrainer.fit`.

    Both lists start empty and are appended to as training progresses.
    """

    # Mean query loss returned by each meta-update, in iteration order.
    meta_loss: list[float] = field(default_factory=list)
    # ``(iteration, loss)`` pairs, one per evaluation run; ``iteration`` is the
    # zero-based index of the meta-update after which evaluation happened.
    eval_loss: list[tuple[int, float]] = field(default_factory=list)

Tasks and data

iteryne.Task dataclass

A single few-shot task: a support split and a query split.

Attributes:

Name Type Description
support_x, support_y

Inputs and targets used for inner-loop adaptation (K shots).

query_x, query_y

Held-out inputs and targets used to evaluate the adapted model and form the meta-objective.

Source code in src/iteryne/tasks.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@dataclass(frozen=True)
class Task:
    """A single few-shot task: a support split and a query split.

    The dataclass is frozen, so fields cannot be reassigned after construction.

    Attributes
    ----------
    support_x, support_y:
        Inputs and targets used for inner-loop adaptation (``K`` shots).
    query_x, query_y:
        Held-out inputs and targets used to evaluate the adapted model and form
        the meta-objective.
    """

    # Support split: consumed by the inner loop.
    support_x: Tensor
    support_y: Tensor
    # Query split: consumed by the outer (meta) loss and by evaluation.
    query_x: Tensor
    query_y: Tensor

iteryne.TaskSampler

Bases: Protocol

Protocol for objects that sample a batch of :class:Task from p(T).

Source code in src/iteryne/tasks.py
41
42
43
44
45
46
47
@runtime_checkable
class TaskSampler(Protocol):
    """Protocol for objects that sample a batch of :class:`Task` from ``p(T)``.

    Any object with a compatible ``sample`` method satisfies the protocol; no
    inheritance is required. Because the protocol is ``runtime_checkable``,
    ``isinstance(obj, TaskSampler)`` works, but it only checks that a ``sample``
    attribute exists, not its signature.
    """

    def sample(self, meta_batch_size: int) -> list[Task]:
        """Return ``meta_batch_size`` freshly sampled tasks."""
        # Protocol stub: implementations supply the body.
        ...

sample(meta_batch_size)

Return meta_batch_size freshly sampled tasks.

Source code in src/iteryne/tasks.py
45
46
47
def sample(self, meta_batch_size: int) -> list[Task]:
    """Return ``meta_batch_size`` freshly sampled tasks."""
    # Protocol stub: implementations supply the body.
    ...

iteryne.SinusoidTaskSampler

Sample few-shot sinusoid regression tasks.

Parameters:

Name Type Description Default
amp_range tuple[float, float]

Range (low, high) for the amplitude A.

(0.1, 5.0)
phase_range tuple[float, float]

Range (low, high) for the phase p.

(0.0, pi)
x_range tuple[float, float]

Range (low, high) from which inputs are drawn uniformly.

(-5.0, 5.0)
k_support int

Number of support points per task (K).

10
k_query int

Number of query points per task.

10
generator Generator | None

Optional torch.Generator for reproducibility. If None, a new one is created; pass seed to seed it.

None
seed int | None

Convenience seed used to build a generator when generator is None.

None
device device | str | None

Tensor placement (device) for sampled data.

None
dtype dtype

Tensor precision (dtype) for sampled data.

float32
Source code in src/iteryne/datasets/sinusoid.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class SinusoidTaskSampler:
    """Sample few-shot sinusoid regression tasks.

    Each task is the function ``y = A * sin(x + p)`` with its own amplitude
    ``A`` and phase ``p``.

    Parameters
    ----------
    amp_range:
        Range ``(low, high)`` for the amplitude ``A``.
    phase_range:
        Range ``(low, high)`` for the phase ``p``.
    x_range:
        Range ``(low, high)`` from which inputs are drawn uniformly.
    k_support:
        Number of support points per task (``K``).
    k_query:
        Number of query points per task.
    generator:
        Optional :class:`torch.Generator` for reproducibility. If ``None`` a new
        one is created; pass ``seed`` to seed it.
    seed:
        Convenience seed used to build a generator when ``generator`` is ``None``.
    device:
        Device the sampled tensors are moved to. If ``None``, tensors stay on
        the generator's device, where they are sampled.
    dtype:
        Floating-point precision of the sampled tensors.
    """

    def __init__(
        self,
        amp_range: tuple[float, float] = (0.1, 5.0),
        phase_range: tuple[float, float] = (0.0, math.pi),
        x_range: tuple[float, float] = (-5.0, 5.0),
        k_support: int = 10,
        k_query: int = 10,
        *,
        generator: torch.Generator | None = None,
        seed: int | None = None,
        device: torch.device | str | None = None,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        self.amp_range = amp_range
        self.phase_range = phase_range
        self.x_range = x_range
        self.k_support = k_support
        self.k_query = k_query
        self.device = torch.device(device) if device is not None else None
        self.dtype = dtype
        if generator is None:
            generator = torch.Generator()
            # ``seed`` only applies to a generator created here; a caller-supplied
            # generator is used as-is.
            if seed is not None:
                generator.manual_seed(seed)
        self.generator = generator

    def _uniform(self, n: int, low: float, high: float) -> torch.Tensor:
        """Return an ``(n, 1)`` tensor of draws from ``[low, high)``."""
        # Sample on the generator's own device: random ops require the output
        # tensor and the generator to live on the same device.
        u = torch.rand(
            n,
            1,
            generator=self.generator,
            dtype=self.dtype,
            device=self.generator.device,
        )
        out = low + (high - low) * u
        if self.device is not None:
            out = out.to(self.device)
        return out

    def _scalar(self, bounds: tuple[float, float]) -> float:
        """Draw one Python float uniformly from ``bounds`` with the sampler's generator."""
        low, high = bounds
        draw = torch.empty(1, dtype=self.dtype, device=self.generator.device)
        return float(draw.uniform_(low, high, generator=self.generator).item())

    def _task(self) -> Task:
        """Sample one task: draw ``A`` and ``p``, then split the points."""
        # Draw order (amplitude, phase, inputs) determines the random stream,
        # so keep it fixed for reproducibility.
        amp = self._scalar(self.amp_range)
        phase = self._scalar(self.phase_range)
        n = self.k_support + self.k_query
        x = self._uniform(n, *self.x_range)
        y = amp * torch.sin(x + phase)
        # The first ``k_support`` points form the support set, the rest the query set.
        return Task(
            support_x=x[: self.k_support],
            support_y=y[: self.k_support],
            query_x=x[self.k_support :],
            query_y=y[self.k_support :],
        )

    def sample(self, meta_batch_size: int) -> list[Task]:
        """Return ``meta_batch_size`` independently sampled sinusoid tasks."""
        return [self._task() for _ in range(meta_batch_size)]

sample(meta_batch_size)

Return meta_batch_size independently sampled sinusoid tasks.

Source code in src/iteryne/datasets/sinusoid.py
 98
 99
100
def sample(self, meta_batch_size: int) -> list[Task]:
    """Draw ``meta_batch_size`` sinusoid tasks, each sampled independently."""
    tasks = []
    for _ in range(meta_batch_size):
        tasks.append(self._task())
    return tasks

Functional core

iteryne.functional_forward(model, params, *args, buffers=None, **kwargs)

Run model statelessly using the given params (and buffers).

Parameters:

Name Type Description Default
model Module

Any nn.Module. Its own parameters are ignored; params are used.

required
params Mapping[str, Tensor]

Mapping of parameter name to tensor, as returned by :func:named_params or :func:adapt.

required
*args Any

Forwarded to the module's forward.

()
**kwargs Any

Forwarded to the module's forward.

()
buffers Mapping[str, Tensor] | None

Optional mapping of buffer name to tensor. If None, the module's own buffers are used.

None

Returns:

Type Description
Any

Whatever model.forward returns.

Source code in src/iteryne/functional.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def functional_forward(
    model: nn.Module,
    params: Mapping[str, Tensor],
    *args: Any,
    buffers: Mapping[str, Tensor] | None = None,
    **kwargs: Any,
) -> Any:
    """Call ``model`` as a function of explicitly supplied ``params``.

    The tensors in ``params`` (and ``buffers``, when given) are handed to
    ``functional_call`` in place of the state stored on the module.

    Parameters
    ----------
    model:
        Any ``nn.Module``; only its structure and ``forward`` are used.
    params:
        Parameter name -> tensor mapping, e.g. the result of
        :func:`named_params` or :func:`adapt`.
    *args, **kwargs:
        Positional and keyword arguments passed on to ``forward``.
    buffers:
        Optional buffer name -> tensor mapping. When ``None``, no buffer
        overrides are supplied, so the module's own buffers are used.

    Returns
    -------
    Any
        The value produced by ``model.forward``.
    """
    # One flat name -> tensor dict; buffer entries are merged in after the
    # parameters, exactly as a later ``update`` would do.
    if buffers is None:
        state: dict[str, Tensor] = dict(params)
    else:
        state = {**params, **buffers}
    return functional_call(model, state, args, kwargs)

iteryne.inner_step(params, loss, lr, *, first_order=False, adapt_names=None, allow_unused=True)

Take one differentiable gradient-descent step on params.

Computes grads = d loss / d params and returns the updated mapping p' = p - lr * grad for every adapted parameter.

Parameters:

Name Type Description Default
params Mapping[str, Tensor]

Current parameter mapping (the point at which loss was computed).

required
loss Tensor

Scalar support loss, differentiable w.r.t. params.

required
lr LearningRate

Inner-loop step size alpha. Either a float or a per-parameter mapping of tensors (Meta-SGD).

required
first_order bool

If True, drop second derivatives (FOMAML) by not creating the graph of the gradient term. If False, keep the graph for full MAML.

False
adapt_names Iterable[str] | None

Optional subset of parameter names to update in the inner loop. Names not in this set are passed through unchanged (used by ANIL to adapt only the head). If None, every parameter is adapted.

None
allow_unused bool

Passed to :func:torch.autograd.grad. Parameters that do not affect the loss receive a None gradient and are passed through unchanged.

True

Returns:

Type Description
dict[str, Tensor]

The adapted parameter mapping.

Source code in src/iteryne/functional.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def inner_step(
    params: Mapping[str, Tensor],
    loss: Tensor,
    lr: LearningRate,
    *,
    first_order: bool = False,
    adapt_names: Iterable[str] | None = None,
    allow_unused: bool = True,
) -> dict[str, Tensor]:
    """Take one differentiable gradient-descent step on ``params``.

    Computes ``grads = d loss / d params`` and returns the updated mapping
    ``p' = p - lr * grad`` for every adapted parameter.

    Parameters
    ----------
    params:
        Current parameter mapping (the point at which ``loss`` was computed).
    loss:
        Scalar support loss, differentiable w.r.t. ``params``.
    lr:
        Inner-loop step size ``alpha``. Either a float or a per-parameter mapping
        of tensors (Meta-SGD).
    first_order:
        If ``True``, drop second derivatives (FOMAML) by not creating the graph
        of the gradient term. If ``False``, keep the graph for full MAML.
    adapt_names:
        Optional subset of parameter names to update in the inner loop. Names not
        in this set are passed through unchanged (used by ANIL to adapt only the
        head). If ``None``, every parameter is adapted.
    allow_unused:
        Passed to :func:`torch.autograd.grad`. Parameters that do not affect the
        loss receive a ``None`` gradient and are passed through unchanged.

    Returns
    -------
    dict[str, Tensor]
        The adapted parameter mapping.
    """
    names = list(params.keys())
    if adapt_names is None:
        differentiable_names = names
    else:
        wanted = set(adapt_names)
        differentiable_names = [n for n in names if n in wanted]

    if not differentiable_names:
        raise ValueError("inner_step received no parameters to adapt; check `adapt_names`.")

    grads = torch.autograd.grad(
        loss,
        [params[n] for n in differentiable_names],
        create_graph=not first_order,
        allow_unused=allow_unused,
    )
    grad_by_name = dict(zip(differentiable_names, grads, strict=True))

    updated: dict[str, Tensor] = {}
    for name in names:
        param = params[name]
        grad = grad_by_name.get(name)
        if grad is None:
            updated[name] = param
        else:
            updated[name] = param - _resolve_lr(lr, name) * grad
    return updated

iteryne.adapt(model, params, build_loss, *, lr=0.01, inner_steps=1, first_order=False, adapt_names=None, allow_unused=True)

Run the full inner loop and return the adapted parameters theta'.

Parameters:

Name Type Description Default
model Module

The module whose architecture defines the forward pass.

required
params Mapping[str, Tensor]

Starting parameter mapping theta (typically :func:named_params).

required
build_loss Callable[[dict[str, Tensor]], Tensor]

Callable mapping a parameter dict to the scalar support loss. Keeping the loss as a closure (rather than (output, target)) keeps this core fully task- and loss-agnostic; the closure decides how to call model and which data to use. Example::

build_loss = lambda p: mse(functional_forward(model, p, x), y)
required
lr LearningRate

Inner step size alpha (float or per-parameter mapping).

0.01
inner_steps int

Number of gradient steps in the inner loop (>= 1).

1
first_order bool

See :func:inner_step.

False
adapt_names Iterable[str] | None

See :func:inner_step.

None
allow_unused bool

See :func:inner_step.

True

Returns:

Type Description
dict[str, Tensor]

The adapted parameter mapping after inner_steps updates.

Source code in src/iteryne/functional.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def adapt(
    model: nn.Module,
    params: Mapping[str, Tensor],
    build_loss: Callable[[dict[str, Tensor]], Tensor],
    *,
    lr: LearningRate = 0.01,
    inner_steps: int = 1,
    first_order: bool = False,
    adapt_names: Iterable[str] | None = None,
    allow_unused: bool = True,
) -> dict[str, Tensor]:
    """Execute the inner loop and return the adapted parameters ``theta'``.

    Parameters
    ----------
    model:
        The module whose architecture defines the forward pass. It is not
        called here directly; ``build_loss`` decides how to use it.
    params:
        Initial parameter mapping ``theta`` (typically :func:`named_params`).
    build_loss:
        Closure turning a parameter dict into the scalar support loss. Taking
        a closure instead of ``(output, target)`` keeps this routine
        independent of the task and loss; the closure chooses the data and
        how ``model`` is invoked. Example::

            build_loss = lambda p: mse(functional_forward(model, p, x), y)

    lr:
        Inner step size ``alpha`` (float or per-parameter mapping).
    inner_steps:
        How many gradient steps the inner loop performs (``>= 1``).
    first_order, adapt_names, allow_unused:
        See :func:`inner_step`.

    Returns
    -------
    dict[str, Tensor]
        The parameter mapping obtained after ``inner_steps`` updates.

    Raises
    ------
    ValueError
        If ``inner_steps`` is smaller than 1.
    """
    if inner_steps < 1:
        raise ValueError(f"inner_steps must be >= 1, got {inner_steps}")

    current: dict[str, Tensor] = dict(params)
    # Materialize once so a one-shot iterable is still usable on later steps.
    names = None if adapt_names is None else tuple(adapt_names)
    step_options = {
        "first_order": first_order,
        "adapt_names": names,
        "allow_unused": allow_unused,
    }
    for _ in range(inner_steps):
        support_loss = build_loss(current)
        current = inner_step(current, support_loss, lr, **step_options)
    return current

iteryne.named_params(model)

Return a fresh {name: parameter} dict for model.

The tensors are the model's live :class:~torch.nn.Parameter leaves, so gradients flowing back into them accumulate into each parameter's .grad attribute.

Source code in src/iteryne/functional.py
47
48
49
50
51
52
53
def named_params(model: nn.Module) -> dict[str, Tensor]:
    """Collect the parameters of ``model`` into a new ``{name: parameter}`` dict.

    The dict is built anew on every call, but its values are the model's own
    live :class:`~torch.nn.Parameter` leaves rather than copies, so gradients
    that flow back into them accumulate in each parameter's ``.grad``.
    """
    live_parameters = model.named_parameters()
    return dict(live_parameters)

iteryne.named_buffers(model)

Return a fresh {name: buffer} dict for model (e.g. BatchNorm stats).

Source code in src/iteryne/functional.py
56
57
58
def named_buffers(model: nn.Module) -> dict[str, Tensor]:
    """Collect the buffers of ``model`` (e.g. BatchNorm stats) into a new ``{name: buffer}`` dict."""
    registered_buffers = model.named_buffers()
    return dict(registered_buffers)