Skip to content

Commit 3fc07b5

Browse files
authored
Fix torchscript related test failures. (#4069)
1 parent 190c3ea commit 3fc07b5

5 files changed

Lines changed: 41 additions & 11 deletions

File tree

.github/scripts/unittest-linux/run_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,5 @@ fi
2929
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true
3030
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true
3131
cd test
32-
pytest torchaudio_unittest -k "not torchscript and not fairseq and not demucs ${PYTEST_K_EXTRA}"
32+
pytest torchaudio_unittest -k "not fairseq and not demucs ${PYTEST_K_EXTRA}"
3333
)

.github/scripts/unittest-windows/run_test.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ env | grep TORCHAUDIO || true
1313

1414
cd test
1515
if [ -z "${CUDA_VERSION:-}" ] ; then
16-
pytest --continue-on-collection-errors --cov=torchaudio --junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml -v --durations 20 torchaudio_unittest -k "not torchscript and not fairseq and not demucs and not librosa"
16+
pytest --continue-on-collection-errors --cov=torchaudio --junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml -v --durations 20 torchaudio_unittest -k "not fairseq and not demucs and not librosa"
1717
else
18-
pytest --continue-on-collection-errors --cov=torchaudio --junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml -v --durations 20 torchaudio_unittest -k "not cpu and (cuda or gpu) and not torchscript and not fairseq and not demucs and not librosa"
18+
pytest --continue-on-collection-errors --cov=torchaudio --junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml -v --durations 20 torchaudio_unittest -k "not cpu and (cuda or gpu) and not fairseq and not demucs and not librosa"
1919
fi
2020
coverage html

src/torchaudio/functional/filtering.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,8 @@ def forward(ctx, waveform, b_coeffs):
946946
b_coeff_flipped = b_coeffs.flip(1).contiguous()
947947
padded_waveform = F.pad(waveform, (n_order - 1, 0))
948948
output = F.conv1d(padded_waveform, b_coeff_flipped.unsqueeze(1), groups=n_channel)
949-
ctx.save_for_backward(waveform, b_coeffs, output)
949+
if not torch.jit.is_scripting():
950+
ctx.save_for_backward(waveform, b_coeffs, output)
950951
return output
951952

952953
@staticmethod
@@ -955,6 +956,7 @@ def backward(ctx, dy):
955956
n_batch = x.size(0)
956957
n_channel = x.size(1)
957958
n_order = b_coeffs.size(1)
959+
958960
db = (
959961
F.conv1d(
960962
F.pad(x, (n_order - 1, 0)).view(1, n_batch * n_channel, -1),
@@ -970,6 +972,13 @@ def backward(ctx, dy):
970972
dx = F.conv1d(F.pad(dy, (0, n_order - 1)), b_coeffs.unsqueeze(1), groups=n_channel) if x.requires_grad else None
971973
return (dx, db)
972974

975+
@staticmethod
976+
def ts_apply(waveform, b_coeffs):
977+
if torch.jit.is_scripting():
978+
return DifferentiableFIR.forward(torch.empty(0), waveform, b_coeffs)
979+
else:
980+
return DifferentiableFIR.apply(waveform, b_coeffs)
981+
973982

974983
class DifferentiableIIR(torch.autograd.Function):
975984
@staticmethod
@@ -984,7 +993,8 @@ def forward(ctx, waveform, a_coeffs_normalized):
984993
)
985994
_lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform)
986995
output = padded_output_waveform[:, :, n_order - 1 :]
987-
ctx.save_for_backward(waveform, a_coeffs_normalized, output)
996+
if not torch.jit.is_scripting():
997+
ctx.save_for_backward(waveform, a_coeffs_normalized, output)
988998
return output
989999

9901000
@staticmethod
@@ -1006,10 +1016,17 @@ def backward(ctx, dy):
10061016
)
10071017
return (dx, da)
10081018

1019+
@staticmethod
1020+
def ts_apply(waveform, a_coeffs_normalized):
1021+
if torch.jit.is_scripting():
1022+
return DifferentiableIIR.forward(torch.empty(0), waveform, a_coeffs_normalized)
1023+
else:
1024+
return DifferentiableIIR.apply(waveform, a_coeffs_normalized)
1025+
10091026

10101027
def _lfilter(waveform, a_coeffs, b_coeffs):
1011-
filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1])
1012-
return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1])
1028+
filtered_waveform = DifferentiableFIR.ts_apply(waveform, b_coeffs / a_coeffs[:, 0:1])
1029+
return DifferentiableIIR.ts_apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1])
10131030

10141031

10151032
def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor:

src/torchaudio/functional/functional.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,8 @@ def mask_along_axis_iid(
847847

848848
if axis not in [dim - 2, dim - 1]:
849849
raise ValueError(
850-
f"Only Frequency and Time masking are supported (axis {dim-2} and axis {dim-1} supported; {axis} given)."
850+
"Only Frequency and Time masking are supported"
851+
f" (axis {dim - 2} and axis {dim - 1} supported; {axis} given)."
851852
)
852853

853854
if not 0.0 <= p <= 1.0:
@@ -919,7 +920,8 @@ def mask_along_axis(
919920

920921
if axis not in [dim - 2, dim - 1]:
921922
raise ValueError(
922-
f"Only Frequency and Time masking are supported (axis {dim-2} and axis {dim-1} supported; {axis} given)."
923+
"Only Frequency and Time masking are supported"
924+
f" (axis {dim - 2} and axis {dim - 1} supported; {axis} given)."
923925
)
924926

925927
if not 0.0 <= p <= 1.0:
@@ -1731,6 +1733,16 @@ def backward(ctx, dy):
17311733
result = grad * grad_out
17321734
return (result, None, None, None, None, None, None, None)
17331735

1736+
@staticmethod
1737+
def ts_apply(logits, targets, logit_lengths, target_lengths, blank: int, clamp: float, fused_log_softmax: bool):
1738+
if torch.jit.is_scripting():
1739+
output, saved = torch.ops.torchaudio.rnnt_loss_forward(
1740+
logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax
1741+
)
1742+
return output
1743+
else:
1744+
return RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
1745+
17341746

17351747
def rnnt_loss(
17361748
logits: Tensor,
@@ -1774,7 +1786,7 @@ def rnnt_loss(
17741786
if blank < 0: # reinterpret blank index if blank < 0.
17751787
blank = logits.shape[-1] + blank
17761788

1777-
costs = RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
1789+
costs = RnntLoss.ts_apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
17781790

17791791
if reduction == "mean":
17801792
return costs.mean()

src/torchaudio/transforms/_transforms.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1202,7 +1202,8 @@ def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0
12021202
specgram, self.mask_param, mask_value, self.axis + specgram.dim() - 3, p=self.p
12031203
)
12041204
else:
1205-
return F.mask_along_axis(specgram, self.mask_param, mask_value, self.axis + specgram.dim() - 3, p=self.p)
1205+
mask_value_ = float(mask_value) if isinstance(mask_value, Tensor) else mask_value
1206+
return F.mask_along_axis(specgram, self.mask_param, mask_value_, self.axis + specgram.dim() - 3, p=self.p)
12061207

12071208

12081209
class FrequencyMasking(_AxisMasking):

0 commit comments

Comments (0)