Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 2 additions & 30 deletions paconvert/api_mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -4198,27 +4198,7 @@
"Matcher": "ChangePrefixMatcher"
},
"torch.cuda.amp.GradScaler": {
"Matcher": "GenericMatcher",
"paddle_api": "paddle.amp.GradScaler",
"args_list": [
"init_scale",
"growth_factor",
"backoff_factor",
"growth_interval",
"enabled"
],
"kwargs_change": {
"init_scale": "init_loss_scaling",
"growth_factor": "incr_ratio",
"backoff_factor": "decr_ratio",
"growth_interval": "incr_every_n_steps",
"enabled": "enable"
},
"paddle_default_kwargs": {
"incr_every_n_steps": 2000,
"init_loss_scaling": 65536.0
},
"min_input_args": 0
"Matcher": "ChangePrefixMatcher"
},
"torch.cuda.amp.autocast": {
"Matcher": "ChangePrefixMatcher"
Expand Down Expand Up @@ -7220,15 +7200,7 @@
"Matcher": "ChangePrefixMatcher"
},
"torch.mm": {
"Matcher": "GenericMatcher",
"paddle_api": "paddle.mm",
"min_input_args": 2,
"args_list": [
"input",
"mat2",
"*",
"out"
]
"Matcher": "ChangePrefixMatcher"
},
"torch.mode": {
"Matcher": "DoubleAssignMatcher",
Expand Down
212 changes: 212 additions & 0 deletions tests/test_cuda_amp_GradScaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,215 @@ def test_case_2():
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_3():
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(growth_factor=3.0)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_4():
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(backoff_factor=0.25)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_5():
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(init_scale=1024, growth_factor=3.0, backoff_factor=0.25, growth_interval=500, enabled=True)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_6():
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(growth_interval=500, init_scale=32768, backoff_factor=0.25, growth_factor=3.0)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_7():
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(enabled=False)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
loss = torch.mean(x*x)
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_8():
"""Test with init_scale keyword argument only"""
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(init_scale=512)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_9():
"""Test variable kwargs dict unpacking"""
pytorch_code = textwrap.dedent(
"""
import torch
kwargs = {'init_scale': 1024, 'growth_factor': 3.0, 'backoff_factor': 0.25, 'growth_interval': 500}
scaler = torch.cuda.amp.GradScaler(**kwargs)
x = torch.tensor([[[-1.3020, -0.1005, 0.5766, 0.6351, -0.8893, 0.0253, -0.1756, 1.2913],
[-0.8833, -0.1369, -0.0168, -0.5409, -0.1511, -0.1240, -1.1870, -1.8816]]])
with torch.cuda.amp.autocast():
loss = torch.mean(x*x).to('cpu')
scaled = scaler.scale(loss).cpu()
"""
)
obj.run(pytorch_code, ["scaled"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_10():
"""Test get_scale() method - verify initial scale value"""
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(init_scale=2048)
result = torch.tensor([scaler.get_scale()])
"""
)
obj.run(pytorch_code, ["result"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_11():
"""Test is_enabled() method with enabled=True"""
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(enabled=True)
result = torch.tensor([float(scaler.is_enabled())])
"""
)
obj.run(pytorch_code, ["result"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_12():
"""Test is_enabled() method with enabled=False"""
pytorch_code = textwrap.dedent(
"""
import torch
scaler = torch.cuda.amp.GradScaler(enabled=False)
result = torch.tensor([float(scaler.is_enabled())])
"""
)
obj.run(pytorch_code, ["result"])


@pytest.mark.skipif(
condition=not paddle.device.is_compiled_with_cuda(),
reason="can only run on paddle with CUDA",
)
def test_case_13():
"""Test full training loop: scale, backward, step, update, get_scale"""
pytorch_code = textwrap.dedent(
"""
import torch
import torch.nn as nn
model = nn.Linear(3, 1, bias=False).cuda()
with torch.no_grad():
model.weight.fill_(0.1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler(init_scale=512, growth_interval=1000)
x = torch.tensor([[1.0, 2.0, 3.0]], device='cuda')
with torch.cuda.amp.autocast():
output = model(x)
loss = output.sum()
optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
result = torch.tensor([scaler.get_scale()])
"""
)
obj.run(pytorch_code, ["result"])
101 changes: 101 additions & 0 deletions tests/test_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,104 @@ def test_case_6():
"""
)
obj.run(pytorch_code, ["result"])


def test_case_7():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
b = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
result = torch.mm(a, b)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_8():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
b = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
result = torch.mm(input=a, mat2=b)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_9():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
b = torch.tensor([[1., 2., 3., 4.], [5., 6., 7., 8.]])
result = torch.mm(a, b)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_10():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2.], [4., 5.]], dtype=torch.float64)
b = torch.tensor([[1., 3.], [3., 6.]], dtype=torch.float64)
result = torch.mm(a, b)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_11():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
b = torch.tensor([[9., 8., 7.], [6., 5., 4.], [3., 2., 1.]])
result = torch.mm(a, b)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_12():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2.], [4., 5.]])
b = torch.tensor([[1., 3.], [3., 6.]])
args = (a, b)
result = torch.mm(*args)
"""
)
obj.run(pytorch_code, ["result"])


def test_case_13():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2.], [4., 5.]], requires_grad=True)
b = torch.tensor([[1., 3.], [3., 6.]], requires_grad=True)
result = torch.mm(a, b)
"""
)
obj.run(pytorch_code, ["result"], check_stop_gradient=False)


# Paddle leaf tensor .grad returns None in exec() context, framework mechanism difference
def _test_case_14():
pytorch_code = textwrap.dedent(
"""
import torch
a = torch.tensor([[1., 2.], [4., 5.]], requires_grad=True)
b = torch.tensor([[1., 3.], [3., 6.]], requires_grad=True)
result = torch.mm(a, b)
result.sum().backward()
a_grad = a.grad
b_grad = b.grad
"""
)
obj.run(pytorch_code, ["result", "a_grad", "b_grad"], check_stop_gradient=False)