Mipmap
We will walk through an example to get started with running a training loop in Slang.
In this example, our goal is to produce higher-quality mipmaps. Traditional mipmaps are usually generated by simple area averaging, which is fast but causes a noticeable quality loss for non-linear data such as normal maps. To address this, we use a differentiable rendering approach: we train the low-resolution maps so that their rendered result matches the rendering of the original high-resolution maps as closely as possible, yielding mipmaps with better visual quality.
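To see why plain averaging hurts non-linear data, consider Lambertian shading with two normals tilted in opposite directions: the average of the two shaded results is not the same as shading with the averaged, renormalized normal. A small NumPy illustration of this (the vectors are made-up values, and the snippet is not part of the example code):

import numpy as np

light = np.array([0.0, 0.0, 1.0])   # light pointing straight along +z
n1 = np.array([0.8, 0.0, 0.6])      # two unit normals tilted in opposite directions
n2 = np.array([-0.8, 0.0, 0.6])

# Average of the shaded results (what the high-res render produces over the footprint).
shaded_avg = 0.5 * (max(n1 @ light, 0.0) + max(n2 @ light, 0.0))   # = 0.6

# Shading with the averaged, renormalized normal (what a box-filtered mipmap gives).
n_avg = (n1 + n2) / np.linalg.norm(n1 + n2)                        # = (0, 0, 1)
shaded_from_avg = max(n_avg @ light, 0.0)                          # = 1.0

print(shaded_avg, shaded_from_avg)  # 0.6 vs 1.0: averaging normals overestimates brightness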
The overall code structure is as follows:
# Create the window
app = App(width=3092, height=1024, title="Mipmap Example")

# Load the Slang module
module = spy.Module.load_from_file(app.device, "step_05_train.slang")

data_path = Path(__file__).parent

# Load the textures
albedo_map = spy.Tensor.load_from_image(
    app.device, data_path.joinpath("PavingStones070_2K.diffuse.jpg"), linearize=True
)
normal_map = ...
roughness_map = ...

# Downsampling helper
def downsample(source: spy.Tensor, steps: int) -> spy.Tensor:
    ...

# Downsample the textures loaded above
lr_albedo_map = downsample(albedo_map, 2)
lr_normal_map = ...
lr_roughness_map = ...

# Create the maps to be trained and initialize them
lr_trained_albedo_map = spy.Tensor.zeros_like(lr_albedo_map)
lr_trained_normal_map = spy.Tensor.zeros_like(lr_normal_map)
lr_trained_roughness_map = spy.Tensor.zeros_like(lr_roughness_map)
module.init3(lr_trained_albedo_map, spy.float3(0.5, 0.5, 0.5))
module.init_normal(lr_trained_normal_map)
module.init1(lr_trained_roughness_map, 0.5)

# Corresponding gradients and optimizer statistics
lr_albedo_grad = spy.Tensor.zeros_like(lr_albedo_map)
lr_normal_grad = spy.Tensor.zeros_like(lr_normal_map)
lr_roughness_grad = spy.Tensor.zeros_like(lr_roughness_map)

# First moments
m_albedo = spy.Tensor.zeros_like(lr_albedo_grad)
# Second moments
v_albedo = spy.Tensor.zeros_like(lr_albedo_grad)
m_normal = spy.Tensor.zeros_like(lr_normal_grad)
v_normal = spy.Tensor.zeros_like(lr_normal_grad)
m_roughness = spy.Tensor.zeros_like(lr_roughness_grad)
v_roughness = spy.Tensor.zeros_like(lr_roughness_grad)

def getRandomDir():
    # Cosine-weighted random direction on the upper hemisphere
    r = math.sqrt(np.random.rand())
    phi = np.random.rand() * math.pi * 2
    Lx = r * math.sin(phi)
    Ly = r * math.cos(phi)
    Lz = math.sqrt(max(1 - r**2, 0))
    return spy.float3(Lx, Ly, Lz)

optimize_counter = 0
while app.process_events():
    light_dir = spy.math.normalize(spy.float3(0.2, 0.2, 1.0))
    xpos = 0
    bilinear_output = True

    # Full res rendered output BRDF from full res inputs.
    # Render at full resolution using the imported module.
    output = spy.Tensor.empty_like(albedo_map)
    module.render(
        pixel=spy.call_id(),
        material={
            "albedo": albedo_map,
            "normal": normal_map,
            "roughness": roughness_map,
        },
        light_dir=light_dir,
        view_dir=spy.float3(0, 0, 1),
        _result=output,
    )

    # Downsample the output tensor.
    # The downsampled high resolution result is treated as the reference (ideal) result.
    output = downsample(output, 2)

    # Blit tensor to screen.
    app.blit(output, size=spy.int2(1024, 1024), offset=spy.int2(xpos, 0), bilinear=bilinear_output)
    xpos += 1024 + 10

    # Same, but rendered from the quarter res trained maps.
    # This is the part being trained.
    lr_output = spy.Tensor.empty_like(output)
    module.render(
        pixel=spy.call_id(),
        material={
            "albedo": lr_trained_albedo_map,
            "normal": lr_trained_normal_map,
            "roughness": lr_trained_roughness_map,
        },
        light_dir=light_dir,
        view_dir=spy.float3(0, 0, 1),
        _result=lr_output,
    )

    # Blit tensor to screen.
    app.blit(
        lr_output, size=spy.int2(1024, 1024), offset=spy.int2(xpos, 0), bilinear=bilinear_output
    )
    xpos += 1024 + 10

    # Loss between downsampled output and quarter res rendered output.
    # Result from the conventional mipmaps, used for comparison.
    orig_loss_output = spy.Tensor.empty_like(output)
    module.loss(
        pixel=spy.call_id(),
        material={
            "albedo": lr_albedo_map,
            "normal": lr_normal_map,
            "roughness": lr_roughness_map,
        },
        reference=output,
        light_dir=light_dir,
        view_dir=spy.float3(0, 0, 1),
        _result=orig_loss_output,
    )

    # Loss between downsampled output and quarter res rendered output.
    # Measures the gap between the trained maps and the reference.
    loss_output = spy.Tensor.empty_like(output)
    module.loss(
        pixel=spy.call_id(),
        material={
            "albedo": lr_trained_albedo_map,
            "normal": lr_trained_normal_map,
            "roughness": lr_trained_roughness_map,
        },
        reference=output,
        light_dir=light_dir,
        view_dir=spy.float3(0, 0, 1),
        _result=loss_output,
    )

    # Blit tensor to screen.
    app.blit(
        loss_output, size=spy.int2(1024, 1024), offset=spy.int2(xpos, 0), tonemap=bilinear_output
    )
    xpos += 1024 + 10

    # Extra credit: start with a fast learning rate and slowly ramp down.
    training_progress_percentage = min(optimize_counter / 3000, 1.0)
    learning_rate = (
        0.002 * (1.0 - training_progress_percentage) + 0.0002 * training_progress_percentage
    )

    # NOTE: iterations can be turned up to 1000 if you want to see results quicker!
    # This controls how many training iterations run per frame.
    for i in range(50):
        module.calculate_grads(
            seed=spy.wang_hash(seed=optimize_counter, warmup=2),
            pixel=spy.grid(shape=lr_albedo_map.shape),
            material={
                "albedo": lr_trained_albedo_map,
                "normal": lr_trained_normal_map,
                "roughness": lr_trained_roughness_map,
                "albedo_grad": lr_albedo_grad,
                "normal_grad": lr_normal_grad,
                "roughness_grad": lr_roughness_grad,
            },
            ref_material={
                "albedo": albedo_map,
                "normal": normal_map,
                "roughness": roughness_map,
            },
        )
        optimize_counter += 1

        # Optimize the trained maps using the gradients.
        module.optimizer_step3(
            lr_trained_albedo_map,
            lr_albedo_grad,
            m_albedo,
            v_albedo,
            learning_rate,
            optimize_counter,
            False,
        )
        module.optimizer_step3(
            lr_trained_normal_map,
            lr_normal_grad,
            m_normal,
            v_normal,
            learning_rate,
            optimize_counter,
            True,
        )
        module.optimizer_step1(
            lr_trained_roughness_map,
            lr_roughness_grad,
            m_roughness,
            v_roughness,
            learning_rate,
            optimize_counter,
        )

    # Read the loss outputs back to NumPy and take the mean.
    orig_loss_np = orig_loss_output.to_numpy()
    orig_loss_value = np.mean(orig_loss_np)
    loss_np = loss_output.to_numpy()
    loss_value = np.mean(loss_np)
    print(f"Loss: {loss_value:.6f}, Original Loss: {orig_loss_value:.6f}")

    # Present the window.
    app.present()
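The m and v tensors passed to optimizer_step3 and optimizer_step1, together with the learning rate and iteration counter, are the first and second moment buffers of an Adam-style update. For reference, here is a minimal NumPy sketch of such an update; this is only an illustration of the technique, not the module's actual Slang code, and beta1, beta2 and eps are assumed default values:

import numpy as np

def adam_step(param, grad, m, v, learning_rate, iteration,
              beta1=0.9, beta2=0.999, eps=1e-8):
    # Update biased first and second moment estimates.
    m[:] = beta1 * m + (1.0 - beta1) * grad
    v[:] = beta2 * v + (1.0 - beta2) * grad * grad
    # Bias correction (iteration starts at 1).
    m_hat = m / (1.0 - beta1 ** iteration)
    v_hat = v / (1.0 - beta2 ** iteration)
    # Take the descent step.
    param[:] = param - learning_rate * m_hat / (np.sqrt(v_hat) + eps)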
The training loop above is fairly easy to follow. Next, let's look at how functions such as render, optimizer_step and calculate_grads are implemented in the Slang module.
Why write render, optimizer_step, calculate_grads and the other functions in Slang?
In the render function the GPU can process many thousands of pixels in parallel, which makes it fast. And in Slang we only need to mark a function with [Differentiable]; the compiler then generates the code that computes its derivatives for us.
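As a minimal sketch of what that mechanism looks like in isolation (a toy function, not part of the example module):

// A toy differentiable function: f(x) = x * x.
[Differentiable]
float square(float x)
{
    return x * x;
}

void demo()
{
    // Wrap the input in a DifferentialPair so the backward pass
    // has somewhere to accumulate dL/dx.
    DifferentialPair<float> x = diffPair(3.0f);

    // Back-propagate with an upstream gradient of 1.
    bwd_diff(square)(x, 1.0f);

    // x.d now holds the derivative 2 * 3 = 6.
    float dfdx = x.d;
}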
Now let's look at how MaterialParameters is written.
struct MaterialParameters
{
    RWTensor<float3, 2> albedo;
    RWTensor<float3, 2> normal;
    RWTensor<float, 2> roughness;

    RWTensor<float3, 2> albedo_grad;
    RWTensor<float3, 2> normal_grad;
    RWTensor<float, 2> roughness_grad;

    [Differentiable]
    float3 get_albedo(int2 pixel)
    {
        return albedo.getv(pixel);
    }

    [BackwardDerivativeOf(get_albedo)]
    void get_albedo_bwd(int2 pixel, float3 grad)
    {
        albedo_grad.setv(pixel, grad);
    }

    [Differentiable]
    float3 get_normal(int2 pixel)
    {
        return normal.getv(pixel);
    }

    [BackwardDerivativeOf(get_normal)]
    void get_normal_bwd(int2 pixel, float3 grad)
    {
        normal_grad.setv(pixel, grad);
    }

    [Differentiable]
    float get_roughness(int2 pixel)
    {
        return roughness.getv(pixel);
    }

    [BackwardDerivativeOf(get_roughness)]
    void get_roughness_bwd(int2 pixel, float grad)
    {
        roughness_grad.setv(pixel, grad);
    }
};
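On the Python side, these struct fields are filled in by passing a dict whose keys match the field names; slangpy maps the dict entries onto the struct members. This is exactly what the calculate_grads call in the training loop above does:

material={
    "albedo": lr_trained_albedo_map,        # -> MaterialParameters.albedo
    "normal": lr_trained_normal_map,        # -> MaterialParameters.normal
    "roughness": lr_trained_roughness_map,  # -> MaterialParameters.roughness
    "albedo_grad": lr_albedo_grad,          # -> MaterialParameters.albedo_grad
    "normal_grad": lr_normal_grad,          # -> MaterialParameters.normal_grad
    "roughness_grad": lr_roughness_grad,    # -> MaterialParameters.roughness_grad
},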
Data storage
RWTensor<float3, 2> albedo;    // albedo map (RGB color)
RWTensor<float3, 2> normal;    // normal map (3D vector)
RWTensor<float, 2> roughness;  // roughness map (scalar)
- Two-dimensional texture data is stored in RWTensor objects.
- The RW prefix means they support both reads and writes.
- The struct stores the three material properties together with their corresponding gradient buffers.
Each material property has a corresponding pair of forward and backward functions:
Forward function (reads the value):
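Taking get_albedo from the struct above as the example:

[Differentiable]
float3 get_albedo(int2 pixel)
{
    return albedo.getv(pixel);   // simply read the texel from the tensor
}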
Backward function (stores the gradient):
[BackwardDerivativeOf(get_albedo)]
void get_albedo_bwd(int2 pixel, float3 grad) {
    albedo_grad.setv(pixel, grad); // store the incoming gradient in the gradient buffer
}
The typical structure of a backward function
For a forward function f(x):
- Slang supplies grad_output, the upstream gradient ∂L/∂f(x) coming from the caller.
- You apply the chain rule to compute ∂L/∂x and write it into grad_x.
If you write the backward yourself, the same contract applies. Here we implement get_albedo_bwd by hand, and it simply stores the gradient: the getter just reads a texel, so its local derivative is 1 and the upstream gradient is written out unchanged.
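To make the chain-rule step concrete, here is a toy sketch in which the local derivative is not 1 and therefore has to be multiplied in. The names ScaledRoughness and get_scaled are made up for illustration; the sketch follows the same RWTensor pattern as the struct above:

// Toy example: the stored value is used scaled by a constant,
// so the backward must multiply the upstream gradient by that constant.
struct ScaledRoughness
{
    RWTensor<float, 2> roughness;
    RWTensor<float, 2> roughness_grad;

    [Differentiable]
    float get_scaled(int2 pixel)
    {
        // Forward: f(r) = 2 * r.
        return 2.0f * roughness.getv(pixel);
    }

    [BackwardDerivativeOf(get_scaled)]
    void get_scaled_bwd(int2 pixel, float grad_output)
    {
        // Chain rule: dL/dr = dL/df * df/dr = grad_output * 2.
        roughness_grad.setv(pixel, grad_output * 2.0f);
    }
};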
Let's now analyze the calculate_grads function.
[Differentiable]
float3 loss(int2 pixel, no_diff float3 reference, no_diff MaterialParameters material, no_diff float3 light_dir, no_diff float3 view_dir)
{
    float3 color = render(pixel, material, light_dir, view_dir);
    float3 error = color - reference;
    return error * error; // Squared error
}

void calculate_grads(uint seed, int2 pixel, MaterialParameters material, MaterialParameters ref_material)
{
    LCG lcg = LCG(seed);

    // Compute the reference color.
    // Sample light and view direction from a hemisphere.
    float3 light_dir = lcg.next_dir_on_hemisphere();
    float3 view_dir = lcg.next_dir_on_hemisphere();

    // Index of high res pixel we'll sample.
    int2 hi_res_pixel = pixel * 4;

    // Render and average 16 samples from the high resolution material.
    float3 sum = 0;
    for (int x0 = 0; x0 < 4; x0++)
    {
        for (int y0 = 0; y0 < 4; y0++)
        {
            int2 hi_res_pixel_offset = hi_res_pixel + int2(x0, y0);
            sum += render(hi_res_pixel_offset, ref_material, light_dir, view_dir);
        }
    }
    sum /= 16.0f;

    // Compute the gradients.
    // Back-propagate through the loss function. This results in
    // get_albedo_bwd, get_normal_bwd, get_roughness_bwd being
    // called with the gradients, which are then stored in buffers.
    bwd_diff(loss)(pixel, sum, material, light_dir, view_dir, 1);
}
bwd_diff(loss) gives us the generated backward function that computes the gradients; its arguments are the arguments of loss plus an upstream gradient.
Why do we need such an upstream gradient?
| Need | Upstream gradient (seed) example | Note |
|---|---|---|
| Differentiate only one channel of a multi-channel output | (1, 0, 0) | Selective backpropagation |
| Scale the loss, e.g. loss = 2*f(x) | 2 | Directional derivative of a scalar output |
| Vector-Jacobian product for a multi-channel function | any vector | Common in custom optimization |
| Image-sized gradients (N×M×3) | a mask or weight matrix | Common in renderers / MLPs |
The basic flow of bwd_diff is: receive the upstream gradient → compute the local derivative of the current operation → multiply it by the upstream gradient → pass the result on to the layer before it.
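For example, hypothetical variations of the bwd_diff(loss) call above (not in the example code) show what different upstream gradients mean:

// Propagate only the red channel of the squared error:
// each stored gradient becomes d(loss.r)/d(parameter).
bwd_diff(loss)(pixel, sum, material, light_dir, view_dir, float3(1, 0, 0));

// Scale every gradient by 2, equivalent to optimizing 2 * loss.
bwd_diff(loss)(pixel, sum, material, light_dir, view_dir, float3(2, 2, 2));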
