diff --git a/README.md b/README.md
index f044c82..09e19ee 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,52 @@ CUDA Denoiser For CUDA Path Tracer
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Dayu Li
+* Tested on: Windows 10, i7-10700K @ 3.80GHz 16GB, RTX 2070 8150MB (Personal laptop)
-### (TODO: Your README)
+## Features
+* A path-tracing denoiser based on the A-Trous wavelet filter, guided by geometry buffers (G-buffers)
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+## Effects
+* Tested with 8 iterations and color phi = 0.45, normal phi = 0.35, position phi = 0.2.
+
+| Raw | Denoised |
+| :----: | :----: |
+|![](./img/1.png)|![](./img/2.png)|
+
+### Performance Analysis
+The A-Trous filter runs only after all path-tracing iterations have finished, so the denoiser operates on the final frame and its cost is independent of the iteration count. For an 800 x 800 image, applying the denoiser with 5 filter levels takes about 5.88 ms.
+
+#### Number of Levels of the Filter
+Increasing the number of filter levels increases the computation time. Beyond 5 levels, however, adding more levels produces no visible improvement in the denoised result, so 4 or 5 levels is a reasonable setting. The time cost grows roughly linearly with the number of levels, which makes sense because the level count is simply the trip count of the filtering loop.
+
+Effects and timings for different numbers of filter levels (phi values are 0.45, 0.35, 0.2):
+
+| Filter Levels | Time | Effect |
+| :----: | :----: | :----: |
+| 1 | 2.14 ms |![](./img/layer1.png)|
+| 3 | 4.65 ms |![](./img/layer3.png)|
+| 5 | 5.37 ms |![](./img/layer5.png)|
+| 10 | 11.66 ms |![](./img/layer10.png)|
+
+### Changing the Phi Values
+Raising the phi values above those suggested in the A-Trous paper changes the edge-stopping weights computed by the denoiser and therefore produces a very different output image. Changing the phi values does not affect the time complexity of the filter, so the running time stays the same.
+
+* Tested with 8 iterations, raising the indicated phi value to 3.0
+
+| Setting | Effect |
+| :----: | :----: |
+| Large color phi | ![](./img/color.png)|
+| Large normal phi | ![](./img/normal.png)|
+| Large position phi | ![](./img/pos.png)|
+
+### Changing the Image Size
+Increasing the image size increases the number of pixels the filter must process and therefore the execution time, as the sketch below illustrates.
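+The scaling with image size (and with the level count above) follows from the filter's structure: each level applies a sparse 5 x 5 stencil to every pixel, with the tap spacing (`stepWidth`) doubling from one level to the next, so the work is roughly 25 taps per pixel per level and is independent of the phi values. The snippet below is a minimal, single-pixel sketch of that computation, simplified from the `ATrousFilter` kernel added in `src/pathtrace.cu`; the function name and the standalone `GBufferPixel` definition here are illustrative only, not part of the project code.
+
+```cpp
+#include <glm/glm.hpp>
+#include <cmath>
+
+// Mirrors the fields added to the project's GBufferPixel (sceneStructs.h).
+struct GBufferPixel { float t; glm::vec3 normal; glm::vec3 position; };
+
+// One A-Trous level for one pixel: a 5x5 stencil spread apart by stepWidth,
+// with edge-stopping weights driven by the color/normal/position phi values.
+glm::vec3 atrousPixel(int x, int y, int stepWidth, int width, int height,
+                      float colorPhi, float normalPhi, float posPhi,
+                      const glm::vec3* color, const GBufferPixel* gBuffer)
+{
+    // 1D B3-spline coefficients from the A-Trous paper; h[i] * h[j] gives the 5x5 kernel.
+    const float h[5] = { 1.f / 16.f, 1.f / 4.f, 3.f / 8.f, 1.f / 4.f, 1.f / 16.f };
+    const int idx = x + y * width;
+
+    glm::vec3 sum(0.f);
+    float weightSum = 0.f;
+    for (int i = 0; i < 5; ++i) {
+        for (int j = 0; j < 5; ++j) {
+            // Taps are offset by multiples of stepWidth and clamped to the image bounds.
+            glm::ivec2 uv(x + (i - 2) * stepWidth, y + (j - 2) * stepWidth);
+            uv = glm::clamp(uv, glm::ivec2(0), glm::ivec2(width - 1, height - 1));
+            const int nIdx = uv.x + uv.y * width;
+
+            // Each weight falls off with the squared difference, scaled by its phi:
+            // a larger phi tolerates bigger differences, i.e. blurs across edges.
+            glm::vec3 dc = color[idx] - color[nIdx];
+            float wc = std::exp(-glm::dot(dc, dc) / colorPhi);
+            glm::vec3 dn = gBuffer[idx].normal - gBuffer[nIdx].normal;
+            float wn = std::exp(-glm::dot(dn, dn) / normalPhi);
+            glm::vec3 dp = gBuffer[idx].position - gBuffer[nIdx].position;
+            float wp = std::exp(-glm::dot(dp, dp) / posPhi);
+
+            float w = wc * wn * wp * h[i] * h[j];
+            sum += color[nIdx] * w;
+            weightSum += w;
+        }
+    }
+    return sum / weightSum; // normalized, so the filter degenerates to a plain blur when all weights are ~1
+}
+```
+
+Because the taps are spread by `stepWidth` instead of the kernel itself growing, a large effective filter radius costs only a handful of these 5 x 5 passes, which is why the timings below grow with the pixel count rather than with the filter radius.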
+ +* Tested with 10 iteartions with 5 filter layers + +|Image Size| Time | +|---|---| +| 800 * 800 | 5.37ms| +| 1600 * 1600 | 26.35ms| +| 3200 * 3200 | 53.37ms| diff --git a/img/1.png b/img/1.png new file mode 100644 index 0000000..7459a8f Binary files /dev/null and b/img/1.png differ diff --git a/img/2.png b/img/2.png new file mode 100644 index 0000000..6485000 Binary files /dev/null and b/img/2.png differ diff --git a/img/color.png b/img/color.png new file mode 100644 index 0000000..c83d7f7 Binary files /dev/null and b/img/color.png differ diff --git a/img/layer1.png b/img/layer1.png new file mode 100644 index 0000000..f07690f Binary files /dev/null and b/img/layer1.png differ diff --git a/img/layer10.png b/img/layer10.png new file mode 100644 index 0000000..096781f Binary files /dev/null and b/img/layer10.png differ diff --git a/img/layer3.png b/img/layer3.png new file mode 100644 index 0000000..f975096 Binary files /dev/null and b/img/layer3.png differ diff --git a/img/layer5.png b/img/layer5.png new file mode 100644 index 0000000..1d33c7f Binary files /dev/null and b/img/layer5.png differ diff --git a/img/normal.png b/img/normal.png new file mode 100644 index 0000000..6ece29b Binary files /dev/null and b/img/normal.png differ diff --git a/img/pos.png b/img/pos.png new file mode 100644 index 0000000..7075a77 Binary files /dev/null and b/img/pos.png differ diff --git a/src/main.cpp b/src/main.cpp index 4092ae4..429fc6d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,7 +1,7 @@ #include "main.h" #include "preview.h" #include - +#include #include "../imgui/imgui.h" #include "../imgui/imgui_impl_glfw.h" #include "../imgui/imgui_impl_opengl3.h" @@ -24,11 +24,13 @@ int startupIterations = 0; int lastLoopIterations = 0; bool ui_showGbuffer = false; bool ui_denoise = false; -int ui_filterSize = 80; -float ui_colorWeight = 0.45f; -float ui_normalWeight = 0.35f; -float ui_positionWeight = 0.2f; +bool ui_denoise_changed = true; +int ui_filterLayers = 5; +float ui_color_phi = 0.45f; +float ui_normal_phi = 0.35f; +float ui_position_phi = 0.2f; bool ui_saveAndExit = false; +float avgerageTime = 0; static bool camchanged = true; static float dtheta = 0, dphi = 0; @@ -50,193 +52,217 @@ int height; //------------------------------- int main(int argc, char** argv) { - startTimeString = currentTimeString(); + startTimeString = currentTimeString(); - if (argc < 2) { - printf("Usage: %s SCENEFILE.txt\n", argv[0]); - return 1; - } + if (argc < 2) { + printf("Usage: %s SCENEFILE.txt\n", argv[0]); + return 1; + } - const char *sceneFile = argv[1]; + const char *sceneFile = argv[1]; - // Load scene file - scene = new Scene(sceneFile); + // Load scene file + scene = new Scene(sceneFile); - // Set up camera stuff from loaded path tracer settings - iteration = 0; - renderState = &scene->state; - Camera &cam = renderState->camera; - width = cam.resolution.x; - height = cam.resolution.y; + // Set up camera stuff from loaded path tracer settings + iteration = 0; + renderState = &scene->state; + Camera &cam = renderState->camera; + width = cam.resolution.x; + height = cam.resolution.y; - ui_iterations = renderState->iterations; - startupIterations = ui_iterations; + ui_iterations = renderState->iterations; + startupIterations = ui_iterations; - glm::vec3 view = cam.view; - glm::vec3 up = cam.up; - glm::vec3 right = glm::cross(view, up); - up = glm::cross(right, view); + glm::vec3 view = cam.view; + glm::vec3 up = cam.up; + glm::vec3 right = glm::cross(view, up); + up = glm::cross(right, view); - cameraPosition = 
cam.position; + cameraPosition = cam.position; - // compute phi (horizontal) and theta (vertical) relative 3D axis - // so, (0 0 1) is forward, (0 1 0) is up - glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); - glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); - phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); - theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); - ogLookAt = cam.lookAt; - zoom = glm::length(cam.position - ogLookAt); + // compute phi (horizontal) and theta (vertical) relative 3D axis + // so, (0 0 1) is forward, (0 1 0) is up + glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); + glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); + phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); + theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); + ogLookAt = cam.lookAt; + zoom = glm::length(cam.position - ogLookAt); - // Initialize CUDA and GL components - init(); + // Initialize CUDA and GL components + init(); - // GLFW main loop - mainLoop(); + // GLFW main loop + mainLoop(); - return 0; + return 0; } void saveImage() { - float samples = iteration; - // output image file - image img(width, height); - - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - int index = x + (y * width); - glm::vec3 pix = renderState->image[index]; - img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); - } - } - - std::string filename = renderState->imageName; - std::ostringstream ss; - ss << filename << "." << startTimeString << "." << samples << "samp"; - filename = ss.str(); - - // CHECKITOUT - img.savePNG(filename); - //img.saveHDR(filename); // Save a Radiance HDR file + float samples = ui_denoise? 1 : iteration; + // output image file + image img(width, height); + + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + int index = x + (y * width); + glm::vec3 pix = renderState->image[index]; + img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); + } + } + + std::string filename = renderState->imageName; + std::ostringstream ss; + ss << filename << "." << startTimeString << "." << samples << "samp"; + filename = ss.str(); + + // CHECKITOUT + img.savePNG(filename); + //img.saveHDR(filename); // Save a Radiance HDR file } void runCuda() { - if (lastLoopIterations != ui_iterations) { - lastLoopIterations = ui_iterations; - camchanged = true; - } - - if (camchanged) { - iteration = 0; - Camera &cam = renderState->camera; - cameraPosition.x = zoom * sin(phi) * sin(theta); - cameraPosition.y = zoom * cos(theta); - cameraPosition.z = zoom * cos(phi) * sin(theta); - - cam.view = -glm::normalize(cameraPosition); - glm::vec3 v = cam.view; - glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); - glm::vec3 r = glm::cross(v, u); - cam.up = glm::cross(r, v); - cam.right = r; - - cam.position = cameraPosition; - cameraPosition += cam.lookAt; - cam.position = cameraPosition; - camchanged = false; - } - - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). 
When mapped to CUDA, OpenGL should not use this buffer - - if (iteration == 0) { - pathtraceFree(); - pathtraceInit(scene); - } - - uchar4 *pbo_dptr = NULL; - cudaGLMapBufferObject((void**)&pbo_dptr, pbo); - - if (iteration < ui_iterations) { - iteration++; - - // execute the kernel - int frame = 0; - pathtrace(frame, iteration); - } - - if (ui_showGbuffer) { - showGBuffer(pbo_dptr); - } else { - showImage(pbo_dptr, iteration); - } - - // unmap buffer object - cudaGLUnmapBufferObject(pbo); - - if (ui_saveAndExit) { - saveImage(); - pathtraceFree(); - cudaDeviceReset(); - exit(EXIT_SUCCESS); - } + if (lastLoopIterations != ui_iterations) { + lastLoopIterations = ui_iterations; + camchanged = true; + } + + if (camchanged) { + iteration = 0; + Camera &cam = renderState->camera; + cameraPosition.x = zoom * sin(phi) * sin(theta); + cameraPosition.y = zoom * cos(theta); + cameraPosition.z = zoom * cos(phi) * sin(theta); + + cam.view = -glm::normalize(cameraPosition); + glm::vec3 v = cam.view; + glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); + glm::vec3 r = glm::cross(v, u); + cam.up = glm::cross(r, v); + cam.right = r; + + cam.position = cameraPosition; + cameraPosition += cam.lookAt; + cam.position = cameraPosition; + camchanged = false; + ui_denoise_changed = true; + } + + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer + + if (iteration == 0) { + pathtraceFree(); + pathtraceInit(scene); + } + + uchar4 *pbo_dptr = NULL; + cudaGLMapBufferObject((void**)&pbo_dptr, pbo); + + if (iteration < ui_iterations) { + iteration++; + + // execute the kernel + int frame = 0; + std::chrono::high_resolution_clock::time_point timer_start = std::chrono::high_resolution_clock::now(); + pathtrace(frame, iteration); + std::chrono::high_resolution_clock::time_point timer_end = std::chrono::high_resolution_clock::now(); + std::chrono::duration period = timer_end - timer_start; + float prev_cpu_time = static_cast(period.count()); + avgerageTime = (avgerageTime * (iteration - 1) + prev_cpu_time) / (iteration); + cout << "Iterations:" << iteration << ", Time: " << prev_cpu_time << ", Average Time" << avgerageTime << endl; + + } + else if (iteration == ui_iterations) + { + if (ui_denoise) { + std::chrono::high_resolution_clock::time_point timer_start = std::chrono::high_resolution_clock::now(); + denoise(iteration); + std::chrono::high_resolution_clock::time_point timer_end = std::chrono::high_resolution_clock::now(); + std::chrono::duration period = timer_end - timer_start; + float prev_cpu_time = static_cast(period.count()); + cout << "Denoising cost:" << prev_cpu_time << endl; + ui_denoise_changed = false; + } + } + + if (ui_showGbuffer) { + showGBuffer(pbo_dptr); + } + else if (iteration == ui_iterations && ui_denoise) { + showDenoisedImage(pbo_dptr); + } + else { + showImage(pbo_dptr, iteration); + } + + // unmap buffer object + cudaGLUnmapBufferObject(pbo); + + if (ui_saveAndExit) { + saveImage(); + pathtraceFree(); + cudaDeviceReset(); + exit(EXIT_SUCCESS); + } } void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods) { - if (action == GLFW_PRESS) { - switch (key) { - case GLFW_KEY_ESCAPE: - saveImage(); - glfwSetWindowShouldClose(window, GL_TRUE); - break; - case GLFW_KEY_S: - saveImage(); - break; - case GLFW_KEY_SPACE: - camchanged = true; - renderState = &scene->state; - Camera &cam = renderState->camera; - cam.lookAt = ogLookAt; - break; - } - } + if (action == 
GLFW_PRESS) { + switch (key) { + case GLFW_KEY_ESCAPE: + saveImage(); + glfwSetWindowShouldClose(window, GL_TRUE); + break; + case GLFW_KEY_S: + saveImage(); + break; + case GLFW_KEY_SPACE: + camchanged = true; + renderState = &scene->state; + Camera &cam = renderState->camera; + cam.lookAt = ogLookAt; + break; + } + } } void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods) { - if (ImGui::GetIO().WantCaptureMouse) return; - leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); - rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); - middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); + if (ImGui::GetIO().WantCaptureMouse) return; + leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); + rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); + middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); } void mousePositionCallback(GLFWwindow* window, double xpos, double ypos) { - if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into window causes re-start - if (leftMousePressed) { - // compute new camera parameters - phi -= (xpos - lastX) / width; - theta -= (ypos - lastY) / height; - theta = std::fmax(0.001f, std::fmin(theta, PI)); - camchanged = true; - } - else if (rightMousePressed) { - zoom += (ypos - lastY) / height; - zoom = std::fmax(0.1f, zoom); - camchanged = true; - } - else if (middleMousePressed) { - renderState = &scene->state; - Camera &cam = renderState->camera; - glm::vec3 forward = cam.view; - forward.y = 0.0f; - forward = glm::normalize(forward); - glm::vec3 right = cam.right; - right.y = 0.0f; - right = glm::normalize(right); - - cam.lookAt -= (float) (xpos - lastX) * right * 0.01f; - cam.lookAt += (float) (ypos - lastY) * forward * 0.01f; - camchanged = true; - } - lastX = xpos; - lastY = ypos; + if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into window causes re-start + if (leftMousePressed) { + // compute new camera parameters + phi -= (xpos - lastX) / width; + theta -= (ypos - lastY) / height; + theta = std::fmax(0.001f, std::fmin(theta, PI)); + camchanged = true; + } + else if (rightMousePressed) { + zoom += (ypos - lastY) / height; + zoom = std::fmax(0.1f, zoom); + camchanged = true; + } + else if (middleMousePressed) { + renderState = &scene->state; + Camera &cam = renderState->camera; + glm::vec3 forward = cam.view; + forward.y = 0.0f; + forward = glm::normalize(forward); + glm::vec3 right = cam.right; + right.y = 0.0f; + right = glm::normalize(right); + + cam.lookAt -= (float)(xpos - lastX) * right * 0.01f; + cam.lookAt += (float)(ypos - lastY) * forward * 0.01f; + camchanged = true; + } + lastX = xpos; + lastY = ypos; } diff --git a/src/main.h b/src/main.h index 06d311a..03d04df 100644 --- a/src/main.h +++ b/src/main.h @@ -36,10 +36,10 @@ extern int ui_iterations; extern int startupIterations; extern bool ui_showGbuffer; extern bool ui_denoise; -extern int ui_filterSize; -extern float ui_colorWeight; -extern float ui_normalWeight; -extern float ui_positionWeight; +extern int ui_filterLayers; +extern float ui_color_phi; +extern float ui_normal_phi; +extern float ui_position_phi; extern bool ui_saveAndExit; void runCuda(); diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 23e5f90..5a3b3e2 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -20,66 +20,66 @@ #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) 
void checkCUDAErrorFn(const char *msg, const char *file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); + int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); + return thrust::default_random_engine(h); } //Kernel that writes the image to the OpenGL PBO directly. __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)(pix.x / iter * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y / iter * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z / iter * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - float timeToIntersect = gBuffer[index].t * 256.0; - - pbo[index].w = 0; - pbo[index].x = timeToIntersect; - pbo[index].y = timeToIntersect; - pbo[index].z = timeToIntersect; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + float timeToIntersect = gBuffer[index].t * 256.0; + + pbo[index].w = 0; + pbo[index].x = timeToIntersect; + pbo[index].y = timeToIntersect; + pbo[index].z = timeToIntersect; + } } static Scene * hst_scene = NULL; @@ -90,44 +90,50 @@ static PathSegment * dev_paths = NULL; static ShadeableIntersection * dev_intersections = NULL; static GBufferPixel* dev_gBuffer = NULL; // TODO: static variables for device memory, any extra info you need, etc -// ... 
+static glm::vec3 * dev_denoised = NULL; +static glm::vec3 * dev_denoisedBuffer = NULL; // An image buffer for denoising void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + hst_scene = scene; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); + cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); - // TODO: initialize any extra device memeory you need + // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_denoised, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_denoised, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_denoisedBuffer, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_denoisedBuffer, 0, pixelcount * sizeof(glm::vec3)); - checkCUDAError("pathtraceInit"); + checkCUDAError("pathtraceInit"); } void pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - cudaFree(dev_gBuffer); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + cudaFree(dev_gBuffer); + // TODO: clean up any extra device memory you created + cudaFree(dev_denoised); + cudaFree(dev_denoisedBuffer); + checkCUDAError("pathtraceFree"); } /** @@ -148,12 +154,12 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path PathSegment & segment = pathSegments[index]; segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); segment.ray.direction = glm::normalize(cam.view - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); + ); segment.pixelIndex = index; 
segment.remainingBounces = traceDepth; @@ -161,13 +167,8 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } __global__ void computeIntersections( - int depth - , int num_paths - , PathSegment * pathSegments - , Geom * geoms - , int geoms_size - , ShadeableIntersection * intersections - ) + int depth, int num_paths, PathSegment * pathSegments, Geom * geoms, + int geoms_size, ShadeableIntersection * intersections) { int path_index = blockIdx.x * blockDim.x + threadIdx.x; @@ -225,64 +226,63 @@ __global__ void computeIntersections( } } -__global__ void shadeSimpleMaterials ( - int iter - , int num_paths - , ShadeableIntersection * shadeableIntersections - , PathSegment * pathSegments - , Material * materials - ) +__global__ void shadeSimpleMaterials( + int iter, int num_paths, ShadeableIntersection * shadeableIntersections, + PathSegment * pathSegments, Material * materials) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - PathSegment segment = pathSegments[idx]; - if (segment.remainingBounces == 0) { - return; - } - - if (intersection.t > 0.0f) { // if the intersection exists... - segment.remainingBounces--; - // Set up the RNG - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - segment.color *= (materialColor * material.emittance); - segment.remainingBounces = 0; - } - else { - segment.color *= materialColor; - glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; - scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. - } else { - segment.color = glm::vec3(0.0f); - segment.remainingBounces = 0; - } - - pathSegments[idx] = segment; - } + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + ShadeableIntersection intersection = shadeableIntersections[idx]; + PathSegment segment = pathSegments[idx]; + if (segment.remainingBounces == 0) { + return; + } + + if (intersection.t > 0.0f) { // if the intersection exists... + segment.remainingBounces--; + // Set up the RNG + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + segment.color *= (materialColor * material.emittance); + segment.remainingBounces = 0; + } + else { + segment.color *= materialColor; + glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; + scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. 
+ } + else { + segment.color = glm::vec3(0.0f); + segment.remainingBounces = 0; + } + + pathSegments[idx] = segment; + } } -__global__ void generateGBuffer ( - int num_paths, - ShadeableIntersection* shadeableIntersections, +__global__ void generateGBuffer( + int num_paths, + ShadeableIntersection* shadeableIntersections, PathSegment* pathSegments, - GBufferPixel* gBuffer) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - gBuffer[idx].t = shadeableIntersections[idx].t; - } + GBufferPixel* gBuffer) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + gBuffer[idx].t = shadeableIntersections[idx].t; + gBuffer[idx].normal = shadeableIntersections[idx].surfaceNormal; + gBuffer[idx].position = getPointOnRay(pathSegments[idx].ray, gBuffer[idx].t); + } } // Add the current iteration's output to the overall image @@ -297,56 +297,168 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati } } +__global__ void average( + int iter + ,Camera cam + ,glm::vec3 *image + ,glm::vec3 *imageAverage) +{ + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < cam.resolution.x && y < cam.resolution.y) { + + int idx = x + (y * cam.resolution.x); + glm::vec3 curr_pixel = image[idx]; + curr_pixel /= iter; + curr_pixel = glm::clamp(curr_pixel, glm::vec3(0.f), glm::vec3(1.f)); + + imageAverage[idx] = curr_pixel; + } +} + +__global__ void ATrousFilter( + int stepWidth + , Camera cam + , float color_phi + , float normal_phi + , float pos_phi + , GBufferPixel* gBuffer + , glm::vec3* image + , glm::vec3* imageDenoised) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < cam.resolution.x && y < cam.resolution.y) { + float h_lib[5] = { 1.f / 16.f, 1.f / 4.f, 1.f / 8.f, 1.f / 4.f, 1.f / 16.f }; + int idx = x+ y * cam.resolution.x; + + glm::vec3 sum = glm::vec3(0.f); + float sum_weight = 0.f; + for (int i = 0; i < 5; ++i) + { + for (int j = 0; j < 5; ++j) + { + glm::ivec2 uv = glm::ivec2(x + (i - 2) * stepWidth, y + (j - 2) * stepWidth); + uv = glm::clamp(uv, glm::ivec2(0, 0), glm::ivec2(cam.resolution.x-1,cam.resolution.y-1)); + int idx_uv = uv.x + uv.y * cam.resolution.x; + + //Color weight + glm::vec3 t = image[idx] - image[idx_uv]; + float dist2 = glm::dot(t, t); + float weight_c = glm::min(glm::exp(-dist2 / color_phi), 1.f); + + //Normal weight + t = gBuffer[idx_uv].normal - gBuffer[idx].normal; + dist2 = glm::max(glm::dot(t, t) / (stepWidth, stepWidth), 0.f); + float weight_n = glm::min(glm::exp(-dist2 / normal_phi), 1.f); + + //Position weight + t = gBuffer[idx_uv].position - gBuffer[idx].position; + dist2 = glm::dot(t, t); + float weight_p = glm::min(glm::exp(-dist2 / pos_phi), 1.f); + + float weight = weight_c * weight_n * weight_p; + sum += image[idx_uv] * weight * h_lib[i] * h_lib[j]; + sum_weight += weight * h_lib[i] * h_lib[j]; + } + } + imageDenoised[idx] = sum / sum_weight; + } +} + + +void denoise(int iter) { + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + // 2D block for denoising + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + //Average the image + average << < blocksPerGrid2d, blockSize2d >> > ( + iter + , cam + , dev_image + , dev_denoised); + + + int stepWidth = 1; + for (int i = 1; i <= 
ui_filterLayers; ++i) + { + ATrousFilter << > > ( + stepWidth + ,cam + ,ui_color_phi + ,ui_normal_phi + ,ui_position_phi + ,dev_gBuffer + ,dev_denoised + ,dev_denoisedBuffer); + stepWidth = stepWidth << 1; + std::swap(dev_denoised, dev_denoisedBuffer); + } + + // Send results to OpenGL buffer for rendering + cudaMemcpy(hst_scene->state.image.data(), dev_denoised, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + + checkCUDAError("denoise"); +} + /** * Wrapper for the __global__ call that sets up the kernel calls and does a ton * of memory management */ void pathtrace(int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + const int traceDepth = hst_scene->state.traceDepth; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // 1D block for path tracing const int blockSize1d = 128; - /////////////////////////////////////////////////////////////////////////// - - // Pathtracing Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. - // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * NEW: For the first depth, generate geometry buffers (gbuffers) - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. - // * Color is attenuated (multiplied) by reflections off of any object - // * Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally: - // * if not denoising, add this iteration's results to the image - // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); + /////////////////////////////////////////////////////////////////////////// + + // Pathtracing Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. 
+ // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * NEW: For the first depth, generate geometry buffers (gbuffers) + // * For each depth: + // * Compute an intersection in the scene for each path ray. + // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. + // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally: + // * if not denoising, add this iteration's results to the image + // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl + + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths); checkCUDAError("generate camera ray"); int depth = 0; @@ -356,78 +468,90 @@ void pathtrace(int frame, int iter) { // --- PathSegment Tracing Stage --- // Shoot ray into scene, bounce between objects, push shading chunks - // Empty gbuffer - cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); + // Empty gbuffer + cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); // clean shading chunks cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - bool iterationComplete = false; + bool iterationComplete = false; while (!iterationComplete) { - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections - ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + computeIntersections << > > ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); - if (depth == 0) { - generateGBuffer<<>>(num_paths, dev_intersections, dev_paths, dev_gBuffer); - } + if (depth == 0) { + generateGBuffer << > > (num_paths, dev_intersections, dev_paths, dev_gBuffer); + } - depth++; + depth++; - shadeSimpleMaterials<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = depth == traceDepth; + shadeSimpleMaterials << > > ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials + ); + iterationComplete = depth == traceDepth; } - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); + // Assemble this iteration 
and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + finalGather << > > (num_paths, dev_image, dev_paths); - /////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////// - // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. - // Otherwise, screenshots are also acceptable. - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. + // Otherwise, screenshots are also acceptable. + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - checkCUDAError("pathtrace"); + checkCUDAError("pathtrace"); } // CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging. void showGBuffer(uchar4* pbo) { - const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization - gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); + const Camera &cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization + gbufferToPBO << > > (pbo, cam.resolution, dev_gBuffer); } void showImage(uchar4* pbo, int iter) { -const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); + const Camera &cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); +} + +void showDenoisedImage(uchar4 * pbo) +{ + const Camera &cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, 1, dev_denoised); } diff --git a/src/pathtrace.h b/src/pathtrace.h index 9e12f44..a70b32a 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -2,9 +2,12 @@ #include #include "scene.h" +#include "main.h" void pathtraceInit(Scene *scene); void pathtraceFree(); void pathtrace(int frame, int iteration); void showGBuffer(uchar4 *pbo); void showImage(uchar4 *pbo, int iter); +void showDenoisedImage(uchar4 *pbo); +void denoise(int iter); diff --git a/src/preview.cpp b/src/preview.cpp index 3ca2718..2edc264 100644 --- a/src/preview.cpp +++ b/src/preview.cpp @@ -214,10 +214,10 @@ void drawGui(int windowWidth, int 
windowHeight) { ImGui::Checkbox("Denoise", &ui_denoise); - ImGui::SliderInt("Filter Size", &ui_filterSize, 0, 100); - ImGui::SliderFloat("Color Weight", &ui_colorWeight, 0.0f, 10.0f); - ImGui::SliderFloat("Normal Weight", &ui_normalWeight, 0.0f, 10.0f); - ImGui::SliderFloat("Position Weight", &ui_positionWeight, 0.0f, 10.0f); + ImGui::SliderInt("Filter Size", &ui_filterLayers, 0, 100); + ImGui::SliderFloat("Color Weight", &ui_color_phi, 0.0f, 10.0f); + ImGui::SliderFloat("Normal Weight", &ui_normal_phi, 0.0f, 10.0f); + ImGui::SliderFloat("Position Weight", &ui_position_phi, 0.0f, 10.0f); ImGui::Separator(); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index da7e558..131f211 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -79,4 +79,6 @@ struct ShadeableIntersection { // What information might be helpful for guiding a denoising filter? struct GBufferPixel { float t; + glm::vec3 normal; + glm::vec3 position; };