CIS565-Fall-2020 · WeiyuDu · Sep 30, 2020 · Sep 30, 2020 · Oct 7, 2020 · Oct 8, 2020
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.1)
 
-project(cis565_path_tracer)
+project(cis565_denoiser)
 
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
@@ -86,16 +86,34 @@ set(sources
     src/utilities.cpp
     )
 
+set(imgui
+    imgui/imconfig.h
+    imgui/imgui.cpp
+    imgui/imgui.h
+    imgui/imgui_draw.cpp
+    imgui/imgui_internal.h
+    imgui/imgui_widgets.cpp
+    imgui/imgui_demo.cpp
+    imgui/imgui_impl_glfw.cpp
+    imgui/imgui_impl_glfw.h
+    imgui/imgui_impl_opengl2.cpp
+    imgui/imgui_impl_opengl2.h
+    imgui/imgui_impl_opengl3.cpp
+    imgui/imgui_impl_opengl3.h
+    )
+
 list(SORT headers)
 list(SORT sources)
+list(SORT imgui)
 
 source_group(Headers FILES ${headers})
 source_group(Sources FILES ${sources})
+source_group(imgui FILES ${imgui})
 
 #add_subdirectory(stream_compaction)  # TODO: uncomment if using your stream compaction
 
-cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers})
+cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers} ${imgui})
 target_link_libraries(${CMAKE_PROJECT_NAME}
     ${LIBRARIES}
     #stream_compaction  # TODO: uncomment if using your stream compaction
-    )
+    )
diff --git a/README.md b/README.md
@@ -3,11 +3,106 @@ CUDA Path Tracer
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Weiyu Du
+* Tested on: CETS Virtual Lab
+### Part 3
+### Denoiser
+Left: denoised image; Right: original image. (iteration=20)
 
-### (TODO: Your README)
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/denoised_20.png" width=300/></nobr>
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/original_20.png" width=300/></nobr>
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+### Performance Analysis
+1. From the results above, we see that with denoising we obtain an acceptably smooth result in 20 iterations, while without denoising we need about 850 iterations.
 
+2. From the table below, we see the run time per iteration nearly doubled with denoising.
+
+|                        | with denoising | without denoising |
+| ---                    | ---            | ---               |
+| Run Time Per Iteration | 120.964        |  58.2717          |
+
+3. Denoising is more effective on objects with diffuse materials than on objects with reflective or refractive materials. This is because diffuse surfaces reflect light randomly, causing an uneven distribution of pixels getting mapped to the image in those areas. Refractive or reflective materials, on the other hand, reflect light more deterministically. Therefore, the diffuse surfaces benefit more from Gaussian blurring.
+
+4. Visual and run time comparison of different filter sizes.
+
+We observe that visual results improve greatly from filtersize=10 to 20 and from 20 to 40. At smaller filtersize, the image is less smooth and we can see blocks of color. The image quality is stable after filtersize=40. 
+
+We observe an increase in run time per iteration as we increase filtersize. However, the increase becomes smaller when filtersize is large. 
+
+| Filter Size            | 10             | 20                | 40      | 80      |
+| ---                    | ---            | ---               | ---     | ---     |
+| Run Time Per Iteration |   91.114       |    104.181        | 120.628 | 122.903 |
+| Visual Result          |    <img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/filter_10.png" width=150/>     |      <img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/filter_20.png" width=150/>      |  <img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/filter_40.png" width=150/>  | <img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/filter_80.png" width=150/>|
+
+5. Visual comparison of different scenes. Left: denoised image; Right: original image. (Iteration = 20; the same parameters are used as for the cornell ceiling light scene above.) We observe that the denoised results are not as good as for the cornell ceiling light scene. This may be because the cornell ceiling light scene has a large light source. When the number of iterations is low, more pixels get filled in the ceiling light scene than in the other two scenes. The original images from these two scenes are of worse quality, and therefore the denoised images are of worse quality as well.
+
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/cornell_denoised.png" width=300/></nobr>
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/cornell_orig.png" width=300/></nobr>
+
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/refractive_denoised.png" width=300/></nobr>
+<nobr><img src="https://github.com/WeiyuDu/Project4-CUDA-Denoiser/blob/denoiser/img/refractive_orig.png" width=300/></nobr>
+
+6. Run time comparison of denoising at different image resolutions. We observe that as image resolution increases, the run time per iteration also increases. The run time for rendering the denoised image is almost proportional to the number of pixels.
+
+| Image Resolution       | 200 x 200      | 400 x 400         | 600 x 600      | 800 x 800      |
+| ---                    | ---            | ---               | ---            | ---            |
+| Run Time Per Iteration |   12.8942      |    36.0212        | 70.0918        | 120.405        |
+
+### Part 2
+### Refraction
+Refraction rendering with Fresnel effects using Schlick's approximation
+
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/refract.png" width=300/>
+
+### Depth of Field
+From left to right: focus on foreground, focus on background
+
+<nobr><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/dof_close.png" width=300/>
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/dof_far.png" width=300/></nobr>
+
+### Stochastic Sampled Antialiasing
+
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/antialiasing.png" width=300/>
+
+### Arbitrary OBJ Mesh Loader
+
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/wahoo.png" width=300/>
+Performance comparison regarding bounding volume intersection culling (measured in time per iteration):
+
+| OBJ file | bounding volume intersection culling | naive implementation |
+| ---      | ---                                  | ---                  |
+| Sphere   | 98.122 | 129.479 |
+| Wahoo    | 1068.55 | 1453.84 |
+| Stanford Bunny | 11970.6 | 22964.9 |
+
+We observe that this optimization reduces the run time per iteration consistently across different obj files. Specifically, the more vertices an obj file has, the more significant the improvement from bounding volume intersection culling.
+
+### Stratified Sampling
+
+1) Comparison of stratified sampling (10x10 grid, left) and uniform random sampling (right) at 5000 iterations
+
+<nobr><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/strat_5000.png" width=300/><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/ref_5000.png" width=300/></nobr>
+
+2) Comparison of stratified sampling (10x10 grid, left) and uniform random sampling (right) at 100 iterations
+
+<nobr><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/strat_100iter_10x10.png" width=300/><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/ref_100iter_10x10.png" width=300/></nobr>
+
+### Motion Blur
+1) Defined motion in scene file
+
+<nobr><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/defined_motion1.png" width=300/><img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/defined_motion2.png" width=300/></nobr>
+
+2) User input camera motion (user drags the camera while rendering)
+
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/master/img/real_time_motion.png" width=300/>
+
+### Part 1
+### Render Result
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/mid-project-submission/img/render_res.png" width=300/>
+
+### Analysis
+1) Plot of elapsed time per iteration versus max ray depth (timed when sorting_material set to true)
+<img src="https://github.com/WeiyuDu/Project3-CUDA-Path-Tracer/blob/mid-project-submission/img/hw3_plot.png"/>
+
+- We expected that sorting the rays/path segments by material should improve the performance, because this will make the threads more likely to finish at around the same time, reducing waiting time for threads in the same warp. However, in reality we found that rendering without sorting is actually significantly faster. This may be because there isn't a variety of different materials in the scene. Since we're sorting the entire set of rays, this operation takes much more time than it saves.
+- From the plot above we see that increasing max ray depth results in longer run time per iteration. Rendering using first bounce cache is consistently faster than rendering without cache, though not by a large margin. This is expected as we save time by avoiding the initial intersection computation.
diff --git a/img/antialiasing.png b/img/antialiasing.png
diff --git a/img/cornell_denoised.png b/img/cornell_denoised.png
diff --git a/img/cornell_orig.png b/img/cornell_orig.png
diff --git a/img/defined_motion1.png b/img/defined_motion1.png
diff --git a/img/defined_motion2.png b/img/defined_motion2.png
diff --git a/img/denoised_20.png b/img/denoised_20.png
diff --git a/img/dof_close.png b/img/dof_close.png
diff --git a/img/dof_far.png b/img/dof_far.png
diff --git a/img/filter_10.png b/img/filter_10.png
diff --git a/img/filter_20.png b/img/filter_20.png
diff --git a/img/filter_40.png b/img/filter_40.png
diff --git a/img/filter_60.png b/img/filter_60.png
diff --git a/img/filter_80.png b/img/filter_80.png
diff --git a/img/original_20.png b/img/original_20.png
diff --git a/img/real_time_motion.png b/img/real_time_motion.png
diff --git a/img/ref_100iter_10x10.png b/img/ref_100iter_10x10.png
diff --git a/img/ref_5000.png b/img/ref_5000.png
diff --git a/img/ref_antialiasing.png b/img/ref_antialiasing.png
diff --git a/img/refract.png b/img/refract.png
diff --git a/img/refractive_denoised.png b/img/refractive_denoised.png
diff --git a/img/refractive_orig.png b/img/refractive_orig.png
diff --git a/img/strat_100iter_10x10.png b/img/strat_100iter_10x10.png
diff --git a/img/strat_5000.png b/img/strat_5000.png
diff --git a/img/wahoo.png b/img/wahoo.png
diff --git a/imgui/LICENSE.txt b/imgui/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014-2019 Omar Cornut
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/imgui/imconfig.h b/imgui/imconfig.h
@@ -0,0 +1,94 @@
+//-----------------------------------------------------------------------------
+// COMPILE-TIME OPTIONS FOR DEAR IMGUI
+// Runtime options (clipboard callbacks, enabling various features, etc.) can generally be set via the ImGuiIO structure.
+// You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions.
+//-----------------------------------------------------------------------------
+// A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/branch with your modifications to imconfig.h)
+// B) or add configuration directives in your own file and compile with #define IMGUI_USER_CONFIG "myfilename.h"
+// If you do so you need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which include
+// the imgui*.cpp files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an affect on data structures.
+// Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts.
+// Call IMGUI_CHECKVERSION() from your .cpp files to verify that the data structures your files are using are matching the ones imgui.cpp is using.
+//-----------------------------------------------------------------------------
+
+#pragma once
+
+//---- Define assertion handler. Defaults to calling assert().
+//#define IM_ASSERT(_EXPR)  MyAssert(_EXPR)
+//#define IM_ASSERT(_EXPR)  ((void)(_EXPR))     // Disable asserts
+
+//---- Define attributes of all API symbols declarations, e.g. for DLL under Windows
+// Using dear imgui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility.
+//#define IMGUI_API __declspec( dllexport )
+//#define IMGUI_API __declspec( dllimport )
+
+//---- Don't define obsolete functions/enums names. Consider enabling from time to time after updating to avoid using soon-to-be obsolete function/names.
+//#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS
+
+//---- Don't implement demo windows functionality (ShowDemoWindow()/ShowStyleEditor()/ShowUserGuide() methods will be empty)
+// It is very strongly recommended to NOT disable the demo windows during development. Please read the comments in imgui_demo.cpp.
+//#define IMGUI_DISABLE_DEMO_WINDOWS
+//#define IMGUI_DISABLE_METRICS_WINDOW
+
+//---- Don't implement some functions to reduce linkage requirements.
+//#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS   // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc.
+//#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS         // [Win32] Don't implement default IME handler. Won't use and link with ImmGetContext/ImmSetCompositionWindow.
+//#define IMGUI_DISABLE_WIN32_FUNCTIONS                     // [Win32] Won't use and link with any Win32 function (clipboard, ime).
+//#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS      // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default).
+//#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS            // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf)
+//#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS              // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself.
+//#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS              // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function.
+//#define IMGUI_DISABLE_DEFAULT_ALLOCATORS                  // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions().
+
+//---- Include imgui_user.h at the end of imgui.h as a convenience
+//#define IMGUI_INCLUDE_IMGUI_USER_H
+
+//---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another)
+//#define IMGUI_USE_BGRA_PACKED_COLOR
+
+//---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version
+// By default the embedded implementations are declared static and not available outside of imgui cpp files.
+//#define IMGUI_STB_TRUETYPE_FILENAME   "my_folder/stb_truetype.h"
+//#define IMGUI_STB_RECT_PACK_FILENAME  "my_folder/stb_rect_pack.h"
+//#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION
+//#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION
+
+//---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4.
+// This will be inlined as part of ImVec2 and ImVec4 class declarations.
+/*
+#define IM_VEC2_CLASS_EXTRA                                                 \
+        ImVec2(const MyVec2& f) { x = f.x; y = f.y; }                       \
+        operator MyVec2() const { return MyVec2(x,y); }
+
+#define IM_VEC4_CLASS_EXTRA                                                 \
+        ImVec4(const MyVec4& f) { x = f.x; y = f.y; z = f.z; w = f.w; }     \
+        operator MyVec4() const { return MyVec4(x,y,z,w); }
+*/
+
+//---- Use 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices.
+// Your renderer back-end will need to support it (most example renderer back-ends support both 16/32-bit indices).
+// Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer.
+// Read about ImGuiBackendFlags_RendererHasVtxOffset for details.
+//#define ImDrawIdx unsigned int
+
+//---- Override ImDrawCallback signature (will need to modify renderer back-ends accordingly)
+//struct ImDrawList;
+//struct ImDrawCmd;
+//typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data);
+//#define ImDrawCallback MyImDrawCallback
+
+//---- Debug Tools
+// Use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.
+//#define IM_DEBUG_BREAK  IM_ASSERT(0)
+//#define IM_DEBUG_BREAK  __debugbreak()
+// Have the Item Picker break in the ItemAdd() function instead of ItemHoverable() - which is earlier in the code, will catch a few extra items, allow picking items other than Hovered one.
+// This adds a small runtime cost which is why it is not enabled by default.
+//#define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX
+
+//---- Tip: You can add extra functions within the ImGui:: namespace, here or in your own headers files.
+/*
+namespace ImGui
+{
+    void MyFunction(const char* name, const MyMatrix44& v);
+}
+*/