diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f8947468..a2949b51e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## Release 515 Entries +### [515.76] 2022-09-20 + +#### Fixed + +- Improved compatibility with new Linux kernel releases +- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates + ### [515.65.01] 2022-08-02 #### Fixed diff --git a/README.md b/README.md index 49522fbae..5597a07d7 100644 --- a/README.md +++ b/README.md @@ -643,6 +643,8 @@ Subsystem Device ID. | NVIDIA A100-PG509-200 | 20B0 10DE 1450 | | NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 | | NVIDIA A100-SXM4-80GB | 20B2 10DE 147F | +| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 | +| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 | | NVIDIA PG506-242 | 20B3 10DE 14A7 | | NVIDIA PG506-243 | 20B3 10DE 14A8 | | NVIDIA A100 80GB PCIe | 20B5 10DE 1533 | @@ -743,6 +745,7 @@ Subsystem Device ID. | NVIDIA GeForce RTX 3050 | 2507 | | NVIDIA GeForce RTX 3050 OEM | 2508 | | NVIDIA GeForce RTX 3060 Laptop GPU | 2520 | +| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 | | NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 | | NVIDIA RTX A2000 | 2531 1028 151D | | NVIDIA RTX A2000 | 2531 103C 151D | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 63a61538c..822d5748e 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -203,9 +203,108 @@ $(obj)/conftest/patches.h: $(NV_CONFTEST_SCRIPT) @mkdir -p $(obj)/conftest @$(NV_CONFTEST_CMD) patch_check > $@ -$(obj)/conftest/headers.h: $(NV_CONFTEST_SCRIPT) - @mkdir -p $(obj)/conftest - @$(NV_CONFTEST_CMD) test_kernel_headers '$(NV_CONFTEST_CFLAGS)' > $@ + +# Each of these headers is checked for presence with a test #include; a +# corresponding #define will be generated in conftest/headers.h. 
+NV_HEADER_PRESENCE_TESTS = \ + asm/system.h \ + drm/drmP.h \ + drm/drm_auth.h \ + drm/drm_gem.h \ + drm/drm_crtc.h \ + drm/drm_atomic.h \ + drm/drm_atomic_helper.h \ + drm/drm_encoder.h \ + drm/drm_atomic_uapi.h \ + drm/drm_drv.h \ + drm/drm_framebuffer.h \ + drm/drm_connector.h \ + drm/drm_probe_helper.h \ + drm/drm_blend.h \ + drm/drm_fourcc.h \ + drm/drm_prime.h \ + drm/drm_plane.h \ + drm/drm_vblank.h \ + drm/drm_file.h \ + drm/drm_ioctl.h \ + drm/drm_device.h \ + drm/drm_mode_config.h \ + dt-bindings/interconnect/tegra_icc_id.h \ + generated/autoconf.h \ + generated/compile.h \ + generated/utsrelease.h \ + linux/efi.h \ + linux/kconfig.h \ + linux/platform/tegra/mc_utils.h \ + linux/semaphore.h \ + linux/printk.h \ + linux/ratelimit.h \ + linux/prio_tree.h \ + linux/log2.h \ + linux/of.h \ + linux/bug.h \ + linux/sched/signal.h \ + linux/sched/task.h \ + linux/sched/task_stack.h \ + xen/ioemu.h \ + linux/fence.h \ + linux/dma-resv.h \ + soc/tegra/chip-id.h \ + soc/tegra/fuse.h \ + soc/tegra/tegra_bpmp.h \ + video/nv_internal.h \ + linux/platform/tegra/dce/dce-client-ipc.h \ + linux/nvhost.h \ + linux/nvhost_t194.h \ + asm/book3s/64/hash-64k.h \ + asm/set_memory.h \ + asm/prom.h \ + asm/powernv.h \ + linux/atomic.h \ + asm/barrier.h \ + asm/opal-api.h \ + sound/hdaudio.h \ + asm/pgtable_types.h \ + linux/stringhash.h \ + linux/dma-map-ops.h \ + rdma/peer_mem.h \ + sound/hda_codec.h \ + linux/dma-buf.h \ + linux/time.h \ + linux/platform_device.h \ + linux/mutex.h \ + linux/reset.h \ + linux/of_platform.h \ + linux/of_device.h \ + linux/of_gpio.h \ + linux/gpio.h \ + linux/gpio/consumer.h \ + linux/interconnect.h \ + linux/pm_runtime.h \ + linux/clk.h \ + linux/clk-provider.h \ + linux/ioasid.h \ + linux/stdarg.h \ + linux/iosys-map.h \ + asm/coco.h + +# Filename to store the define for the header in $(1); this is only consumed by +# the rule below that concatenates all of these together. 
+NV_HEADER_PRESENCE_PART = $(addprefix $(obj)/conftest/header_presence/,$(addsuffix .part,$(1))) + +# Define a rule to check the header $(1). +define NV_HEADER_PRESENCE_CHECK + $$(call NV_HEADER_PRESENCE_PART,$(1)): $$(NV_CONFTEST_SCRIPT) $(obj)/conftest/uts_release + @mkdir -p $$(dir $$@) + @$$(NV_CONFTEST_CMD) test_kernel_header '$$(NV_CONFTEST_CFLAGS)' '$(1)' > $$@ +endef + +# Evaluate the rule above for each header in the list. +$(foreach header,$(NV_HEADER_PRESENCE_TESTS),$(eval $(call NV_HEADER_PRESENCE_CHECK,$(header)))) + +# Concatenate all of the parts into headers.h. +$(obj)/conftest/headers.h: $(call NV_HEADER_PRESENCE_PART,$(NV_HEADER_PRESENCE_TESTS)) + @cat $^ > $@ clean-dirs := $(obj)/conftest diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h index eeba17df5..dcfac7d03 100644 --- a/kernel-open/common/inc/nv-linux.h +++ b/kernel-open/common/inc/nv-linux.h @@ -227,6 +227,7 @@ static inline uid_t __kuid_val(uid_t uid) #endif #include /* fb_info struct */ +#include /* screen_info */ #if !defined(CONFIG_PCI) #warning "Attempting to build driver for a platform with no PCI support!" 
diff --git a/kernel-open/common/inc/nv-pgprot.h b/kernel-open/common/inc/nv-pgprot.h index b56d95611..581e97f3a 100644 --- a/kernel-open/common/inc/nv-pgprot.h +++ b/kernel-open/common/inc/nv-pgprot.h @@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot) #define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot) #if defined(NVCPU_AARCH64) -#if defined(NV_MT_DEVICE_GRE_PRESENT) -#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \ - PTE_ATTRINDX(MT_DEVICE_GRE)) -#else #define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \ PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#endif #define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \ __pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE) #define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot) diff --git a/kernel-open/common/inc/nv.h b/kernel-open/common/inc/nv.h index 923967220..568dfdf13 100644 --- a/kernel-open/common/inc/nv.h +++ b/kernel-open/common/inc/nv.h @@ -624,27 +624,45 @@ typedef enum #define NV_GET_NV_STATE(pGpu) \ (nv_state_t *)((pGpu) ? 
(pGpu)->pOsGpuInfo : NULL) -#define IS_REG_OFFSET(nv, offset, length) \ - (((offset) >= (nv)->regs->cpu_address) && \ - (((offset) + ((length)-1)) <= \ - (nv)->regs->cpu_address + ((nv)->regs->size-1))) - -#define IS_FB_OFFSET(nv, offset, length) \ - (((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \ - (((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1))) - -#define IS_UD_OFFSET(nv, offset, length) \ - (((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \ - ((offset) >= (nv)->ud.cpu_address) && \ - (((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1))) - -#define IS_IMEM_OFFSET(nv, offset, length) \ - (((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \ - ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \ - ((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \ - (((offset) + ((length) - 1)) <= \ - (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \ - ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))) +static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((offset >= nv->regs->cpu_address) && + + + + ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1)))); +} + +static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->fb) && (offset >= nv->fb->cpu_address) && + + + + ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1)))); +} + +static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) && + (offset >= nv->ud.cpu_address) && + + + + ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1)))); +} + +static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && + (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && + (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && + + + + ((offset + (length - 1)) <= 
(nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + + (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))); +} #define NV_RM_MAX_MSIX_LINES 8 diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index ee19e0374..5ec66bf37 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -55,9 +55,13 @@ append_conftest() { done } -translate_and_preprocess_header_files() { - # Inputs: - # $1: list of relative file paths +test_header_presence() { + # + # Determine if the given header file (which may or may not be + # present) is provided by the target kernel. + # + # Input: + # $1: relative file path # # This routine creates an upper case, underscore version of each of the # relative file paths, and uses that as the token to either define or @@ -73,115 +77,25 @@ translate_and_preprocess_header_files() { # strings, without special handling of the beginning or the end of the line. TEST_CFLAGS=`echo "-E -M $CFLAGS " | sed -e 's/\( -M[DG]\)* / /g'` - for file in "$@"; do - file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT + file="$1" + file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT - CODE="#include <$file>" + CODE="#include <$file>" - if echo "$CODE" | $CC $TEST_CFLAGS - > /dev/null 2>&1; then - echo "#define $file_define" + if echo "$CODE" | $CC $TEST_CFLAGS - > /dev/null 2>&1; then + echo "#define $file_define" + else + # If preprocessing failed, it could have been because the header + # file under test is not present, or because it is present but + # depends upon the inclusion of other header files. Attempting + # preprocessing again with -MG will ignore a missing header file + # but will still fail if the header file is present. 
+ if echo "$CODE" | $CC $TEST_CFLAGS -MG - > /dev/null 2>&1; then + echo "#undef $file_define" else - # If preprocessing failed, it could have been because the header - # file under test is not present, or because it is present but - # depends upon the inclusion of other header files. Attempting - # preprocessing again with -MG will ignore a missing header file - # but will still fail if the header file is present. - if echo "$CODE" | $CC $TEST_CFLAGS -MG - > /dev/null 2>&1; then - echo "#undef $file_define" - else - echo "#define $file_define" - fi + echo "#define $file_define" fi - done -} - -test_headers() { - # - # Determine which header files (of a set that may or may not be - # present) are provided by the target kernel. - # - FILES="asm/system.h" - FILES="$FILES drm/drmP.h" - FILES="$FILES drm/drm_auth.h" - FILES="$FILES drm/drm_gem.h" - FILES="$FILES drm/drm_crtc.h" - FILES="$FILES drm/drm_atomic.h" - FILES="$FILES drm/drm_atomic_helper.h" - FILES="$FILES drm/drm_encoder.h" - FILES="$FILES drm/drm_atomic_uapi.h" - FILES="$FILES drm/drm_drv.h" - FILES="$FILES drm/drm_framebuffer.h" - FILES="$FILES drm/drm_connector.h" - FILES="$FILES drm/drm_probe_helper.h" - FILES="$FILES drm/drm_blend.h" - FILES="$FILES drm/drm_fourcc.h" - FILES="$FILES drm/drm_prime.h" - FILES="$FILES drm/drm_plane.h" - FILES="$FILES drm/drm_vblank.h" - FILES="$FILES drm/drm_file.h" - FILES="$FILES drm/drm_ioctl.h" - FILES="$FILES drm/drm_device.h" - FILES="$FILES drm/drm_mode_config.h" - FILES="$FILES dt-bindings/interconnect/tegra_icc_id.h" - FILES="$FILES generated/autoconf.h" - FILES="$FILES generated/compile.h" - FILES="$FILES generated/utsrelease.h" - FILES="$FILES linux/efi.h" - FILES="$FILES linux/kconfig.h" - FILES="$FILES linux/platform/tegra/mc_utils.h" - FILES="$FILES linux/semaphore.h" - FILES="$FILES linux/printk.h" - FILES="$FILES linux/ratelimit.h" - FILES="$FILES linux/prio_tree.h" - FILES="$FILES linux/log2.h" - FILES="$FILES linux/of.h" - FILES="$FILES linux/bug.h" - 
FILES="$FILES linux/sched/signal.h" - FILES="$FILES linux/sched/task.h" - FILES="$FILES linux/sched/task_stack.h" - FILES="$FILES xen/ioemu.h" - FILES="$FILES linux/fence.h" - FILES="$FILES linux/dma-resv.h" - FILES="$FILES soc/tegra/chip-id.h" - FILES="$FILES soc/tegra/fuse.h" - FILES="$FILES soc/tegra/tegra_bpmp.h" - FILES="$FILES video/nv_internal.h" - FILES="$FILES linux/platform/tegra/dce/dce-client-ipc.h" - FILES="$FILES linux/nvhost.h" - FILES="$FILES linux/nvhost_t194.h" - FILES="$FILES asm/book3s/64/hash-64k.h" - FILES="$FILES asm/set_memory.h" - FILES="$FILES asm/prom.h" - FILES="$FILES asm/powernv.h" - FILES="$FILES linux/atomic.h" - FILES="$FILES asm/barrier.h" - FILES="$FILES asm/opal-api.h" - FILES="$FILES sound/hdaudio.h" - FILES="$FILES asm/pgtable_types.h" - FILES="$FILES linux/stringhash.h" - FILES="$FILES linux/dma-map-ops.h" - FILES="$FILES rdma/peer_mem.h" - FILES="$FILES sound/hda_codec.h" - FILES="$FILES linux/dma-buf.h" - FILES="$FILES linux/time.h" - FILES="$FILES linux/platform_device.h" - FILES="$FILES linux/mutex.h" - FILES="$FILES linux/reset.h" - FILES="$FILES linux/of_platform.h" - FILES="$FILES linux/of_device.h" - FILES="$FILES linux/of_gpio.h" - FILES="$FILES linux/gpio.h" - FILES="$FILES linux/gpio/consumer.h" - FILES="$FILES linux/interconnect.h" - FILES="$FILES linux/pm_runtime.h" - FILES="$FILES linux/clk.h" - FILES="$FILES linux/clk-provider.h" - FILES="$FILES linux/ioasid.h" - FILES="$FILES linux/stdarg.h" - FILES="$FILES linux/iosys-map.h" - FILES="$FILES asm/coco.h" - - translate_and_preprocess_header_files $FILES + fi } build_cflags() { @@ -2420,23 +2334,6 @@ compile_test() { compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types" ;; - mt_device_gre) - # - # Determine if MT_DEVICE_GRE flag is present. - # - # MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274 - # ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1 - # (2021-06-01). 
- # - CODE=" - #include - unsigned int conftest_mt_device_gre(void) { - return MT_DEVICE_GRE; - }" - - compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types" - ;; - get_user_pages) # # Conftest for get_user_pages() @@ -5366,6 +5263,23 @@ compile_test() { compile_check_conftest "$CODE" "NV_GET_TASK_IOPRIO_PRESENT" "" "functions" ;; + num_registered_fb) + # + # Determine if 'num_registered_fb' variable is present. + # + # 'num_registered_fb' was removed by commit 5727dcfd8486 + # ("fbdev: Make registered_fb[] private to fbmem.c") for + # v5.20 linux-next (2022-07-27). + # + CODE=" + #include + int conftest_num_registered_fb(void) { + return num_registered_fb; + }" + + compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types" + ;; + # When adding a new conftest entry, please use the correct format for # specifying the relevant upstream Linux kernel commit. # @@ -5764,14 +5678,14 @@ case "$5" in ;; - test_kernel_headers) + test_kernel_header) # - # Check for the availability of certain kernel headers + # Check for the availability of the given kernel header # CFLAGS=$6 - test_headers + test_header_presence "${7}" for file in conftest*.d; do rm -f $file > /dev/null 2>&1 diff --git a/kernel-open/nvidia-drm/nvidia-drm-helper.c b/kernel-open/nvidia-drm/nvidia-drm-helper.c index 3831180e0..8fc862068 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-helper.c +++ b/kernel-open/nvidia-drm/nvidia-drm-helper.c @@ -41,6 +41,19 @@ #include #endif +/* + * The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit + * 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from + * drm_crtc.h") in linux-next, expected in v5.19-rc7. + * + * We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always + * present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it + * is safe to unconditionally include it when drm_framebuffer_get() is present. 
+ */ +#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT) +#include +#endif + static void __nv_drm_framebuffer_put(struct drm_framebuffer *fb) { #if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT) diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index 8440b3c0c..383af3de7 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -59,6 +59,9 @@ #define NVKMS_LOG_PREFIX "nvidia-modeset: " +static bool output_rounding_fix = false; +module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400); + /* These parameters are used for fault injection tests. Normally the defaults * should be used. */ MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc"); @@ -71,6 +74,10 @@ module_param_named(malloc_verbose, malloc_verbose, bool, 0400); static atomic_t nvkms_alloc_called_count; +NvBool nvkms_output_rounding_fix(void) +{ + return output_rounding_fix; +} #define NVKMS_SYNCPT_STUBS_NEEDED diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h index 36685a026..8036811f9 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h +++ b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h @@ -110,6 +110,7 @@ typedef struct { } set_maxval; } NvKmsSyncPtOpParams; +NvBool nvkms_output_rounding_fix(void); void nvkms_call_rm (void *ops); void* nvkms_alloc (size_t size, diff --git a/kernel-open/nvidia-uvm/uvm_channel.c b/kernel-open/nvidia-uvm/uvm_channel.c index 5eaf3f14e..b1b2177f8 100644 --- a/kernel-open/nvidia-uvm/uvm_channel.c +++ b/kernel-open/nvidia-uvm/uvm_channel.c @@ -35,10 +35,6 @@ #include "nv_uvm_interface.h" #include "clb06f.h" -#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024 -#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32 -#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024) - static unsigned uvm_channel_num_gpfifo_entries = 
UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT; #define UVM_CHANNEL_GPFIFO_LOC_DEFAULT "auto" @@ -86,6 +82,12 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel, uvm_spin_lock(&channel->pool->lock); + // Completed value should never exceed the queued value + UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value, + "GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n", + channel->pool->manager->gpu->parent->name, channel->name, completed_value, + channel->tracking_sem.queued_value); + cpu_put = channel->cpu_put; gpu_get = channel->gpu_get; @@ -395,6 +397,14 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, { uvm_gpu_t *gpu = uvm_push_get_gpu(push); + // We used to skip the membar or use membar GPU for the semaphore release + // for a few pushes, but that doesn't provide sufficient ordering guarantees + // in some cases (e.g. ga100 with an LCE with PCEs from both HSHUBs) for the + // semaphore writes. To be safe, just always use a membar sys for now. 
+ // TODO bug 3770539: Optimize membars used by end of push semaphore releases + (void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU); + (void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE); + if (uvm_channel_is_ce(push->channel)) gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload); @@ -1562,6 +1572,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s) UVM_SEQ_OR_DBG_PRINT(s, "get %u\n", channel->gpu_get); UVM_SEQ_OR_DBG_PRINT(s, "put %u\n", channel->cpu_put); UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel)); + UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload); uvm_spin_unlock(&channel->pool->lock); } diff --git a/kernel-open/nvidia-uvm/uvm_channel.h b/kernel-open/nvidia-uvm/uvm_channel.h index 7e5add260..fadc4b3c1 100644 --- a/kernel-open/nvidia-uvm/uvm_channel.h +++ b/kernel-open/nvidia-uvm/uvm_channel.h @@ -46,6 +46,21 @@ // wait for a GPFIFO entry to free up. // +#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024 +#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32 +#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024) + +// Semaphore payloads cannot advance too much between calls to +// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps +// are bound by gpfifo sizing as we have to update the completed value to +// reclaim gpfifo entries. Set a limit based on the max gpfifo entries we could +// ever see. +// +// Logically this define belongs to uvm_gpu_semaphore.h but it depends on the +// channel GPFIFO sizing defined here so it's easiest to just have it here as +// uvm_channel.h includes uvm_gpu_semaphore.h. 
+#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX) + // Channel types typedef enum { diff --git a/kernel-open/nvidia-uvm/uvm_channel_test.c b/kernel-open/nvidia-uvm/uvm_channel_test.c index c7f31d059..2b7d78359 100644 --- a/kernel-open/nvidia-uvm/uvm_channel_test.c +++ b/kernel-open/nvidia-uvm/uvm_channel_test.c @@ -151,6 +151,37 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space) return status; } +static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space) +{ + NV_STATUS status; + uvm_gpu_t *gpu; + + for_each_va_space_gpu(gpu, va_space) { + uvm_channel_t *channel; + NvU64 completed_value; + + // The GPU channel manager is destroyed and then re-created after + // the test, so this test requires exclusive access to the GPU. + TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1); + + channel = &gpu->channel_manager->channel_pools[0].channels[0]; + completed_value = uvm_channel_update_completed_value(channel); + uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1); + + TEST_CHECK_RET(uvm_global_get_status() == NV_OK); + uvm_channel_update_progress_all(channel); + TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE); + + uvm_channel_manager_destroy(gpu->channel_manager); + // Destruction will hit the error again, so clear one more time. 
+ uvm_global_reset_fatal_error(); + + TEST_NV_CHECK_RET(uvm_channel_manager_create(gpu, &gpu->channel_manager)); + } + + return NV_OK; +} + static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu) { uvm_push_t push; @@ -712,6 +743,14 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct + g_uvm_global.disable_fatal_error_assert = true; + uvm_release_asserts_set_global_error_for_tests = true; + status = test_unexpected_completed_values(va_space); + uvm_release_asserts_set_global_error_for_tests = false; + g_uvm_global.disable_fatal_error_assert = false; + if (status != NV_OK) + goto done; + if (g_uvm_global.num_simulated_devices == 0) { status = test_rc(va_space); if (status != NV_OK) diff --git a/kernel-open/nvidia-uvm/uvm_common.c b/kernel-open/nvidia-uvm/uvm_common.c index f46761eb5..2e38472ef 100644 --- a/kernel-open/nvidia-uvm/uvm_common.c +++ b/kernel-open/nvidia-uvm/uvm_common.c @@ -48,6 +48,33 @@ module_param(uvm_enable_builtin_tests, int, S_IRUGO); MODULE_PARM_DESC(uvm_enable_builtin_tests, "Enable the UVM built-in tests. (This is a security risk)"); +// Default to release asserts being enabled. +int uvm_release_asserts __read_mostly = 1; + +// Make the module param writable so that release asserts can be enabled or +// disabled at any time by modifying the module parameter. +module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds."); + +// Default to failed release asserts not dumping stack. +int uvm_release_asserts_dump_stack __read_mostly = 0; + +// Make the module param writable so that dumping the stack can be enabled and +// disabled at any time by modifying the module parameter. +module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts."); + +// Default to failed release asserts not setting the global UVM error. 
+int uvm_release_asserts_set_global_error __read_mostly = 0; + +// Make the module param writable so that setting the global fatal error can be +// enabled and disabled at any time by modifying the module parameter. +module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts."); + +// A separate flag to enable setting global error, to be used by tests only. +bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false; + // // Convert kernel errno codes to corresponding NV_STATUS // diff --git a/kernel-open/nvidia-uvm/uvm_common.h b/kernel-open/nvidia-uvm/uvm_common.h index 1b93e2303..f5e320b41 100644 --- a/kernel-open/nvidia-uvm/uvm_common.h +++ b/kernel-open/nvidia-uvm/uvm_common.h @@ -80,6 +80,9 @@ bool uvm_debug_prints_enabled(void); #define UVM_ASSERT_PRINT(fmt, ...) \ UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__) +#define UVM_ASSERT_PRINT_RL(fmt, ...) \ + UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__) + #define UVM_ERR_PRINT(fmt, ...) \ UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__) @@ -146,9 +149,7 @@ void on_uvm_test_fail(void); // Unlike on_uvm_test_fail it provides 'panic' coverity semantics void on_uvm_assert(void); -// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always enabled, even on -// release builds. -#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \ +#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \ do { \ if (unlikely(!(expr))) { \ UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \ @@ -157,9 +158,6 @@ void on_uvm_assert(void); } \ } while (0) -#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) 
_UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__) -#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n") - // Prevent function calls in expr and the print argument list from being // evaluated. #define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \ @@ -170,13 +168,42 @@ void on_uvm_assert(void); // UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds #if UVM_IS_DEBUG() || defined __COVERITY__ - #define UVM_ASSERT_MSG UVM_ASSERT_MSG_RELEASE - #define UVM_ASSERT UVM_ASSERT_RELEASE + #define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__) + #define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n") #else #define UVM_ASSERT_MSG(expr, fmt, ...) UVM_ASSERT_MSG_IGNORE(expr, fmt, ##__VA_ARGS__) #define UVM_ASSERT(expr) UVM_ASSERT_MSG_IGNORE(expr, "\n") #endif +// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always included in the +// build, even on release builds. They are skipped at runtime if +// uvm_release_asserts is 0. + +// Whether release asserts are enabled and whether they should dump the stack +// and set the global error. +extern int uvm_release_asserts; +extern int uvm_release_asserts_dump_stack; +extern int uvm_release_asserts_set_global_error; +extern bool uvm_release_asserts_set_global_error_for_tests; + +// Given these are enabled for release builds, we need to be more cautious than +// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module +// param is enabled. +#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) 
\ + do { \ + if (uvm_release_asserts && unlikely(!(expr))) { \ + UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \ + if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \ + uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \ + if (uvm_release_asserts_dump_stack) \ + dump_stack(); \ + on_uvm_assert(); \ + } \ + } while (0) + +#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__) +#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n") + // Provide a short form of UUID's, typically for use in debug printing: #define ABBREV_UUID(uuid) (unsigned)(uuid) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c index f649de09a..9ae32eeb3 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c @@ -25,6 +25,7 @@ #include "uvm_lock.h" #include "uvm_global.h" #include "uvm_kvmalloc.h" +#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP #define UVM_SEMAPHORE_SIZE 4 #define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE @@ -467,9 +468,16 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin // push, it's easily guaranteed because of the small number of GPFIFO // entries available per channel (there could be at most as many pending // pushes as GPFIFO entries). 
- if (new_sem_value < old_sem_value) + if (unlikely(new_sem_value < old_sem_value)) new_value += 1ULL << 32; + // Check for unexpected large jumps of the semaphore value + UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP, + "GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n", + tracking_semaphore->semaphore.page->pool->gpu->parent->name, + (NvU64)(uintptr_t)tracking_semaphore->semaphore.payload, + old_value, new_value); + // Use an atomic write even though the spinlock is held so that the value can // be (carefully) read atomically outside of the lock. // diff --git a/kernel-open/nvidia-uvm/uvm_gpu_semaphore_test.c b/kernel-open/nvidia-uvm/uvm_gpu_semaphore_test.c index 220d0a46b..b8a8d2874 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore_test.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore_test.c @@ -27,6 +27,18 @@ #include "uvm_va_space.h" #include "uvm_kvmalloc.h" +static NV_STATUS set_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 new_value) +{ + uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value); + TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value); + TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value)); + TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1)); + TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1)); + TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem)); + + return NV_OK; +} + static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 increment_by) { NvU64 new_value; @@ -43,13 +55,45 @@ static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value)); TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_completed(tracking_sem)); - 
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value); - TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value); + TEST_NV_CHECK_RET(set_and_test(tracking_sem, new_value)); TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed)); - TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value)); - TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1)); - TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1)); - TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem)); + + return NV_OK; +} + +// Set the current state of the sema, avoiding UVM_GPU_SEMAPHORE_MAX_JUMP +// detection. +static void manual_set(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value) +{ + uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)value); + atomic64_set(&tracking_sem->completed_value, value); + tracking_sem->queued_value = value; +} + +// Set the starting value and payload and expect a global error +static NV_STATUS set_and_expect_error(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 starting_value, NvU32 payload) +{ + manual_set(tracking_sem, starting_value); + uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, payload); + + TEST_CHECK_RET(uvm_global_get_status() == NV_OK); + uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem); + TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE); + + return NV_OK; +} + +static NV_STATUS test_invalid_jumps(uvm_gpu_tracking_semaphore_t *tracking_sem) +{ + int i; + for (i = 0; i < 10; ++i) { + NvU64 base = (1ULL<<32) * i; + TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UVM_GPU_SEMAPHORE_MAX_JUMP + 1)); + TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UINT_MAX)); + TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + i + 1, i)); + 
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 + UVM_GPU_SEMAPHORE_MAX_JUMP + 1)); + TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 - i - 1)); + } return NV_OK; } @@ -73,11 +117,31 @@ static NV_STATUS test_tracking(uvm_va_space_t *va_space) goto done; for (i = 0; i < 100; ++i) { - status = add_and_test(&tracking_sem, UINT_MAX - 1); + status = add_and_test(&tracking_sem, UVM_GPU_SEMAPHORE_MAX_JUMP - i); + if (status != NV_OK) + goto done; + } + + // Test wrap-around cases + for (i = 0; i < 100; ++i) { + // Start with a value right before wrap-around + NvU64 starting_value = (1ULL<<32) * (i + 1) - i - 1; + manual_set(&tracking_sem, starting_value); + + // And set payload to after wrap-around + status = set_and_test(&tracking_sem, (1ULL<<32) * (i + 1) + i); if (status != NV_OK) goto done; } + g_uvm_global.disable_fatal_error_assert = true; + uvm_release_asserts_set_global_error_for_tests = true; + status = test_invalid_jumps(&tracking_sem); + uvm_release_asserts_set_global_error_for_tests = false; + g_uvm_global.disable_fatal_error_assert = false; + if (status != NV_OK) + goto done; + done: uvm_gpu_tracking_semaphore_free(&tracking_sem); return status; diff --git a/kernel-open/nvidia-uvm/uvm_push.h b/kernel-open/nvidia-uvm/uvm_push.h index 6d8d15021..931006e64 100644 --- a/kernel-open/nvidia-uvm/uvm_push.h +++ b/kernel-open/nvidia-uvm/uvm_push.h @@ -52,11 +52,21 @@ typedef enum // By default all operations include a membar sys after any transfer and // before a semaphore operation. // This flag indicates that next operation should use no membar at all. + // + // For end of push semaphore release, this flag indicates that the push + // itself does not need a membar to be used (membar sys is the default). A + // membar may still be used, if needed to order the semaphore release + // write. See comments in uvm_channel_end_push(). 
UVM_PUSH_FLAG_NEXT_MEMBAR_NONE, // By default all operations include a membar sys after any transfer and // before a semaphore operation. // This flag indicates that next operation should use a membar gpu instead. + // + // For end of push semaphore release, this flag indicates that the push + // itself only needs a membar gpu (the default is membar sys). A membar sys + // may still be used, if needed to order the semaphore release write. See + // comments in uvm_channel_end_push(). UVM_PUSH_FLAG_NEXT_MEMBAR_GPU, UVM_PUSH_FLAG_COUNT, diff --git a/kernel-open/nvidia/nv-dmabuf.c b/kernel-open/nvidia/nv-dmabuf.c index 29894e9d0..84d3146a4 100644 --- a/kernel-open/nvidia/nv-dmabuf.c +++ b/kernel-open/nvidia/nv-dmabuf.c @@ -820,8 +820,13 @@ nv_dma_buf_reuse( goto cleanup_dmabuf; } + + + + if (params->index > (priv->total_objects - params->numObjects)) { + status = NV_ERR_INVALID_ARGUMENT; goto unlock_priv; } diff --git a/kernel-open/nvidia/nv-mmap.c b/kernel-open/nvidia/nv-mmap.c index 5c0f764c1..b62719cda 100644 --- a/kernel-open/nvidia/nv-mmap.c +++ b/kernel-open/nvidia/nv-mmap.c @@ -132,6 +132,13 @@ nvidia_vma_access( pageIndex = ((addr - vma->vm_start) >> PAGE_SHIFT); pageOffset = (addr & ~PAGE_MASK); + + + + + + + if (!mmap_context->valid) { nv_printf(NV_DBG_ERRORS, "NVRM: VM: invalid mmap context\n"); @@ -430,7 +437,7 @@ static int nvidia_mmap_numa( const nv_alloc_mapping_context_t *mmap_context) { NvU64 start, addr; - unsigned int pages; + NvU64 pages; NvU64 i; pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT; @@ -509,6 +516,13 @@ int nvidia_mmap_helper( NvU64 access_start = mmap_context->access_start; NvU64 access_len = mmap_context->access_size; + + + + + + + if (IS_REG_OFFSET(nv, access_start, access_len)) { if (nv_encode_caching(&vma->vm_page_prot, NV_MEMORY_UNCACHED, diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index babdd2882..b8ce6d5a9 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -1467,6 +1467,11 @@ static int 
nv_open_device(nv_state_t *nv, nvidia_stack_t *sp) return -ENODEV; } + + + + + if ( ! (nv->flags & NV_FLAG_OPEN)) { /* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */ diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild index 6941c651b..6ca6abae8 100644 --- a/kernel-open/nvidia/nvidia.Kbuild +++ b/kernel-open/nvidia/nvidia.Kbuild @@ -219,6 +219,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_dram_clk_to_mc_clk NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_get_dram_num_channels NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_dram_types NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_pxm_to_node +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_screen_info NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t @@ -242,9 +243,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vmalloc_has_pgprot_t_arg NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock NV_CONFTEST_TYPE_COMPILE_TESTS += pci_channel_state NV_CONFTEST_TYPE_COMPILE_TESTS += pci_dev_has_ats_enabled -NV_CONFTEST_TYPE_COMPILE_TESTS += mt_device_gre NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build diff --git a/kernel-open/nvidia/nvlink_linux.c b/kernel-open/nvidia/nvlink_linux.c index 6c44d949b..af8a048d0 100644 --- a/kernel-open/nvidia/nvlink_linux.c +++ b/kernel-open/nvidia/nvlink_linux.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2015-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -207,7 +207,10 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp) nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n"); - WARN_ON(private == NULL); + + + + mutex_lock(&nvlink_drvctx.lock); diff --git a/kernel-open/nvidia/os-interface.c b/kernel-open/nvidia/os-interface.c index f8810c338..c9ff6f8da 100644 --- a/kernel-open/nvidia/os-interface.c +++ b/kernel-open/nvidia/os-interface.c @@ -1120,31 +1120,58 @@ void NV_API_CALL os_get_screen_info( NvU64 consoleBar2Address ) { -#if defined(CONFIG_FB) - int i; *pPhysicalAddress = 0; *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0; - for (i = 0; i < num_registered_fb; i++) +#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT) + if (num_registered_fb > 0) { - if (!registered_fb[i]) - continue; + int i; + + for (i = 0; i < num_registered_fb; i++) + { + if (!registered_fb[i]) + continue; + + /* Make sure base address is mapped to GPU BAR */ + if ((registered_fb[i]->fix.smem_start == consoleBar1Address) || + (registered_fb[i]->fix.smem_start == consoleBar2Address)) + { + *pPhysicalAddress = registered_fb[i]->fix.smem_start; + *pFbWidth = registered_fb[i]->var.xres; + *pFbHeight = registered_fb[i]->var.yres; + *pFbDepth = registered_fb[i]->var.bits_per_pixel; + *pFbPitch = registered_fb[i]->fix.line_length; + break; + } + } + } +#elif NV_IS_EXPORT_SYMBOL_PRESENT_screen_info + /* + * If there is not a framebuffer console, return 0 size. + * + * orig_video_isVGA is set to 1 during early Linux kernel + * initialization, and then will be set to a value, such as + * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used. 
+ */ + if (screen_info.orig_video_isVGA > 1) + { + NvU64 physAddr = screen_info.lfb_base; +#if defined(VIDEO_CAPABILITY_64BIT_BASE) + physAddr |= (NvU64)screen_info.ext_lfb_base << 32; +#endif /* Make sure base address is mapped to GPU BAR */ - if ((registered_fb[i]->fix.smem_start == consoleBar1Address) || - (registered_fb[i]->fix.smem_start == consoleBar2Address)) + if ((physAddr == consoleBar1Address) || + (physAddr == consoleBar2Address)) { - *pPhysicalAddress = registered_fb[i]->fix.smem_start; - *pFbWidth = registered_fb[i]->var.xres; - *pFbHeight = registered_fb[i]->var.yres; - *pFbDepth = registered_fb[i]->var.bits_per_pixel; - *pFbPitch = registered_fb[i]->fix.line_length; - break; + *pPhysicalAddress = physAddr; + *pFbWidth = screen_info.lfb_width; + *pFbHeight = screen_info.lfb_height; + *pFbDepth = screen_info.lfb_depth; + *pFbPitch = screen_info.lfb_linelength; } } -#else - *pPhysicalAddress = 0; - *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0; #endif } diff --git a/src/common/inc/nvlog_defs.h b/src/common/inc/nvlog_defs.h index e0b3b415f..45e1a3f3a 100644 --- a/src/common/inc/nvlog_defs.h +++ b/src/common/inc/nvlog_defs.h @@ -195,6 +195,11 @@ extern NVLOG_LOGGER NvLogLogger; #define NVLOG_BUFFER_FLAGS_FORMAT_LIBOS_LOG 1 #define NVLOG_BUFFER_FLAGS_FORMAT_MEMTRACK 2 +// Never deallocate this buffer until RM is unloaded +#define NVLOG_BUFFER_FLAGS_PRESERVE 11:11 +#define NVLOG_BUFFER_FLAGS_PRESERVE_NO 0 +#define NVLOG_BUFFER_FLAGS_PRESERVE_YES 1 + // Buffer GPU index #define NVLOG_BUFFER_FLAGS_GPU_INSTANCE 31:24 diff --git a/src/common/modeset/timing/nvtiming.h b/src/common/modeset/timing/nvtiming.h index 70ee491cf..047d7af93 100644 --- a/src/common/modeset/timing/nvtiming.h +++ b/src/common/modeset/timing/nvtiming.h @@ -4091,6 +4091,8 @@ typedef struct tagNVT_GAMUT_METADATA #define NVT_DPCD_ADDRESS_DOWN_REP_BUFFER_FIELD 0x01400 #define NVT_DPCD_ADDRESS_UP_REQ_BUFFER_FIELD 0x01600 #define NVT_DPCD_ADDRESS_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x02003 
+#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_IEEE_OUI 0xE0000 +#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_ID_STRING 0xE0003 #define NVT_DPCD_ADDRESS_DP_TUNNELING_CAPS_SUPPORT_FIELD 0xE000D #define NVT_DPCD_ADDRESS_DP_IN_ADAPTER_INFO_FIELD 0xE000E #define NVT_DPCD_ADDRESS_USB4_DRIVER_ID_FIELD 0xE000F @@ -5079,7 +5081,7 @@ typedef struct tagNVT_DPCD_CONFIG typedef struct tagNVT_DPCD_DP_TUNNELING_CAPS { - NvU8 dpTunnelingSupport : 1; // DP Tunneling through USB4 Support + NvU8 dpTunneling : 1; // DP Tunneling through USB4 Support NvU8 reserved : 5; // Reserved. NvU8 dpPanelReplayTunnelingOptSupport : 1; // Panel Replay Tunneling Optimization Support NvU8 dpInBwAllocationModeSupport : 1; // DP IN Bandwidth Allocation Mode Support diff --git a/src/common/nvlink/interface/nvlink_lib_ctrl.h b/src/common/nvlink/interface/nvlink_lib_ctrl.h index e81aed4a8..19bb0168e 100644 --- a/src/common/nvlink/interface/nvlink_lib_ctrl.h +++ b/src/common/nvlink/interface/nvlink_lib_ctrl.h @@ -64,7 +64,7 @@ * Total number of nvlink endpoints core library can have * This is mapped to NVLINK_MAX_SYSTEM_LINK_NUM in drivers/nvlink/interface/nvlink.h */ -#define NVLINK_MAX_NVLINK_ENDPOINTS 312 +#define NVLINK_MAX_NVLINK_ENDPOINTS 624 #define NVLINK_VERSION_STRING_LENGTH 64 diff --git a/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c b/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c index 0afb10f8e..a0a07853e 100644 --- a/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c +++ b/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c @@ -28,6 +28,7 @@ #include "../nvlink_ctx.h" #include "../nvlink_helper.h" #include "nvlink_lock.h" +#include "nvctassert.h" #define NVLINK_IOC_GET_BUF(ctrlParams, type) (ctrlParams)->size >= sizeof(type) ? 
(type *) (ctrlParams)->buf : NULL @@ -3423,6 +3424,8 @@ nvlink_lib_ctrl_get_device_link_states NvU32 numLinks = 0; NvU32 i = 0; + ct_assert(NVLINK_MAX_SYSTEM_LINK_NUM == NVLINK_MAX_NVLINK_ENDPOINTS); + nvlink_link **links = (nvlink_link **)nvlink_malloc( sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM); if (links == NULL) diff --git a/src/common/uproc/os/libos-v2.0.0/debug/logdecode.c b/src/common/uproc/os/libos-v2.0.0/debug/logdecode.c index f898adadb..8b0f58175 100644 --- a/src/common/uproc/os/libos-v2.0.0/debug/logdecode.c +++ b/src/common/uproc/os/libos-v2.0.0/debug/logdecode.c @@ -1041,24 +1041,41 @@ static NvBool libosCopyLogToNvlog_nowrap(LIBOS_LOG_DECODE_LOG *pLog) NvU64 putCopy = pLog->physicLogBuffer[0]; NvU64 putOffset = putCopy * sizeof(NvU64) + sizeof(NvU64); - if (putOffset == pNvLogBuffer->pos) + // + // If RM was not unloaded, we will reuse a preserved nowrap nvlog buffer with the fresh + // physical log buffer. In this case, we fix up all the offsets into the nvlog buffer to be + // relative to its preserved position rather than the start. + // + NvU64 nvlogPos = pNvLogBuffer->pos - pLog->preservedNoWrapPos; + + if (putOffset < nvlogPos) + { + // Buffer put counter unexpectedly reset. Terminate nowrap log collection. + return NV_FALSE; + } + + if (putOffset == nvlogPos) { // No new data return NV_TRUE; } - if (putOffset > pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)) + if (putOffset + pLog->preservedNoWrapPos > + pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)) { // Are we done filling nowrap? 
return NV_FALSE; } - NvU64 len = putOffset - pNvLogBuffer->pos; - NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + pNvLogBuffer->pos; + NvU64 len = putOffset - nvlogPos; + NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + nvlogPos; NvU8 *pDst = pNoWrapBuf->data + pNvLogBuffer->pos; + + pLog->bDidPush = NV_TRUE; + portMemCopy(pDst, len, pSrc, len); - pNvLogBuffer->pos = putOffset; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider? - *(NvU64 *)(pNoWrapBuf->data) = putCopy; + pNvLogBuffer->pos = putOffset + pLog->preservedNoWrapPos; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider? + *(NvU64 *)(pNoWrapBuf->data) = putCopy + pLog->preservedNoWrapPos / sizeof(NvU64); return NV_TRUE; } @@ -1095,6 +1112,46 @@ static void libosExtractLogs_nvlog(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLo } } +void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode) +{ + NvU64 i; + for (i = 0; i < pLogDecode->numLogBuffers; i++) + { + LIBOS_LOG_DECODE_LOG *pLog = &pLogDecode->log[i]; + + if (pLog->bDidPush) + { + NvHandle hNvlog = pLog->hNvLogNoWrap; + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[hNvlog]; + + if (hNvlog == 0 || pNvLogBuffer == NULL) + continue; + + pNvLogBuffer->flags |= DRF_DEF(LOG, _BUFFER_FLAGS, _PRESERVE, _YES); + } + } +} + +static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle) +{ + NVLOG_BUFFER_HANDLE handle = 0; + NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); + + if (status != NV_OK) + return NV_FALSE; + + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; + if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && + DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && + (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + *pHandle = handle; + return NV_TRUE; + } + + return NV_FALSE; +} + #endif // LIBOS_LOG_TO_NVLOG /** @@ -1211,39 +1268,60 @@ void 
libosLogAddLogEx(LIBOS_LOG_DECODE *logDecode, void *buffer, NvU64 bufferSiz pLog->hNvLogWrap = 0; pLog->bNvLogNoWrap = NV_FALSE; - LIBOS_LOG_NVLOG_BUFFER *pNoWrapBuf; + pLog->bDidPush = NV_FALSE; + pLog->preservedNoWrapPos = 0; - status = nvlogAllocBuffer( - bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosNoWrapBufferFlags, - LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2), - &pLog->hNvLogNoWrap); + LIBOS_LOG_NVLOG_BUFFER *pNoWrapBuf; + NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2); + NvBool bFoundPreserved = findPreservedNvlogBuffer(tag, gpuInstance, &pLog->hNvLogNoWrap); - if (status == NV_OK) + if (!bFoundPreserved) { - pNoWrapBuf = (LIBOS_LOG_NVLOG_BUFFER *)NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->data; - if (name) + status = nvlogAllocBuffer( + bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosNoWrapBufferFlags, + tag, + &pLog->hNvLogNoWrap); + + if (status == NV_OK) { - portStringCopy( - pNoWrapBuf->taskPrefix, sizeof pNoWrapBuf->taskPrefix, name, sizeof pNoWrapBuf->taskPrefix); - } + pNoWrapBuf = (LIBOS_LOG_NVLOG_BUFFER *)NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->data; + if (name) + { + portStringCopy( + pNoWrapBuf->taskPrefix, sizeof pNoWrapBuf->taskPrefix, name, sizeof pNoWrapBuf->taskPrefix); + } - pNoWrapBuf->gpuArch = gpuArch; - pNoWrapBuf->gpuImpl = gpuImpl; + pNoWrapBuf->gpuArch = gpuArch; + pNoWrapBuf->gpuImpl = gpuImpl; - NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos = sizeof(NvU64); // offset to account for put pointer - pLog->bNvLogNoWrap = NV_TRUE; + NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos = sizeof(NvU64); // offset to account for put pointer + pLog->bNvLogNoWrap = NV_TRUE; + } + else + { + printf("nvlogAllocBuffer nowrap failed\n"); + } } else { - printf("nvlogAllocBuffer nowrap failed\n"); + pLog->bNvLogNoWrap = NV_TRUE; + pLog->preservedNoWrapPos = NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos; + + // + // The 0th NvU64 is the last value of put pointer from the physical log buffer, 
which is + // the number of NvU64 log buffer elements in it plus one. + // Subtract one NvU64 from it to avoid off-by-one error. + // + if (pLog->preservedNoWrapPos >= sizeof(NvU64)) + pLog->preservedNoWrapPos -= sizeof(NvU64); } LIBOS_LOG_NVLOG_BUFFER *pWrapBuf; + tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1); status = nvlogAllocBuffer( bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosWrapBufferFlags, - LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1), - &pLog->hNvLogWrap); + tag, &pLog->hNvLogWrap); if (status == NV_OK) { @@ -1349,13 +1427,13 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode) if (pLog->hNvLogNoWrap != 0) { - nvlogDeallocBuffer(pLog->hNvLogNoWrap); + nvlogDeallocBuffer(pLog->hNvLogNoWrap, NV_FALSE); pLog->hNvLogNoWrap = 0; } if (pLog->hNvLogWrap != 0) { - nvlogDeallocBuffer(pLog->hNvLogWrap); + nvlogDeallocBuffer(pLog->hNvLogWrap, NV_FALSE); pLog->hNvLogWrap = 0; } } diff --git a/src/common/uproc/os/libos-v2.0.0/debug/logdecode.h b/src/common/uproc/os/libos-v2.0.0/debug/logdecode.h index 48e6ea048..51f3b8123 100644 --- a/src/common/uproc/os/libos-v2.0.0/debug/logdecode.h +++ b/src/common/uproc/os/libos-v2.0.0/debug/logdecode.h @@ -108,6 +108,9 @@ struct LIBOS_LOG_DECODE_LOG NvU32 hNvLogNoWrap; // No wrap buffer captures first records. NvU32 hNvLogWrap; // Wrap buffer captures last records. NvBool bNvLogNoWrap; // NV_TRUE if no wrap buffer not full. 
+ + NvBool bDidPush; // NV_TRUE if this buffer was ever pushed to + NvU64 preservedNoWrapPos; // Position in preserved nvlog buffer #endif #if LIBOS_LOG_DECODE_ENABLE @@ -170,6 +173,8 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode); void libosExtractLogs(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLog); +void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode); + #ifdef __cplusplus } #endif diff --git a/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h b/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h index 36685a026..8036811f9 100644 --- a/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h +++ b/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h @@ -110,6 +110,7 @@ typedef struct { } set_maxval; } NvKmsSyncPtOpParams; +NvBool nvkms_output_rounding_fix(void); void nvkms_call_rm (void *ops); void* nvkms_alloc (size_t size, diff --git a/src/nvidia-modeset/src/nvkms-evo3.c b/src/nvidia-modeset/src/nvkms-evo3.c index 76458f06d..6cb390ef8 100644 --- a/src/nvidia-modeset/src/nvkms-evo3.c +++ b/src/nvidia-modeset/src/nvkms-evo3.c @@ -1288,6 +1288,8 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head) const float32_t zeroF32 = NvU32viewAsF32(NV_FLOAT_ZERO); const float32_t oneF32 = NvU32viewAsF32(NV_FLOAT_ONE); + const float32_t inv2048F32 = f32_div(NvU32viewAsF32(NV_FLOAT_HALF), + NvU32viewAsF32(NV_FLOAT_1024)); /* divide satCos by the default setting of 1024 */ const float32_t satCos = f32_div(i32_to_f32(pHeadState->procAmp.satCos), NvU32viewAsF32(NV_FLOAT_1024)); @@ -1324,6 +1326,12 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head) ocsc0Matrix = nvMultiply3x4Matrix(&satHueMatrix, &ocsc0Matrix); ocsc0Matrix = nvMultiply3x4Matrix(&CrYCbtoRGBMatrix, &ocsc0Matrix); + if (nvkms_output_rounding_fix()) { + ocsc0Matrix.m[0][3] = f32_add(ocsc0Matrix.m[0][3], inv2048F32); + ocsc0Matrix.m[1][3] = f32_add(ocsc0Matrix.m[1][3], inv2048F32); + ocsc0Matrix.m[2][3] = 
f32_add(ocsc0Matrix.m[2][3], inv2048F32); + } + nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OCSC0COEFFICIENT_C00(head), 12); nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C00, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][0]))); nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C01, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][1]))); @@ -1965,11 +1973,13 @@ static inline NvU32 GetMaxPixelsFetchedPerLine(NvU16 inWidth, static void SetScalingUsageBoundsOneWindow5( NVDevEvoPtr pDevEvo, NvU32 window, const struct NvKmsScalingUsageBounds *pScaling, + NvBool layerUsable, const NVHwModeViewPortEvo *pViewPort, NVEvoUpdateState *updateState) { NVEvoChannelPtr pChannel = pDevEvo->core; NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5; + NvU32 maxPixelsFetchedPerLine; nvUpdateUpdateState(pDevEvo, updateState, pChannel); @@ -1981,10 +1991,15 @@ static void SetScalingUsageBoundsOneWindow5( DRF_NUM(C57D, _WINDOW_SET_MAX_INPUT_SCALE_FACTOR, _VERTICAL, pScaling->maxVDownscaleFactor)); + if (layerUsable) { + maxPixelsFetchedPerLine = GetMaxPixelsFetchedPerLine(pViewPort->in.width, + pScaling->maxHDownscaleFactor); + } else { + maxPixelsFetchedPerLine = 0; + } + setWindowUsageBounds |= - (DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE, - GetMaxPixelsFetchedPerLine(pViewPort->in.width, - pScaling->maxHDownscaleFactor))) | + (DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,maxPixelsFetchedPerLine)) | (pScaling->vTaps >= NV_EVO_SCALER_5TAPS ? 
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_5) : DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2)) | @@ -2056,8 +2071,9 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, needCoreUpdate = EvoSetUsageBounds3(pDevEvo, sd, head, pUsage, updateState); for (layer = 0; layer < pDevEvo->head[head].numLayers; layer++) { - if (!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling, - &pUsage->layer[layer].scaling)) { + if ((pCurrentUsage->layer[layer].usable != pUsage->layer[layer].usable) || + (!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling, + &pUsage->layer[layer].scaling))) { const NVHwModeViewPortEvo *pViewPort = &pDevEvo->gpus[sd].pDispEvo->headState[head].timings.viewPort; @@ -2066,6 +2082,7 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, NV_EVO_CHANNEL_MASK_WINDOW_NUMBER( pDevEvo->head[head].layer[layer]->channelMask), &pUsage->layer[layer].scaling, + pUsage->layer[layer].usable, pViewPort, updateState); needCoreUpdate = TRUE; @@ -4383,7 +4400,9 @@ static void EvoSetLUTContextDmaC5(const NVDispEvoRec *pDispEvo, nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OLUT_CONTROL(head), 1); nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) | + (!nvkms_output_rounding_fix() ? 
+ DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) : + DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _DISABLE)) | DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MIRROR, _DISABLE) | DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MODE, _DIRECT10) | DRF_NUM(C57D, _HEAD_SET_OLUT_CONTROL, _SIZE, NV_LUT_VSS_HEADER_SIZE + @@ -5180,13 +5199,11 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, const NVHwModeViewPortEvo *pViewPortMin, const NVHwModeViewPortEvo *pViewPort, const NVHwModeViewPortEvo *pViewPortMax, - NVEvoUpdateState *updateState, - NvU32 setWindowUsageBounds) + NVEvoUpdateState *updateState) { const NVEvoCapabilitiesPtr pEvoCaps = &pDevEvo->gpus[0].capabilities; NVEvoChannelPtr pChannel = pDevEvo->core; struct NvKmsScalingUsageBounds scalingUsageBounds = { }; - NvU32 win; /* These methods should only apply to a single pDpy */ nvAssert(pDevEvo->subDevMaskStackDepth > 0); @@ -5232,31 +5249,6 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, DRF_NUM(C37D, _HEAD_SET_MAX_OUTPUT_SCALE_FACTOR, _VERTICAL, scalingUsageBounds.maxVDownscaleFactor)); - /* - * Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds - * for each window that’s attached to the head. - * - * Precomp will clip the post-scaled window to the input viewport, reverse-scale - * this cropped size back to the input surface domain, and isohub will fetch - * this cropped size. This function assumes that there's no window scaling yet, - * so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport - * width. SetScalingUsageBoundsOneWindow5() will take care of updating - * MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later. 
- */ - setWindowUsageBounds |= - DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE, - GetMaxPixelsFetchedPerLine(pViewPort->in.width, - NV_EVO_SCALE_FACTOR_1X)); - - for (win = 0; win < pDevEvo->numWindows; win++) { - if (head != pDevEvo->headForWindow[win]) { - continue; - } - - nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); - nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds); - } - return scalingUsageBounds.vUpscalingAllowed; } @@ -5267,10 +5259,11 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, NVEvoUpdateState *updateState) { NVEvoChannelPtr pChannel = pDevEvo->core; + NvU32 win; + NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3; NvBool verticalUpscalingAllowed = EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, - pViewPortMax, updateState, - NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); + pViewPortMax, updateState); nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); @@ -5280,6 +5273,34 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, (verticalUpscalingAllowed ? DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) : DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE))); + /* + * Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds + * for each window that is attached to the head. + * + * Precomp will clip the post-scaled window to the input viewport, reverse-scale + * this cropped size back to the input surface domain, and isohub will fetch + * this cropped size. This function assumes that there's no window scaling yet, + * so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport + * width. SetScalingUsageBoundsOneWindow5() will take care of updating + * MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later. + * On Volta, Program for each window that is attached to head. 
For turing+, + * SetScalingUsageBoundsOneWindow5() will take care of programming window + * usage bounds only for the layers/windows in use. + */ + + setWindowUsageBounds |= + DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE, + GetMaxPixelsFetchedPerLine(pViewPort->in.width, + NV_EVO_SCALE_FACTOR_1X)); + + for (win = 0; win < pDevEvo->numWindows; win++) { + if (head != pDevEvo->headForWindow[win]) { + continue; + } + + nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); + nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds); + } } static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, @@ -5289,13 +5310,9 @@ static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, NVEvoUpdateState *updateState) { NVEvoChannelPtr pChannel = pDevEvo->core; - NvU32 setWindowUsageBounds = - (NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5 | - DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2) | - DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)); NvU32 verticalUpscalingAllowed = EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, - pViewPortMax, updateState, setWindowUsageBounds); + pViewPortMax, updateState); nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); diff --git a/src/nvidia/arch/nvalloc/unix/include/nv.h b/src/nvidia/arch/nvalloc/unix/include/nv.h index 1349d634d..96324c7a2 100644 --- a/src/nvidia/arch/nvalloc/unix/include/nv.h +++ b/src/nvidia/arch/nvalloc/unix/include/nv.h @@ -619,27 +619,33 @@ typedef enum #define NV_GET_NV_STATE(pGpu) \ (nv_state_t *)((pGpu) ? 
(pGpu)->pOsGpuInfo : NULL) -#define IS_REG_OFFSET(nv, offset, length) \ - (((offset) >= (nv)->regs->cpu_address) && \ - (((offset) + ((length)-1)) <= \ - (nv)->regs->cpu_address + ((nv)->regs->size-1))) - -#define IS_FB_OFFSET(nv, offset, length) \ - (((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \ - (((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1))) - -#define IS_UD_OFFSET(nv, offset, length) \ - (((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \ - ((offset) >= (nv)->ud.cpu_address) && \ - (((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1))) - -#define IS_IMEM_OFFSET(nv, offset, length) \ - (((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \ - ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \ - ((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \ - (((offset) + ((length) - 1)) <= \ - (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \ - ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))) +static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((offset >= nv->regs->cpu_address) && + ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1)))); +} + +static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->fb) && (offset >= nv->fb->cpu_address) && + ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1)))); +} + +static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) && + (offset >= nv->ud.cpu_address) && + ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1)))); +} + +static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) +{ + return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && + (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && + (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && + ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address 
+ + (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))); +} #define NV_RM_MAX_MSIX_LINES 8 diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c index 13c7171b0..b832852d4 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osapi.c +++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c @@ -780,10 +780,8 @@ static NV_STATUS RmAccessRegistry( RmStatus = NV_ERR_INVALID_STRING_LENGTH; goto done; } - // get access to client's parmStr RMAPI_PARAM_COPY_INIT(parmStrParamCopy, tmpParmStr, clientParmStrAddress, ParmStrLength, 1); - parmStrParamCopy.flags |= RMAPI_PARAM_COPY_FLAGS_ZERO_BUFFER; RmStatus = rmapiParamsAcquire(&parmStrParamCopy, NV_TRUE); if (RmStatus != NV_OK) { @@ -2026,6 +2024,7 @@ static NV_STATUS RmGetAllocPrivate( PMEMORY_DESCRIPTOR pMemDesc; NvU32 pageOffset; NvU64 pageCount; + NvU64 endingOffset; RsResourceRef *pResourceRef; RmResource *pRmResource; void *pMemData; @@ -2086,8 +2085,9 @@ static NV_STATUS RmGetAllocPrivate( if (rmStatus != NV_OK) goto done; - pageCount = ((pageOffset + length) / os_page_size); - pageCount += (*pPageIndex + (((pageOffset + length) % os_page_size) ? 1 : 0)); + endingOffset = pageOffset + length; + pageCount = (endingOffset / os_page_size); + pageCount += (*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0)); if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount)) { diff --git a/src/nvidia/arch/nvalloc/unix/src/osinit.c b/src/nvidia/arch/nvalloc/unix/src/osinit.c index 49ede1008..77eac6c14 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osinit.c +++ b/src/nvidia/arch/nvalloc/unix/src/osinit.c @@ -362,10 +362,6 @@ osHandleGpuLost pmc_boot_0 = NV_PRIV_REG_RD32(nv->regs->map_u, NV_PMC_BOOT_0); if (pmc_boot_0 != nvp->pmc_boot_0) { - RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); - NV2080_CTRL_GPU_GET_OEM_BOARD_INFO_PARAMS *pBoardInfoParams; - NV_STATUS status; - // // This doesn't support PEX Reset and Recovery yet. 
// This will help to prevent accessing registers of a GPU @@ -376,24 +372,11 @@ osHandleGpuLost NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "GPU has fallen off the bus.\n"); - pBoardInfoParams = portMemAllocNonPaged(sizeof(*pBoardInfoParams)); - if (pBoardInfoParams != NULL) + if (pGpu->boardInfo != NULL && pGpu->boardInfo->serialNumber[0] != '\0') { - portMemSet(pBoardInfoParams, 0, sizeof(*pBoardInfoParams)); - - status = pRmApi->Control(pRmApi, nv->rmapi.hClient, - nv->rmapi.hSubDevice, - NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO, - pBoardInfoParams, - sizeof(*pBoardInfoParams)); - if (status == NV_OK) - { - NV_DEV_PRINTF(NV_DBG_ERRORS, nv, - "GPU serial number is %s.\n", - pBoardInfoParams->serialNumber); - } - - portMemFree(pBoardInfoParams); + NV_DEV_PRINTF(NV_DBG_ERRORS, nv, + "GPU serial number is %s.\n", + pGpu->boardInfo->serialNumber); } gpuSetDisconnectedProperties(pGpu); diff --git a/src/nvidia/generated/g_gpu_nvoc.h b/src/nvidia/generated/g_gpu_nvoc.h index 40ecf92fd..41e5e01d6 100644 --- a/src/nvidia/generated/g_gpu_nvoc.h +++ b/src/nvidia/generated/g_gpu_nvoc.h @@ -60,6 +60,7 @@ typedef struct GPUATTACHARG GPUATTACHARG; * */ #include "ctrl/ctrl0080/ctrl0080gpu.h" // NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS (form hal) #include "ctrl/ctrl2080/ctrl2080internal.h" // NV2080_CTRL_CMD_INTERNAL_MAX_BSPS/NVENCS +#include "ctrl/ctrl2080/ctrl2080ecc.h" #include "ctrl/ctrl2080/ctrl2080nvd.h" #include "class/cl2080.h" #include "class/cl90cd.h" diff --git a/src/nvidia/generated/g_kernel_gsp_nvoc.h b/src/nvidia/generated/g_kernel_gsp_nvoc.h index 0b5d0758a..0657f317c 100644 --- a/src/nvidia/generated/g_kernel_gsp_nvoc.h +++ b/src/nvidia/generated/g_kernel_gsp_nvoc.h @@ -301,6 +301,7 @@ struct KernelGsp { LIBOS_LOG_DECODE logDecode; RM_LIBOS_LOG_MEM rmLibosLogMem[2]; void *pLogElf; + NvBool bInInit; MEMORY_DESCRIPTOR *pMemDesc_simAccessBuf; SimAccessBuffer *pSimAccessBuf; NvP64 pSimAccessBufPriv; diff --git a/src/nvidia/generated/g_nv_name_released.h 
b/src/nvidia/generated/g_nv_name_released.h index 9251e5d5c..dc5ffa1c4 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -806,6 +806,8 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x20B0, 0x1450, 0x10de, "NVIDIA A100-PG509-200" }, { 0x20B2, 0x1463, 0x10de, "NVIDIA A100-SXM4-80GB" }, { 0x20B2, 0x147f, 0x10de, "NVIDIA A100-SXM4-80GB" }, + { 0x20B2, 0x1622, 0x10de, "NVIDIA A100-SXM4-80GB" }, + { 0x20B2, 0x1623, 0x10de, "NVIDIA A100-SXM4-80GB" }, { 0x20B3, 0x14a7, 0x10de, "NVIDIA PG506-242" }, { 0x20B3, 0x14a8, 0x10de, "NVIDIA PG506-243" }, { 0x20B5, 0x1533, 0x10de, "NVIDIA A100 80GB PCIe" }, @@ -907,6 +909,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2507, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050" }, { 0x2508, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 OEM" }, { 0x2520, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" }, + { 0x2521, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" }, { 0x2523, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" }, { 0x2531, 0x151d, 0x1028, "NVIDIA RTX A2000" }, { 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" }, diff --git a/src/nvidia/inc/libraries/nvlog/nvlog.h b/src/nvidia/inc/libraries/nvlog/nvlog.h index 00debf749..c7be92119 100644 --- a/src/nvidia/inc/libraries/nvlog/nvlog.h +++ b/src/nvidia/inc/libraries/nvlog/nvlog.h @@ -86,8 +86,9 @@ NV_STATUS nvlogAllocBuffer(NvU32 size, NvU32 flags, NvU32 tag, NVLOG_BUFFER_HAND * @brief Deallocate a buffer with the given handle * * @param[in] hBuffer Handle of the buffer to deallocate + * @param[in] bDeallocPreserved Deallocate preserved buffers */ -void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer); +void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer, NvBool bDeallocPreserved); /** * @brief Write to a buffer with the given handle diff --git a/src/nvidia/kernel/vgpu/nv/rpc.c b/src/nvidia/kernel/vgpu/nv/rpc.c index 4e4ee5205..c5766045a 100644 --- a/src/nvidia/kernel/vgpu/nv/rpc.c +++ 
b/src/nvidia/kernel/vgpu/nv/rpc.c @@ -265,8 +265,11 @@ static NV_STATUS _issueRpcLarge // should not be called in broadcast mode NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE); + // // Copy the initial buffer - entryLength = NV_MIN(bufSize, pRpc->maxRpcSize); + // Temporary black magic WAR for bug 3594082: reducing the size by 1 + // + entryLength = NV_MIN(bufSize, pRpc->maxRpcSize - 1); if ((NvU8 *)vgpu_rpc_message_header_v != pBuf8) portMemCopy(vgpu_rpc_message_header_v, entryLength, pBuf8, entryLength); @@ -291,8 +294,11 @@ static NV_STATUS _issueRpcLarge remainingSize -= entryLength; pBuf8 += entryLength; + // // Copy the remaining buffers - entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v); + // Temporary black magic WAR for bug 3594082: reducing the size by 1 + // + entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v) - 1; while (remainingSize != 0) { if (entryLength > remainingSize) diff --git a/src/nvidia/src/kernel/diagnostics/nvlog.c b/src/nvidia/src/kernel/diagnostics/nvlog.c index 677d3726e..50638bf72 100644 --- a/src/nvidia/src/kernel/diagnostics/nvlog.c +++ b/src/nvidia/src/kernel/diagnostics/nvlog.c @@ -103,7 +103,7 @@ nvlogDestroy() tlsShutdown(); for (i = 0; i < NVLOG_MAX_BUFFERS; i++) { - nvlogDeallocBuffer(i); + nvlogDeallocBuffer(i, NV_TRUE); } if (NvLogLogger.mainLock != NULL) { @@ -261,7 +261,8 @@ nvlogAllocBuffer void nvlogDeallocBuffer ( - NVLOG_BUFFER_HANDLE hBuffer + NVLOG_BUFFER_HANDLE hBuffer, + NvBool bDeallocPreserved ) { NVLOG_BUFFER *pBuffer; @@ -271,6 +272,12 @@ nvlogDeallocBuffer pBuffer = NvLogLogger.pBuffers[hBuffer]; + if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pBuffer->flags) && + !bDeallocPreserved) + { + return; + } + pBuffer->flags = FLD_SET_DRF(LOG_BUFFER, _FLAGS, _DISABLED, _YES, pBuffer->flags); diff --git a/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c b/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c index 664049cd7..2fab6ca47 100644 
--- a/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c +++ b/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c @@ -2502,15 +2502,19 @@ kbusFlushSingle_GM107 if (IS_GSP_CLIENT(pGpu)) { // - // on GSP client, we only support PCIE_READ to do flush - // a sysmembar flush should call kbusSendSysmembarSingle_HAL explicitly + // on GSP client, we should use PCIE_READ to do video memory flush. + // A sysmembar flush that touches registers is done through RPC and has + // lower efficiency. For cases where it needs sysmembar, the caller site + // should use kbusSendSysmembarSingle_HAL explicitly. // - NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_PATH); - } - else - { - return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus); + NV_ASSERT(0); + + // This will dump a stack trace to assist debug on certain + // platforms. + osAssertFailed(); } + + return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus); } } diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c index 8f880eea7..8e0b03ceb 100644 --- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c +++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c @@ -3750,6 +3750,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL Memory *pMemory; ContextDma *pContextDma; NvU32 addressSpace; + NvU64 notificationBufferSize; NV_STATUS status; hNotifier = pKernelChannel->hErrorContext; @@ -3758,6 +3759,8 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR, NV_ERR_INVALID_ARGUMENT); + notificationBufferSize = (index + 1) * sizeof(NvNotification); + status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice); if (status != NV_OK) return NV_ERR_INVALID_DEVICE; @@ -3766,7 +3769,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL { addressSpace = memdescGetAddressSpace(pMemory->pMemDesc); - NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= ((index + 1) * sizeof(NvNotification)), +
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= notificationBufferSize, NV_ERR_OUT_OF_RANGE); switch (addressSpace) { @@ -3784,7 +3787,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL &pDmaMappingInfo), NV_ERR_GENERIC); - NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= ((index + 1) * sizeof(NvNotification)), + NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= notificationBufferSize, NV_ERR_OUT_OF_RANGE); break; } @@ -3799,7 +3802,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL } else if (NV_OK == ctxdmaGetByHandle(pClient, hNotifier, &pContextDma)) { - NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (((index + 1) * sizeof(NvNotification)) - 1), + NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (notificationBufferSize - 1), NV_ERR_OUT_OF_RANGE); } else diff --git a/src/nvidia/src/kernel/gpu/gpu.c b/src/nvidia/src/kernel/gpu/gpu.c index a8aee3676..275fcb0c5 100644 --- a/src/nvidia/src/kernel/gpu/gpu.c +++ b/src/nvidia/src/kernel/gpu/gpu.c @@ -1923,26 +1923,6 @@ gpuStatePreInit_IMPL } } - pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo)); - if (pGpu->boardInfo) - { - // To avoid potential race of xid reporting with the control, zero it out - portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo)); - - RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); - - if (pRmApi->Control(pRmApi, - pGpu->hInternalClient, - pGpu->hInternalSubdevice, - NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO, - pGpu->boardInfo, - sizeof(*pGpu->boardInfo)) != NV_OK) - { - portMemFree(pGpu->boardInfo); - pGpu->boardInfo = NULL; - } - } - return rmStatus; } @@ -2291,6 +2271,26 @@ gpuStatePostLoad goto gpuStatePostLoad_exit; } + pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo)); + if (pGpu->boardInfo) + { + // To avoid potential race of xid reporting with the control, zero it out + portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo)); + + RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); + + if(pRmApi->Control(pRmApi, + 
pGpu->hInternalClient, + pGpu->hInternalSubdevice, + NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO, + pGpu->boardInfo, + sizeof(*pGpu->boardInfo)) != NV_OK) + { + portMemFree(pGpu->boardInfo); + pGpu->boardInfo = NULL; + } + } + gpuStatePostLoad_exit: return rmStatus; } @@ -2326,6 +2326,9 @@ gpuStatePreUnload NvU32 curEngDescIdx; NV_STATUS rmStatus = NV_OK; + portMemFree(pGpu->boardInfo); + pGpu->boardInfo = NULL; + engDescriptorList = gpuGetUnloadEngineDescriptors(pGpu); numEngDescriptors = gpuGetNumEngDescriptors(pGpu); @@ -2648,9 +2651,6 @@ gpuStateDestroy_IMPL _gpuFreeInternalObjects(pGpu); gpuDestroyGenericKernelFalconList(pGpu); - portMemFree(pGpu->boardInfo); - pGpu->boardInfo = NULL; - portMemFree(pGpu->gspSupportedEngines); pGpu->gspSupportedEngines = NULL; diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index 6c1d976c6..53961a1be 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -1047,7 +1047,7 @@ _kgspInitLibosLoggingStructures // // Setup logging memory for each task. - // Use MEMDESC_FLAGS_CPU_ONLY -- to early to call memdescMapIommu. + // Use MEMDESC_FLAGS_CPU_ONLY -- too early to call memdescMapIommu. // NV_ASSERT_OK_OR_GOTO(nvStatus, memdescCreate(&pLog->pTaskLogDescriptor, @@ -1258,6 +1258,8 @@ kgspInitRm_IMPL return NV_ERR_INVALID_ARGUMENT; } + pKernelGsp->bInInit = NV_TRUE; + // Need to hold the GPU instance lock in order to write to the RPC queue NV_ASSERT_OK_OR_GOTO(status, rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE, @@ -1278,7 +1280,7 @@ kgspInitRm_IMPL { KernelGspVbiosImg *pVbiosImg = NULL; - // Try and extract a VBIOS image. + // Try and extract a VBIOS image. 
status = kgspExtractVbiosFromRom_HAL(pGpu, pKernelGsp, &pVbiosImg); if (status == NV_OK) @@ -1403,6 +1405,14 @@ kgspInitRm_IMPL NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspStartLogPolling(pGpu, pKernelGsp), done); done: + pKernelGsp->bInInit = NV_FALSE; + + if (status != NV_OK) + { + // Preserve any captured gsp-rm logs + libosPreserveLogs(&pKernelGsp->logDecode); + } + if (gpusLockedMask != 0) { rmGpuGroupLockRelease(gpusLockedMask, GPUS_LOCK_FLAGS_NONE); @@ -1520,7 +1530,7 @@ kgspDumpGspLogs_IMPL NvBool bSyncNvLog ) { - if (pKernelGsp->pLogElf || bSyncNvLog) + if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog) libosExtractLogs(&pKernelGsp->logDecode, bSyncNvLog); } diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c index 005888560..a2c742b58 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c @@ -1959,6 +1959,7 @@ memmgrFillComprInfo_IMPL { const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu)); + NvU32 size; portMemSet(pComprInfo, 0, sizeof(*pComprInfo)); @@ -1969,10 +1970,12 @@ memmgrFillComprInfo_IMPL NV_ASSERT(compTagStartOffset != ~(NvU32)0); + size = pageSize * pageCount; + pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift; pComprInfo->compTagLineMin = compTagStartOffset; pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift); - pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift); + pComprInfo->compPageIndexHi = (NvU32)((surfOffset + size - 1) >> pComprInfo->compPageShift); pComprInfo->compTagLineMultiplier = 1; return NV_OK; diff --git a/src/nvidia/src/kernel/gpu_mgr/gpu_mgr.c b/src/nvidia/src/kernel/gpu_mgr/gpu_mgr.c index 5c342d9ce..d77843220 100644 --- a/src/nvidia/src/kernel/gpu_mgr/gpu_mgr.c +++ b/src/nvidia/src/kernel/gpu_mgr/gpu_mgr.c @@ -751,6 +751,8 @@ NvBool 
gpumgrIsDeviceRmFirmwareCapable 0x2236, // A10 SKU215 Pris-24 0x2237, // A10G SKU215 Pris-24 0x25B6, // A16 + 0x20F5, // A800-80 + 0x20F6, // A800-40 }; NvU32 count = NV_ARRAY_ELEMENTS(defaultGspRmGpus); NvU32 i;