| File: | dev/pci/drm/amd/amdgpu/amdgpu_gfx.c |
| Warning: | line 1008, column 12 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | |||
| 2 | * Copyright 2014 Advanced Micro Devices, Inc. | |||
| 3 | * Copyright 2008 Red Hat Inc. | |||
| 4 | * Copyright 2009 Jerome Glisse. | |||
| 5 | * | |||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
| 7 | * copy of this software and associated documentation files (the "Software"), | |||
| 8 | * to deal in the Software without restriction, including without limitation | |||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | |||
| 11 | * Software is furnished to do so, subject to the following conditions: | |||
| 12 | * | |||
| 13 | * The above copyright notice and this permission notice shall be included in | |||
| 14 | * all copies or substantial portions of the Software. | |||
| 15 | * | |||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| 19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |||
| 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |||
| 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |||
| 22 | * OTHER DEALINGS IN THE SOFTWARE. | |||
| 23 | * | |||
| 24 | */ | |||
| 25 | ||||
| 26 | #include <linux/firmware.h> | |||
| 27 | #include "amdgpu.h" | |||
| 28 | #include "amdgpu_gfx.h" | |||
| 29 | #include "amdgpu_rlc.h" | |||
| 30 | #include "amdgpu_ras.h" | |||
| 31 | ||||
/* Delay of 0.1 second before the gfx off feature is (re-)enabled. */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

/* Delay value meaning "no delay". */
#define GFX_OFF_NO_DELAY 0
| 36 | ||||
| 37 | /* | |||
| 38 | * GPU GFX IP block helpers function. | |||
| 39 | */ | |||
| 40 | ||||
| 41 | int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, | |||
| 42 | int pipe, int queue) | |||
| 43 | { | |||
| 44 | int bit = 0; | |||
| 45 | ||||
| 46 | bit += mec * adev->gfx.mec.num_pipe_per_mec | |||
| 47 | * adev->gfx.mec.num_queue_per_pipe; | |||
| 48 | bit += pipe * adev->gfx.mec.num_queue_per_pipe; | |||
| 49 | bit += queue; | |||
| 50 | ||||
| 51 | return bit; | |||
| 52 | } | |||
| 53 | ||||
| 54 | void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, | |||
| 55 | int *mec, int *pipe, int *queue) | |||
| 56 | { | |||
| 57 | *queue = bit % adev->gfx.mec.num_queue_per_pipe; | |||
| 58 | *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) | |||
| 59 | % adev->gfx.mec.num_pipe_per_mec; | |||
| 60 | *mec = (bit / adev->gfx.mec.num_queue_per_pipe) | |||
| 61 | / adev->gfx.mec.num_pipe_per_mec; | |||
| 62 | ||||
| 63 | } | |||
| 64 | ||||
| 65 | bool_Bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, | |||
| 66 | int mec, int pipe, int queue) | |||
| 67 | { | |||
| 68 | return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), | |||
| 69 | adev->gfx.mec.queue_bitmap); | |||
| 70 | } | |||
| 71 | ||||
| 72 | int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, | |||
| 73 | int me, int pipe, int queue) | |||
| 74 | { | |||
| 75 | int bit = 0; | |||
| 76 | ||||
| 77 | bit += me * adev->gfx.me.num_pipe_per_me | |||
| 78 | * adev->gfx.me.num_queue_per_pipe; | |||
| 79 | bit += pipe * adev->gfx.me.num_queue_per_pipe; | |||
| 80 | bit += queue; | |||
| 81 | ||||
| 82 | return bit; | |||
| 83 | } | |||
| 84 | ||||
| 85 | void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, | |||
| 86 | int *me, int *pipe, int *queue) | |||
| 87 | { | |||
| 88 | *queue = bit % adev->gfx.me.num_queue_per_pipe; | |||
| 89 | *pipe = (bit / adev->gfx.me.num_queue_per_pipe) | |||
| 90 | % adev->gfx.me.num_pipe_per_me; | |||
| 91 | *me = (bit / adev->gfx.me.num_queue_per_pipe) | |||
| 92 | / adev->gfx.me.num_pipe_per_me; | |||
| 93 | } | |||
| 94 | ||||
| 95 | bool_Bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, | |||
| 96 | int me, int pipe, int queue) | |||
| 97 | { | |||
| 98 | return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), | |||
| 99 | adev->gfx.me.queue_bitmap); | |||
| 100 | } | |||
| 101 | ||||
| 102 | /** | |||
| 103 | * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter | |||
| 104 | * | |||
| 105 | * @mask: array in which the per-shader array disable masks will be stored | |||
| 106 | * @max_se: number of SEs | |||
| 107 | * @max_sh: number of SHs | |||
| 108 | * | |||
| 109 | * The bitmask of CUs to be disabled in the shader array determined by se and | |||
| 110 | * sh is stored in mask[se * max_sh + sh]. | |||
| 111 | */ | |||
| 112 | void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh) | |||
| 113 | { | |||
| 114 | unsigned se, sh, cu; | |||
| 115 | const char *p; | |||
| 116 | ||||
| 117 | memset(mask, 0, sizeof(*mask) * max_se * max_sh)__builtin_memset((mask), (0), (sizeof(*mask) * max_se * max_sh )); | |||
| 118 | ||||
| 119 | if (!amdgpu_disable_cu || !*amdgpu_disable_cu) | |||
| 120 | return; | |||
| 121 | ||||
| 122 | #ifdef notyet | |||
| 123 | p = amdgpu_disable_cu; | |||
| 124 | for (;;) { | |||
| 125 | char *next; | |||
| 126 | int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); | |||
| 127 | if (ret < 3) { | |||
| 128 | DRM_ERROR("amdgpu: could not parse disable_cu\n")__drm_err("amdgpu: could not parse disable_cu\n"); | |||
| 129 | return; | |||
| 130 | } | |||
| 131 | ||||
| 132 | if (se < max_se && sh < max_sh && cu < 16) { | |||
| 133 | DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu)printk("\0016" "[" "drm" "] " "amdgpu: disabling CU %u.%u.%u\n" , se, sh, cu); | |||
| 134 | mask[se * max_sh + sh] |= 1u << cu; | |||
| 135 | } else { | |||
| 136 | DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",__drm_err("amdgpu: disable_cu %u.%u.%u is out of range\n", se , sh, cu) | |||
| 137 | se, sh, cu)__drm_err("amdgpu: disable_cu %u.%u.%u is out of range\n", se , sh, cu); | |||
| 138 | } | |||
| 139 | ||||
| 140 | next = strchr(p, ','); | |||
| 141 | if (!next) | |||
| 142 | break; | |||
| 143 | p = next + 1; | |||
| 144 | } | |||
| 145 | #endif | |||
| 146 | } | |||
| 147 | ||||
| 148 | static bool_Bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) | |||
| 149 | { | |||
| 150 | return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1; | |||
| 151 | } | |||
| 152 | ||||
| 153 | static bool_Bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) | |||
| 154 | { | |||
| 155 | if (amdgpu_compute_multipipe != -1) { | |||
| 156 | DRM_INFO("amdgpu: forcing compute pipe policy %d\n",printk("\0016" "[" "drm" "] " "amdgpu: forcing compute pipe policy %d\n" , amdgpu_compute_multipipe) | |||
| 157 | amdgpu_compute_multipipe)printk("\0016" "[" "drm" "] " "amdgpu: forcing compute pipe policy %d\n" , amdgpu_compute_multipipe); | |||
| 158 | return amdgpu_compute_multipipe == 1; | |||
| 159 | } | |||
| 160 | ||||
| 161 | if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)(((9) << 16) | ((0) << 8) | (0))) | |||
| 162 | return true1; | |||
| 163 | ||||
| 164 | /* FIXME: spreading the queues across pipes causes perf regressions | |||
| 165 | * on POLARIS11 compute workloads */ | |||
| 166 | if (adev->asic_type == CHIP_POLARIS11) | |||
| 167 | return false0; | |||
| 168 | ||||
| 169 | return adev->gfx.mec.num_mec > 1; | |||
| 170 | } | |||
| 171 | ||||
| 172 | bool_Bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, | |||
| 173 | struct amdgpu_ring *ring) | |||
| 174 | { | |||
| 175 | int queue = ring->queue; | |||
| 176 | int pipe = ring->pipe; | |||
| 177 | ||||
| 178 | /* Policy: use pipe1 queue0 as high priority graphics queue if we | |||
| 179 | * have more than one gfx pipe. | |||
| 180 | */ | |||
| 181 | if (amdgpu_gfx_is_graphics_multipipe_capable(adev) && | |||
| 182 | adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) { | |||
| 183 | int me = ring->me; | |||
| 184 | int bit; | |||
| 185 | ||||
| 186 | bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue); | |||
| 187 | if (ring == &adev->gfx.gfx_ring[bit]) | |||
| 188 | return true1; | |||
| 189 | } | |||
| 190 | ||||
| 191 | return false0; | |||
| 192 | } | |||
| 193 | ||||
| 194 | bool_Bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, | |||
| 195 | struct amdgpu_ring *ring) | |||
| 196 | { | |||
| 197 | /* Policy: use 1st queue as high priority compute queue if we | |||
| 198 | * have more than one compute queue. | |||
| 199 | */ | |||
| 200 | if (adev->gfx.num_compute_rings > 1 && | |||
| 201 | ring == &adev->gfx.compute_ring[0]) | |||
| 202 | return true1; | |||
| 203 | ||||
| 204 | return false0; | |||
| 205 | } | |||
| 206 | ||||
| 207 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) | |||
| 208 | { | |||
| 209 | int i, queue, pipe; | |||
| 210 | bool_Bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev); | |||
| 211 | int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *(((adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe )<(adev->gfx.num_compute_rings))?(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe):(adev->gfx.num_compute_rings )) | |||
| 212 | adev->gfx.mec.num_queue_per_pipe,(((adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe )<(adev->gfx.num_compute_rings))?(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe):(adev->gfx.num_compute_rings )) | |||
| 213 | adev->gfx.num_compute_rings)(((adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe )<(adev->gfx.num_compute_rings))?(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe):(adev->gfx.num_compute_rings )); | |||
| 214 | ||||
| 215 | if (multipipe_policy) { | |||
| 216 | /* policy: make queues evenly cross all pipes on MEC1 only */ | |||
| 217 | for (i = 0; i < max_queues_per_mec; i++) { | |||
| 218 | pipe = i % adev->gfx.mec.num_pipe_per_mec; | |||
| 219 | queue = (i / adev->gfx.mec.num_pipe_per_mec) % | |||
| 220 | adev->gfx.mec.num_queue_per_pipe; | |||
| 221 | ||||
| 222 | set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, | |||
| 223 | adev->gfx.mec.queue_bitmap); | |||
| 224 | } | |||
| 225 | } else { | |||
| 226 | /* policy: amdgpu owns all queues in the given pipe */ | |||
| 227 | for (i = 0; i < max_queues_per_mec; ++i) | |||
| 228 | set_bit(i, adev->gfx.mec.queue_bitmap); | |||
| 229 | } | |||
| 230 | ||||
| 231 | dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES))do { } while(0); | |||
| 232 | } | |||
| 233 | ||||
| 234 | void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) | |||
| 235 | { | |||
| 236 | int i, queue, pipe; | |||
| 237 | bool_Bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); | |||
| 238 | int max_queues_per_me = adev->gfx.me.num_pipe_per_me * | |||
| 239 | adev->gfx.me.num_queue_per_pipe; | |||
| 240 | ||||
| 241 | if (multipipe_policy) { | |||
| 242 | /* policy: amdgpu owns the first queue per pipe at this stage | |||
| 243 | * will extend to mulitple queues per pipe later */ | |||
| 244 | for (i = 0; i < max_queues_per_me; i++) { | |||
| 245 | pipe = i % adev->gfx.me.num_pipe_per_me; | |||
| 246 | queue = (i / adev->gfx.me.num_pipe_per_me) % | |||
| 247 | adev->gfx.me.num_queue_per_pipe; | |||
| 248 | ||||
| 249 | set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, | |||
| 250 | adev->gfx.me.queue_bitmap); | |||
| 251 | } | |||
| 252 | } else { | |||
| 253 | for (i = 0; i < max_queues_per_me; ++i) | |||
| 254 | set_bit(i, adev->gfx.me.queue_bitmap); | |||
| 255 | } | |||
| 256 | ||||
| 257 | /* update the number of active graphics rings */ | |||
| 258 | adev->gfx.num_gfx_rings = | |||
| 259 | bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES128); | |||
| 260 | } | |||
| 261 | ||||
| 262 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, | |||
| 263 | struct amdgpu_ring *ring) | |||
| 264 | { | |||
| 265 | int queue_bit; | |||
| 266 | int mec, pipe, queue; | |||
| 267 | ||||
| 268 | queue_bit = adev->gfx.mec.num_mec | |||
| 269 | * adev->gfx.mec.num_pipe_per_mec | |||
| 270 | * adev->gfx.mec.num_queue_per_pipe; | |||
| 271 | ||||
| 272 | while (--queue_bit >= 0) { | |||
| 273 | if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) | |||
| 274 | continue; | |||
| 275 | ||||
| 276 | amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); | |||
| 277 | ||||
| 278 | /* | |||
| 279 | * 1. Using pipes 2/3 from MEC 2 seems cause problems. | |||
| 280 | * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN | |||
| 281 | * only can be issued on queue 0. | |||
| 282 | */ | |||
| 283 | if ((mec == 1 && pipe > 1) || queue != 0) | |||
| 284 | continue; | |||
| 285 | ||||
| 286 | ring->me = mec + 1; | |||
| 287 | ring->pipe = pipe; | |||
| 288 | ring->queue = queue; | |||
| 289 | ||||
| 290 | return 0; | |||
| 291 | } | |||
| 292 | ||||
| 293 | dev_err(adev->dev, "Failed to find a queue for KIQ\n")printf("drm:pid%d:%s *ERROR* " "Failed to find a queue for KIQ\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__); | |||
| 294 | return -EINVAL22; | |||
| 295 | } | |||
| 296 | ||||
| 297 | int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, | |||
| 298 | struct amdgpu_ring *ring, | |||
| 299 | struct amdgpu_irq_src *irq) | |||
| 300 | { | |||
| 301 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 302 | int r = 0; | |||
| 303 | ||||
| 304 | mtx_init(&kiq->ring_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&kiq-> ring_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9) ? 0x9 : ((0x9)))); } while (0); | |||
| 305 | ||||
| 306 | ring->adev = NULL((void *)0); | |||
| 307 | ring->ring_obj = NULL((void *)0); | |||
| 308 | ring->use_doorbell = true1; | |||
| 309 | ring->doorbell_index = adev->doorbell_index.kiq; | |||
| 310 | ||||
| 311 | r = amdgpu_gfx_kiq_acquire(adev, ring); | |||
| 312 | if (r) | |||
| 313 | return r; | |||
| 314 | ||||
| 315 | ring->eop_gpu_addr = kiq->eop_gpu_addr; | |||
| 316 | ring->no_scheduler = true1; | |||
| 317 | snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); | |||
| 318 | r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, | |||
| 319 | AMDGPU_RING_PRIO_DEFAULT, NULL((void *)0)); | |||
| 320 | if (r) | |||
| 321 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r)printf("drm:pid%d:%s *WARNING* " "(%d) failed to init kiq ring\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 322 | ||||
| 323 | return r; | |||
| 324 | } | |||
| 325 | ||||
/* Tear down the KIQ ring; a thin wrapper around amdgpu_ring_fini(). */
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}
| 330 | ||||
| 331 | void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) | |||
| 332 | { | |||
| 333 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 334 | ||||
| 335 | amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL((void *)0)); | |||
| 336 | } | |||
| 337 | ||||
| 338 | int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, | |||
| 339 | unsigned hpd_size) | |||
| 340 | { | |||
| 341 | int r; | |||
| 342 | u32 *hpd; | |||
| 343 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 344 | ||||
| 345 | r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE(1 << 12), | |||
| 346 | AMDGPU_GEM_DOMAIN_GTT0x2, &kiq->eop_obj, | |||
| 347 | &kiq->eop_gpu_addr, (void **)&hpd); | |||
| 348 | if (r) { | |||
| 349 | dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r)printf("drm:pid%d:%s *WARNING* " "failed to create KIQ bo (%d).\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 350 | return r; | |||
| 351 | } | |||
| 352 | ||||
| 353 | memset(hpd, 0, hpd_size)__builtin_memset((hpd), (0), (hpd_size)); | |||
| 354 | ||||
| 355 | r = amdgpu_bo_reserve(kiq->eop_obj, true1); | |||
| 356 | if (unlikely(r != 0)__builtin_expect(!!(r != 0), 0)) | |||
| 357 | dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r)printf("drm:pid%d:%s *WARNING* " "(%d) reserve kiq eop bo failed\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 358 | amdgpu_bo_kunmap(kiq->eop_obj); | |||
| 359 | amdgpu_bo_unreserve(kiq->eop_obj); | |||
| 360 | ||||
| 361 | return 0; | |||
| 362 | } | |||
| 363 | ||||
| 364 | /* create MQD for each compute/gfx queue */ | |||
| 365 | int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, | |||
| 366 | unsigned mqd_size) | |||
| 367 | { | |||
| 368 | struct amdgpu_ring *ring = NULL((void *)0); | |||
| 369 | int r, i; | |||
| 370 | ||||
| 371 | /* create MQD for KIQ */ | |||
| 372 | ring = &adev->gfx.kiq.ring; | |||
| 373 | if (!adev->enable_mes_kiq && !ring->mqd_obj) { | |||
| 374 | /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must | |||
| 375 | * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD | |||
| 376 | * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for | |||
| 377 | * KIQ MQD no matter SRIOV or Bare-metal | |||
| 378 | */ | |||
| 379 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE(1 << 12), | |||
| 380 | AMDGPU_GEM_DOMAIN_VRAM0x4, &ring->mqd_obj, | |||
| 381 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | |||
| 382 | if (r) { | |||
| 383 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r)printf("drm:pid%d:%s *WARNING* " "failed to create ring mqd ob (%d)" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 384 | return r; | |||
| 385 | } | |||
| 386 | ||||
| 387 | /* prepare MQD backup */ | |||
| 388 | adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS8] = kmalloc(mqd_size, GFP_KERNEL(0x0001 | 0x0004)); | |||
| 389 | if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS8]) | |||
| 390 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name)printf("drm:pid%d:%s *WARNING* " "no memory to create MQD backup for ring %s\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , ring-> name); | |||
| 391 | } | |||
| 392 | ||||
| 393 | if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { | |||
| 394 | /* create MQD for each KGQ */ | |||
| 395 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { | |||
| 396 | ring = &adev->gfx.gfx_ring[i]; | |||
| 397 | if (!ring->mqd_obj) { | |||
| 398 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE(1 << 12), | |||
| 399 | AMDGPU_GEM_DOMAIN_GTT0x2, &ring->mqd_obj, | |||
| 400 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | |||
| 401 | if (r) { | |||
| 402 | dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r)printf("drm:pid%d:%s *WARNING* " "failed to create ring mqd bo (%d)" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 403 | return r; | |||
| 404 | } | |||
| 405 | ||||
| 406 | /* prepare MQD backup */ | |||
| 407 | adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL(0x0001 | 0x0004)); | |||
| 408 | if (!adev->gfx.me.mqd_backup[i]) | |||
| 409 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name)printf("drm:pid%d:%s *WARNING* " "no memory to create MQD backup for ring %s\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , ring-> name); | |||
| 410 | } | |||
| 411 | } | |||
| 412 | } | |||
| 413 | ||||
| 414 | /* create MQD for each KCQ */ | |||
| 415 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | |||
| 416 | ring = &adev->gfx.compute_ring[i]; | |||
| 417 | if (!ring->mqd_obj) { | |||
| 418 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE(1 << 12), | |||
| 419 | AMDGPU_GEM_DOMAIN_GTT0x2, &ring->mqd_obj, | |||
| 420 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | |||
| 421 | if (r) { | |||
| 422 | dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r)printf("drm:pid%d:%s *WARNING* " "failed to create ring mqd bo (%d)" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
| 423 | return r; | |||
| 424 | } | |||
| 425 | ||||
| 426 | /* prepare MQD backup */ | |||
| 427 | adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL(0x0001 | 0x0004)); | |||
| 428 | if (!adev->gfx.mec.mqd_backup[i]) | |||
| 429 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name)printf("drm:pid%d:%s *WARNING* " "no memory to create MQD backup for ring %s\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , ring-> name); | |||
| 430 | } | |||
| 431 | } | |||
| 432 | ||||
| 433 | return 0; | |||
| 434 | } | |||
| 435 | ||||
| 436 | void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) | |||
| 437 | { | |||
| 438 | struct amdgpu_ring *ring = NULL((void *)0); | |||
| 439 | int i; | |||
| 440 | ||||
| 441 | if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { | |||
| 442 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { | |||
| 443 | ring = &adev->gfx.gfx_ring[i]; | |||
| 444 | kfree(adev->gfx.me.mqd_backup[i]); | |||
| 445 | amdgpu_bo_free_kernel(&ring->mqd_obj, | |||
| 446 | &ring->mqd_gpu_addr, | |||
| 447 | &ring->mqd_ptr); | |||
| 448 | } | |||
| 449 | } | |||
| 450 | ||||
| 451 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | |||
| 452 | ring = &adev->gfx.compute_ring[i]; | |||
| 453 | kfree(adev->gfx.mec.mqd_backup[i]); | |||
| 454 | amdgpu_bo_free_kernel(&ring->mqd_obj, | |||
| 455 | &ring->mqd_gpu_addr, | |||
| 456 | &ring->mqd_ptr); | |||
| 457 | } | |||
| 458 | ||||
| 459 | ring = &adev->gfx.kiq.ring; | |||
| 460 | kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS8]); | |||
| 461 | amdgpu_bo_free_kernel(&ring->mqd_obj, | |||
| 462 | &ring->mqd_gpu_addr, | |||
| 463 | &ring->mqd_ptr); | |||
| 464 | } | |||
| 465 | ||||
| 466 | int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) | |||
| 467 | { | |||
| 468 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 469 | struct amdgpu_ring *kiq_ring = &kiq->ring; | |||
| 470 | int i, r = 0; | |||
| 471 | ||||
| 472 | if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) | |||
| 473 | return -EINVAL22; | |||
| 474 | ||||
| 475 | spin_lock(&adev->gfx.kiq.ring_lock)mtx_enter(&adev->gfx.kiq.ring_lock); | |||
| 476 | if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * | |||
| 477 | adev->gfx.num_compute_rings)) { | |||
| 478 | spin_unlock(&adev->gfx.kiq.ring_lock)mtx_leave(&adev->gfx.kiq.ring_lock); | |||
| 479 | return -ENOMEM12; | |||
| 480 | } | |||
| 481 | ||||
| 482 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | |||
| 483 | kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], | |||
| 484 | RESET_QUEUES, 0, 0); | |||
| 485 | ||||
| 486 | if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) | |||
| 487 | r = amdgpu_ring_test_helper(kiq_ring); | |||
| 488 | spin_unlock(&adev->gfx.kiq.ring_lock)mtx_leave(&adev->gfx.kiq.ring_lock); | |||
| 489 | ||||
| 490 | return r; | |||
| 491 | } | |||
| 492 | ||||
/*
 * Translate a compute queue bitmap index into the bit position used in the
 * set-resources queue mask, which is laid out with a fixed stride of
 * 4 pipes per MEC and 8 queues per pipe.
 */
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	return mec * 4 * 8 + pipe * 8 + queue;
}
| 505 | ||||
| 506 | int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) | |||
| 507 | { | |||
| 508 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 509 | struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; | |||
| 510 | uint64_t queue_mask = 0; | |||
| 511 | int r, i; | |||
| 512 | ||||
| 513 | if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) | |||
| 514 | return -EINVAL22; | |||
| 515 | ||||
| 516 | for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES128; ++i) { | |||
| 517 | if (!test_bit(i, adev->gfx.mec.queue_bitmap)) | |||
| 518 | continue; | |||
| 519 | ||||
| 520 | /* This situation may be hit in the future if a new HW | |||
| 521 | * generation exposes more than 64 queues. If so, the | |||
| 522 | * definition of queue_mask needs updating */ | |||
| 523 | if (WARN_ON(i > (sizeof(queue_mask)*8))({ int __ret = !!(i > (sizeof(queue_mask)*8)); if (__ret) printf ("WARNING %s failed at %s:%d\n", "i > (sizeof(queue_mask)*8)" , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c", 523); __builtin_expect (!!(__ret), 0); })) { | |||
| 524 | DRM_ERROR("Invalid KCQ enabled: %d\n", i)__drm_err("Invalid KCQ enabled: %d\n", i); | |||
| 525 | break; | |||
| 526 | } | |||
| 527 | ||||
| 528 | queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); | |||
| 529 | } | |||
| 530 | ||||
| 531 | DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,printk("\0016" "[" "drm" "] " "kiq ring mec %d pipe %d q %d\n" , kiq_ring->me, kiq_ring->pipe, kiq_ring->queue) | |||
| 532 | kiq_ring->queue)printk("\0016" "[" "drm" "] " "kiq ring mec %d pipe %d q %d\n" , kiq_ring->me, kiq_ring->pipe, kiq_ring->queue); | |||
| 533 | spin_lock(&adev->gfx.kiq.ring_lock)mtx_enter(&adev->gfx.kiq.ring_lock); | |||
| 534 | r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * | |||
| 535 | adev->gfx.num_compute_rings + | |||
| 536 | kiq->pmf->set_resources_size); | |||
| 537 | if (r) { | |||
| 538 | DRM_ERROR("Failed to lock KIQ (%d).\n", r)__drm_err("Failed to lock KIQ (%d).\n", r); | |||
| 539 | spin_unlock(&adev->gfx.kiq.ring_lock)mtx_leave(&adev->gfx.kiq.ring_lock); | |||
| 540 | return r; | |||
| 541 | } | |||
| 542 | ||||
| 543 | if (adev->enable_mes) | |||
| 544 | queue_mask = ~0ULL; | |||
| 545 | ||||
| 546 | kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); | |||
| 547 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | |||
| 548 | kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); | |||
| 549 | ||||
| 550 | r = amdgpu_ring_test_helper(kiq_ring); | |||
| 551 | spin_unlock(&adev->gfx.kiq.ring_lock)mtx_leave(&adev->gfx.kiq.ring_lock); | |||
| 552 | if (r) | |||
| 553 | DRM_ERROR("KCQ enable failed\n")__drm_err("KCQ enable failed\n"); | |||
| 554 | ||||
| 555 | return r; | |||
| 556 | } | |||
| 557 | ||||
| 558 | /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable | |||
| 559 | * | |||
| 560 | * @adev: amdgpu_device pointer | |||
| 561 | * @bool enable true: enable gfx off feature, false: disable gfx off feature | |||
| 562 | * | |||
| 563 | * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. | |||
| 564 | * 2. other client can send request to disable gfx off feature, the request should be honored. | |||
| 565 | * 3. other client can cancel their request of disable gfx off feature | |||
| 566 | * 4. other client should not send request to enable gfx off feature before disable gfx off feature. | |||
| 567 | */ | |||
| 568 | ||||
| 569 | void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool_Bool enable) | |||
| 570 | { | |||
| 571 | unsigned long delay = GFX_OFF_DELAY_ENABLE(((uint64_t)(100)) * hz / 1000); | |||
| 572 | ||||
| 573 | if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) | |||
| 574 | return; | |||
| 575 | ||||
| 576 | mutex_lock(&adev->gfx.gfx_off_mutex)rw_enter_write(&adev->gfx.gfx_off_mutex); | |||
| 577 | ||||
| 578 | if (enable) { | |||
| 579 | /* If the count is already 0, it means there's an imbalance bug somewhere. | |||
| 580 | * Note that the bug may be in a different caller than the one which triggers the | |||
| 581 | * WARN_ON_ONCE. | |||
| 582 | */ | |||
| 583 | if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)({ static int __warned; int __ret = !!(adev->gfx.gfx_off_req_count == 0); if (__ret && !__warned) { printf("WARNING %s failed at %s:%d\n" , "adev->gfx.gfx_off_req_count == 0", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c" , 583); __warned = 1; } __builtin_expect(!!(__ret), 0); })) | |||
| 584 | goto unlock; | |||
| 585 | ||||
| 586 | adev->gfx.gfx_off_req_count--; | |||
| 587 | ||||
| 588 | if (adev->gfx.gfx_off_req_count == 0 && | |||
| 589 | !adev->gfx.gfx_off_state) { | |||
| 590 | schedule_delayed_work(&adev->gfx.gfx_off_delay_work, | |||
| 591 | delay); | |||
| 592 | } | |||
| 593 | } else { | |||
| 594 | if (adev->gfx.gfx_off_req_count == 0) { | |||
| 595 | cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); | |||
| 596 | ||||
| 597 | if (adev->gfx.gfx_off_state && | |||
| 598 | !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false0)) { | |||
| 599 | adev->gfx.gfx_off_state = false0; | |||
| 600 | ||||
| 601 | if (adev->gfx.funcs->init_spm_golden) { | |||
| 602 | dev_dbg(adev->dev,do { } while(0) | |||
| 603 | "GFXOFF is disabled, re-init SPM golden settings\n")do { } while(0); | |||
| 604 | amdgpu_gfx_init_spm_golden(adev)(adev)->gfx.funcs->init_spm_golden((adev)); | |||
| 605 | } | |||
| 606 | } | |||
| 607 | } | |||
| 608 | ||||
| 609 | adev->gfx.gfx_off_req_count++; | |||
| 610 | } | |||
| 611 | ||||
| 612 | unlock: | |||
| 613 | mutex_unlock(&adev->gfx.gfx_off_mutex)rw_exit_write(&adev->gfx.gfx_off_mutex); | |||
| 614 | } | |||
| 615 | ||||
| 616 | int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool_Bool value) | |||
| 617 | { | |||
| 618 | int r = 0; | |||
| 619 | ||||
| 620 | mutex_lock(&adev->gfx.gfx_off_mutex)rw_enter_write(&adev->gfx.gfx_off_mutex); | |||
| 621 | ||||
| 622 | r = amdgpu_dpm_set_residency_gfxoff(adev, value); | |||
| 623 | ||||
| 624 | mutex_unlock(&adev->gfx.gfx_off_mutex)rw_exit_write(&adev->gfx.gfx_off_mutex); | |||
| 625 | ||||
| 626 | return r; | |||
| 627 | } | |||
| 628 | ||||
| 629 | int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) | |||
| 630 | { | |||
| 631 | int r = 0; | |||
| 632 | ||||
| 633 | mutex_lock(&adev->gfx.gfx_off_mutex)rw_enter_write(&adev->gfx.gfx_off_mutex); | |||
| 634 | ||||
| 635 | r = amdgpu_dpm_get_residency_gfxoff(adev, value); | |||
| 636 | ||||
| 637 | mutex_unlock(&adev->gfx.gfx_off_mutex)rw_exit_write(&adev->gfx.gfx_off_mutex); | |||
| 638 | ||||
| 639 | return r; | |||
| 640 | } | |||
| 641 | ||||
| 642 | int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) | |||
| 643 | { | |||
| 644 | int r = 0; | |||
| 645 | ||||
| 646 | mutex_lock(&adev->gfx.gfx_off_mutex)rw_enter_write(&adev->gfx.gfx_off_mutex); | |||
| 647 | ||||
| 648 | r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); | |||
| 649 | ||||
| 650 | mutex_unlock(&adev->gfx.gfx_off_mutex)rw_exit_write(&adev->gfx.gfx_off_mutex); | |||
| 651 | ||||
| 652 | return r; | |||
| 653 | } | |||
| 654 | ||||
| 655 | int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) | |||
| 656 | { | |||
| 657 | ||||
| 658 | int r = 0; | |||
| 659 | ||||
| 660 | mutex_lock(&adev->gfx.gfx_off_mutex)rw_enter_write(&adev->gfx.gfx_off_mutex); | |||
| 661 | ||||
| 662 | r = amdgpu_dpm_get_status_gfxoff(adev, value); | |||
| 663 | ||||
| 664 | mutex_unlock(&adev->gfx.gfx_off_mutex)rw_exit_write(&adev->gfx.gfx_off_mutex); | |||
| 665 | ||||
| 666 | return r; | |||
| 667 | } | |||
| 668 | ||||
| 669 | int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) | |||
| 670 | { | |||
| 671 | int r; | |||
| 672 | ||||
| 673 | if (amdgpu_ras_is_supported(adev, ras_block->block)) { | |||
| 674 | if (!amdgpu_persistent_edc_harvesting_supported(adev)) | |||
| 675 | amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); | |||
| 676 | ||||
| 677 | r = amdgpu_ras_block_late_init(adev, ras_block); | |||
| 678 | if (r) | |||
| 679 | return r; | |||
| 680 | ||||
| 681 | if (adev->gfx.cp_ecc_error_irq.funcs) { | |||
| 682 | r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); | |||
| 683 | if (r) | |||
| 684 | goto late_fini; | |||
| 685 | } | |||
| 686 | } else { | |||
| 687 | amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); | |||
| 688 | } | |||
| 689 | ||||
| 690 | return 0; | |||
| 691 | late_fini: | |||
| 692 | amdgpu_ras_block_late_fini(adev, ras_block); | |||
| 693 | return r; | |||
| 694 | } | |||
| 695 | ||||
| 696 | int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, | |||
| 697 | void *err_data, | |||
| 698 | struct amdgpu_iv_entry *entry) | |||
| 699 | { | |||
| 700 | /* TODO ue will trigger an interrupt. | |||
| 701 | * | |||
| 702 | * When “Full RAS” is enabled, the per-IP interrupt sources should | |||
| 703 | * be disabled and the driver should only look for the aggregated | |||
| 704 | * interrupt via sync flood | |||
| 705 | */ | |||
| 706 | if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { | |||
| 707 | kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); | |||
| 708 | if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops && | |||
| 709 | adev->gfx.ras->ras_block.hw_ops->query_ras_error_count) | |||
| 710 | adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); | |||
| 711 | amdgpu_ras_reset_gpu(adev); | |||
| 712 | } | |||
| 713 | return AMDGPU_RAS_SUCCESS; | |||
| 714 | } | |||
| 715 | ||||
| 716 | int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, | |||
| 717 | struct amdgpu_irq_src *source, | |||
| 718 | struct amdgpu_iv_entry *entry) | |||
| 719 | { | |||
| 720 | struct ras_common_if *ras_if = adev->gfx.ras_if; | |||
| 721 | struct ras_dispatch_if ih_data = { | |||
| 722 | .entry = entry, | |||
| 723 | }; | |||
| 724 | ||||
| 725 | if (!ras_if) | |||
| 726 | return 0; | |||
| 727 | ||||
| 728 | ih_data.head = *ras_if; | |||
| 729 | ||||
| 730 | DRM_ERROR("CP ECC ERROR IRQ\n")__drm_err("CP ECC ERROR IRQ\n"); | |||
| 731 | amdgpu_ras_interrupt_dispatch(adev, &ih_data); | |||
| 732 | return 0; | |||
| 733 | } | |||
| 734 | ||||
| 735 | uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) | |||
| 736 | { | |||
| 737 | signed long r, cnt = 0; | |||
| 738 | unsigned long flags; | |||
| 739 | uint32_t seq, reg_val_offs = 0, value = 0; | |||
| 740 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 741 | struct amdgpu_ring *ring = &kiq->ring; | |||
| 742 | ||||
| 743 | if (amdgpu_device_skip_hw_access(adev)) | |||
| 744 | return 0; | |||
| 745 | ||||
| 746 | if (adev->mes.ring.sched.ready) | |||
| 747 | return amdgpu_mes_rreg(adev, reg); | |||
| 748 | ||||
| 749 | BUG_ON(!ring->funcs->emit_rreg)((!(!ring->funcs->emit_rreg)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c", 749, "!(!ring->funcs->emit_rreg)" )); | |||
| 750 | ||||
| 751 | spin_lock_irqsave(&kiq->ring_lock, flags)do { flags = 0; mtx_enter(&kiq->ring_lock); } while (0 ); | |||
| 752 | if (amdgpu_device_wb_get(adev, ®_val_offs)) { | |||
| 753 | pr_err("critical bug! too many kiq readers\n")printk("\0013" "amdgpu: " "critical bug! too many kiq readers\n" ); | |||
| 754 | goto failed_unlock; | |||
| 755 | } | |||
| 756 | amdgpu_ring_alloc(ring, 32); | |||
| 757 | amdgpu_ring_emit_rreg(ring, reg, reg_val_offs)(ring)->funcs->emit_rreg((ring), (reg), (reg_val_offs)); | |||
| 758 | r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT5000); | |||
| 759 | if (r) | |||
| 760 | goto failed_undo; | |||
| 761 | ||||
| 762 | amdgpu_ring_commit(ring); | |||
| 763 | spin_unlock_irqrestore(&kiq->ring_lock, flags)do { (void)(flags); mtx_leave(&kiq->ring_lock); } while (0); | |||
| 764 | ||||
| 765 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT5000); | |||
| 766 | ||||
| 767 | /* don't wait anymore for gpu reset case because this way may | |||
| 768 | * block gpu_recover() routine forever, e.g. this virt_kiq_rreg | |||
| 769 | * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will | |||
| 770 | * never return if we keep waiting in virt_kiq_rreg, which cause | |||
| 771 | * gpu_recover() hang there. | |||
| 772 | * | |||
| 773 | * also don't wait anymore for IRQ context | |||
| 774 | * */ | |||
| 775 | if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) | |||
| 776 | goto failed_kiq_read; | |||
| 777 | ||||
| 778 | might_sleep()assertwaitok(); | |||
| 779 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY1000) { | |||
| 780 | drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL)mdelay(5); | |||
| 781 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT5000); | |||
| 782 | } | |||
| 783 | ||||
| 784 | if (cnt > MAX_KIQ_REG_TRY1000) | |||
| 785 | goto failed_kiq_read; | |||
| 786 | ||||
| 787 | mb()do { __asm volatile("mfence" ::: "memory"); } while (0); | |||
| 788 | value = adev->wb.wb[reg_val_offs]; | |||
| 789 | amdgpu_device_wb_free(adev, reg_val_offs); | |||
| 790 | return value; | |||
| 791 | ||||
| 792 | failed_undo: | |||
| 793 | amdgpu_ring_undo(ring); | |||
| 794 | failed_unlock: | |||
| 795 | spin_unlock_irqrestore(&kiq->ring_lock, flags)do { (void)(flags); mtx_leave(&kiq->ring_lock); } while (0); | |||
| 796 | failed_kiq_read: | |||
| 797 | if (reg_val_offs) | |||
| 798 | amdgpu_device_wb_free(adev, reg_val_offs); | |||
| 799 | dev_err(adev->dev, "failed to read reg:%x\n", reg)printf("drm:pid%d:%s *ERROR* " "failed to read reg:%x\n", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci ) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci; })->ci_curproc->p_p->ps_pid, __func__ , reg); | |||
| 800 | return ~0; | |||
| 801 | } | |||
| 802 | ||||
| 803 | void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) | |||
| 804 | { | |||
| 805 | signed long r, cnt = 0; | |||
| 806 | unsigned long flags; | |||
| 807 | uint32_t seq; | |||
| 808 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |||
| 809 | struct amdgpu_ring *ring = &kiq->ring; | |||
| 810 | ||||
| 811 | BUG_ON(!ring->funcs->emit_wreg)((!(!ring->funcs->emit_wreg)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c", 811, "!(!ring->funcs->emit_wreg)" )); | |||
| 812 | ||||
| 813 | if (amdgpu_device_skip_hw_access(adev)) | |||
| 814 | return; | |||
| 815 | ||||
| 816 | if (adev->mes.ring.sched.ready) { | |||
| 817 | amdgpu_mes_wreg(adev, reg, v); | |||
| 818 | return; | |||
| 819 | } | |||
| 820 | ||||
| 821 | spin_lock_irqsave(&kiq->ring_lock, flags)do { flags = 0; mtx_enter(&kiq->ring_lock); } while (0 ); | |||
| 822 | amdgpu_ring_alloc(ring, 32); | |||
| 823 | amdgpu_ring_emit_wreg(ring, reg, v)(ring)->funcs->emit_wreg((ring), (reg), (v)); | |||
| 824 | r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT5000); | |||
| 825 | if (r) | |||
| 826 | goto failed_undo; | |||
| 827 | ||||
| 828 | amdgpu_ring_commit(ring); | |||
| 829 | spin_unlock_irqrestore(&kiq->ring_lock, flags)do { (void)(flags); mtx_leave(&kiq->ring_lock); } while (0); | |||
| 830 | ||||
| 831 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT5000); | |||
| 832 | ||||
| 833 | /* don't wait anymore for gpu reset case because this way may | |||
| 834 | * block gpu_recover() routine forever, e.g. this virt_kiq_rreg | |||
| 835 | * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will | |||
| 836 | * never return if we keep waiting in virt_kiq_rreg, which cause | |||
| 837 | * gpu_recover() hang there. | |||
| 838 | * | |||
| 839 | * also don't wait anymore for IRQ context | |||
| 840 | * */ | |||
| 841 | if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) | |||
| 842 | goto failed_kiq_write; | |||
| 843 | ||||
| 844 | might_sleep()assertwaitok(); | |||
| 845 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY1000) { | |||
| 846 | ||||
| 847 | drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL)mdelay(5); | |||
| 848 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT5000); | |||
| 849 | } | |||
| 850 | ||||
| 851 | if (cnt > MAX_KIQ_REG_TRY1000) | |||
| 852 | goto failed_kiq_write; | |||
| 853 | ||||
| 854 | return; | |||
| 855 | ||||
| 856 | failed_undo: | |||
| 857 | amdgpu_ring_undo(ring); | |||
| 858 | spin_unlock_irqrestore(&kiq->ring_lock, flags)do { (void)(flags); mtx_leave(&kiq->ring_lock); } while (0); | |||
| 859 | failed_kiq_write: | |||
| 860 | dev_err(adev->dev, "failed to write reg:%x\n", reg)printf("drm:pid%d:%s *ERROR* " "failed to write reg:%x\n", ({ struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , reg); | |||
| 861 | } | |||
| 862 | ||||
| 863 | int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) | |||
| 864 | { | |||
| 865 | if (amdgpu_num_kcq == -1) { | |||
| 866 | return 8; | |||
| 867 | } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { | |||
| 868 | dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n")printf("drm:pid%d:%s *WARNING* " "set kernel compute queue number to 8 due to invalid parameter provided by user\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__); | |||
| 869 | return 8; | |||
| 870 | } | |||
| 871 | return amdgpu_num_kcq; | |||
| 872 | } | |||
| 873 | ||||
| 874 | void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, | |||
| 875 | uint32_t ucode_id) | |||
| 876 | { | |||
| 877 | const struct gfx_firmware_header_v1_0 *cp_hdr; | |||
| 878 | const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; | |||
| 879 | struct amdgpu_firmware_info *info = NULL((void *)0); | |||
| 880 | const struct firmware *ucode_fw; | |||
| ||||
| 881 | unsigned int fw_size; | |||
| 882 | ||||
| 883 | switch (ucode_id) { | |||
| 884 | case AMDGPU_UCODE_ID_CP_PFP: | |||
| 885 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 886 | adev->gfx.pfp_fw->data; | |||
| 887 | adev->gfx.pfp_fw_version = | |||
| 888 | le32_to_cpu(cp_hdr->header.ucode_version)((__uint32_t)(cp_hdr->header.ucode_version)); | |||
| 889 | adev->gfx.pfp_feature_version = | |||
| 890 | le32_to_cpu(cp_hdr->ucode_feature_version)((__uint32_t)(cp_hdr->ucode_feature_version)); | |||
| 891 | ucode_fw = adev->gfx.pfp_fw; | |||
| 892 | fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes)((__uint32_t)(cp_hdr->header.ucode_size_bytes)); | |||
| 893 | break; | |||
| 894 | case AMDGPU_UCODE_ID_CP_RS64_PFP: | |||
| 895 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 896 | adev->gfx.pfp_fw->data; | |||
| 897 | adev->gfx.pfp_fw_version = | |||
| 898 | le32_to_cpu(cp_hdr_v2_0->header.ucode_version)((__uint32_t)(cp_hdr_v2_0->header.ucode_version)); | |||
| 899 | adev->gfx.pfp_feature_version = | |||
| 900 | le32_to_cpu(cp_hdr_v2_0->ucode_feature_version)((__uint32_t)(cp_hdr_v2_0->ucode_feature_version)); | |||
| 901 | ucode_fw = adev->gfx.pfp_fw; | |||
| 902 | fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes)((__uint32_t)(cp_hdr_v2_0->ucode_size_bytes)); | |||
| 903 | break; | |||
| 904 | case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: | |||
| 905 | case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: | |||
| 906 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 907 | adev->gfx.pfp_fw->data; | |||
| 908 | ucode_fw = adev->gfx.pfp_fw; | |||
| 909 | fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes)((__uint32_t)(cp_hdr_v2_0->data_size_bytes)); | |||
| 910 | break; | |||
| 911 | case AMDGPU_UCODE_ID_CP_ME: | |||
| 912 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 913 | adev->gfx.me_fw->data; | |||
| 914 | adev->gfx.me_fw_version = | |||
| 915 | le32_to_cpu(cp_hdr->header.ucode_version)((__uint32_t)(cp_hdr->header.ucode_version)); | |||
| 916 | adev->gfx.me_feature_version = | |||
| 917 | le32_to_cpu(cp_hdr->ucode_feature_version)((__uint32_t)(cp_hdr->ucode_feature_version)); | |||
| 918 | ucode_fw = adev->gfx.me_fw; | |||
| 919 | fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes)((__uint32_t)(cp_hdr->header.ucode_size_bytes)); | |||
| 920 | break; | |||
| 921 | case AMDGPU_UCODE_ID_CP_RS64_ME: | |||
| 922 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 923 | adev->gfx.me_fw->data; | |||
| 924 | adev->gfx.me_fw_version = | |||
| 925 | le32_to_cpu(cp_hdr_v2_0->header.ucode_version)((__uint32_t)(cp_hdr_v2_0->header.ucode_version)); | |||
| 926 | adev->gfx.me_feature_version = | |||
| 927 | le32_to_cpu(cp_hdr_v2_0->ucode_feature_version)((__uint32_t)(cp_hdr_v2_0->ucode_feature_version)); | |||
| 928 | ucode_fw = adev->gfx.me_fw; | |||
| 929 | fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes)((__uint32_t)(cp_hdr_v2_0->ucode_size_bytes)); | |||
| 930 | break; | |||
| 931 | case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: | |||
| 932 | case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: | |||
| 933 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 934 | adev->gfx.me_fw->data; | |||
| 935 | ucode_fw = adev->gfx.me_fw; | |||
| 936 | fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes)((__uint32_t)(cp_hdr_v2_0->data_size_bytes)); | |||
| 937 | break; | |||
| 938 | case AMDGPU_UCODE_ID_CP_CE: | |||
| 939 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 940 | adev->gfx.ce_fw->data; | |||
| 941 | adev->gfx.ce_fw_version = | |||
| 942 | le32_to_cpu(cp_hdr->header.ucode_version)((__uint32_t)(cp_hdr->header.ucode_version)); | |||
| 943 | adev->gfx.ce_feature_version = | |||
| 944 | le32_to_cpu(cp_hdr->ucode_feature_version)((__uint32_t)(cp_hdr->ucode_feature_version)); | |||
| 945 | ucode_fw = adev->gfx.ce_fw; | |||
| 946 | fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes)((__uint32_t)(cp_hdr->header.ucode_size_bytes)); | |||
| 947 | break; | |||
| 948 | case AMDGPU_UCODE_ID_CP_MEC1: | |||
| 949 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 950 | adev->gfx.mec_fw->data; | |||
| 951 | adev->gfx.mec_fw_version = | |||
| 952 | le32_to_cpu(cp_hdr->header.ucode_version)((__uint32_t)(cp_hdr->header.ucode_version)); | |||
| 953 | adev->gfx.mec_feature_version = | |||
| 954 | le32_to_cpu(cp_hdr->ucode_feature_version)((__uint32_t)(cp_hdr->ucode_feature_version)); | |||
| 955 | ucode_fw = adev->gfx.mec_fw; | |||
| 956 | fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes)((__uint32_t)(cp_hdr->header.ucode_size_bytes)) - | |||
| 957 | le32_to_cpu(cp_hdr->jt_size)((__uint32_t)(cp_hdr->jt_size)) * 4; | |||
| 958 | break; | |||
| 959 | case AMDGPU_UCODE_ID_CP_MEC1_JT: | |||
| 960 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 961 | adev->gfx.mec_fw->data; | |||
| 962 | ucode_fw = adev->gfx.mec_fw; | |||
| 963 | fw_size = le32_to_cpu(cp_hdr->jt_size)((__uint32_t)(cp_hdr->jt_size)) * 4; | |||
| 964 | break; | |||
| 965 | case AMDGPU_UCODE_ID_CP_MEC2: | |||
| 966 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 967 | adev->gfx.mec2_fw->data; | |||
| 968 | adev->gfx.mec2_fw_version = | |||
| 969 | le32_to_cpu(cp_hdr->header.ucode_version)((__uint32_t)(cp_hdr->header.ucode_version)); | |||
| 970 | adev->gfx.mec2_feature_version = | |||
| 971 | le32_to_cpu(cp_hdr->ucode_feature_version)((__uint32_t)(cp_hdr->ucode_feature_version)); | |||
| 972 | ucode_fw = adev->gfx.mec2_fw; | |||
| 973 | fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes)((__uint32_t)(cp_hdr->header.ucode_size_bytes)) - | |||
| 974 | le32_to_cpu(cp_hdr->jt_size)((__uint32_t)(cp_hdr->jt_size)) * 4; | |||
| 975 | break; | |||
| 976 | case AMDGPU_UCODE_ID_CP_MEC2_JT: | |||
| 977 | cp_hdr = (const struct gfx_firmware_header_v1_0 *) | |||
| 978 | adev->gfx.mec2_fw->data; | |||
| 979 | ucode_fw = adev->gfx.mec2_fw; | |||
| 980 | fw_size = le32_to_cpu(cp_hdr->jt_size)((__uint32_t)(cp_hdr->jt_size)) * 4; | |||
| 981 | break; | |||
| 982 | case AMDGPU_UCODE_ID_CP_RS64_MEC: | |||
| 983 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 984 | adev->gfx.mec_fw->data; | |||
| 985 | adev->gfx.mec_fw_version = | |||
| 986 | le32_to_cpu(cp_hdr_v2_0->header.ucode_version)((__uint32_t)(cp_hdr_v2_0->header.ucode_version)); | |||
| 987 | adev->gfx.mec_feature_version = | |||
| 988 | le32_to_cpu(cp_hdr_v2_0->ucode_feature_version)((__uint32_t)(cp_hdr_v2_0->ucode_feature_version)); | |||
| 989 | ucode_fw = adev->gfx.mec_fw; | |||
| 990 | fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes)((__uint32_t)(cp_hdr_v2_0->ucode_size_bytes)); | |||
| 991 | break; | |||
| 992 | case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: | |||
| 993 | case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: | |||
| 994 | case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: | |||
| 995 | case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: | |||
| 996 | cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) | |||
| 997 | adev->gfx.mec_fw->data; | |||
| 998 | ucode_fw = adev->gfx.mec_fw; | |||
| 999 | fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes)((__uint32_t)(cp_hdr_v2_0->data_size_bytes)); | |||
| 1000 | break; | |||
| 1001 | default: | |||
| 1002 | break; | |||
| 1003 | } | |||
| 1004 | ||||
| 1005 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | |||
| 1006 | info = &adev->firmware.ucode[ucode_id]; | |||
| 1007 | info->ucode_id = ucode_id; | |||
| 1008 | info->fw = ucode_fw; | |||
| ||||
| 1009 | adev->firmware.fw_size += roundup2(fw_size, PAGE_SIZE)(((fw_size) + (((1 << 12)) - 1)) & (~((__typeof(fw_size ))((1 << 12)) - 1))); | |||
| 1010 | } | |||
| 1011 | } |