From 3437d6700d045e8859de11c1a70a6d934734956e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 16 Oct 2022 21:03:09 -0700 Subject: [PATCH 01/14] rename UpdateTemplateEntry to UpdateTemplateSlot to reduce confusion with VkDescriptorUpdateTemplateEntry --- src/mantle/mantle_cmd_buf.c | 26 +++---- src/mantle/mantle_object.h | 8 +-- src/mantle/mantle_object_man.c | 9 ++- src/mantle/mantle_shader_pipeline.c | 104 ++++++++++++++-------------- 4 files changed, 73 insertions(+), 74 deletions(-) diff --git a/src/mantle/mantle_cmd_buf.c b/src/mantle/mantle_cmd_buf.c index 8efd0b7e..6ea37ce2 100644 --- a/src/mantle/mantle_cmd_buf.c +++ b/src/mantle/mantle_cmd_buf.c @@ -50,34 +50,34 @@ static void updateVkDescriptorSet( const BindPoint* bindPoint, const GrDescriptorSet* grDescriptorSet, unsigned slotOffset, - unsigned updateTemplateEntryCount, - const UpdateTemplateEntry* updateTemplateEntries, + unsigned updateTemplateSlotCount, + const UpdateTemplateSlot* updateTemplateSlots, VkPipelineLayout pipelineLayout) { - for (unsigned i = 0; i < updateTemplateEntryCount; i++) { - const UpdateTemplateEntry* entry = &updateTemplateEntries[i]; + for (unsigned i = 0; i < updateTemplateSlotCount; i++) { + const UpdateTemplateSlot* templateSlot = &updateTemplateSlots[i]; const DescriptorSetSlot* slot; - if (entry->isDynamic) { + if (templateSlot->isDynamic) { slot = &bindPoint->dynamicMemoryView; } else { slot = &grDescriptorSet->slots[slotOffset]; - for (unsigned j = 0; j < entry->pathDepth; j++) { - slot = &slot[entry->path[j]]; + for (unsigned j = 0; j < templateSlot->pathDepth; j++) { + slot = &slot[templateSlot->path[j]]; slot = &slot->nested.nextSet->slots[slot->nested.slotOffset]; } } VKD.vkUpdateDescriptorSetWithTemplate(grDevice->device, bindPoint->descriptorSet, - entry->updateTemplate, (void*)slot); + templateSlot->updateTemplate, (void*)slot); // Pass buffer strides down to the shader - for (unsigned j = 0; j < entry->strideCount; j++) { + for 
(unsigned j = 0; j < templateSlot->strideCount; j++) { VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, - entry->strideOffsets[j], sizeof(uint32_t), - &slot[entry->strideSlotIndexes[j]].buffer.stride); + templateSlot->strideOffsets[j], sizeof(uint32_t), + &slot[templateSlot->strideSlotIndexes[j]].buffer.stride); } } } @@ -175,8 +175,8 @@ static void grCmdBufferUpdateDescriptorSet( for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { updateVkDescriptorSet(grDevice, grCmdBuffer, bindPoint, bindPoint->grDescriptorSets[i], bindPoint->slotOffsets[i], - grPipeline->updateTemplateEntryCounts[i], - grPipeline->updateTemplateEntries[i], + grPipeline->updateTemplateSlotCounts[i], + grPipeline->updateTemplateSlots[i], grPipeline->pipelineLayout); } } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 5b18f545..8e99844c 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -124,7 +124,7 @@ typedef struct _PipelineSlot const GrRasterStateObject* grRasterState; } PipelineSlot; -typedef struct _UpdateTemplateEntry { +typedef struct _UpdateTemplateSlot { VkDescriptorUpdateTemplate updateTemplate; bool isDynamic; unsigned pathDepth; @@ -132,7 +132,7 @@ typedef struct _UpdateTemplateEntry { unsigned strideCount; unsigned strideOffsets[MAX_STRIDES]; unsigned strideSlotIndexes[MAX_STRIDES]; -} UpdateTemplateEntry; +} UpdateTemplateSlot; // Base object typedef struct _GrBaseObject { @@ -307,8 +307,8 @@ typedef struct _GrPipeline { unsigned stageCount; VkDescriptorSetLayout descriptorSetLayout; unsigned dynamicOffsetCount; - unsigned updateTemplateEntryCounts[GR_MAX_DESCRIPTOR_SETS]; - UpdateTemplateEntry* updateTemplateEntries[GR_MAX_DESCRIPTOR_SETS]; + unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS]; + UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS]; } GrPipeline; typedef struct _GrQueueSemaphore { diff --git a/src/mantle/mantle_object_man.c 
b/src/mantle/mantle_object_man.c index 019cb64a..a73233ec 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -92,12 +92,11 @@ GR_RESULT GR_STDCALL grDestroyObject( VKD.vkDestroyPipelineLayout(grDevice->device, grPipeline->pipelineLayout, NULL); VKD.vkDestroyDescriptorSetLayout(grDevice->device, grPipeline->descriptorSetLayout, NULL); for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - for (unsigned j = 0; j < grPipeline->updateTemplateEntryCounts[i]; j++) { - UpdateTemplateEntry* entry = &grPipeline->updateTemplateEntries[i][j]; - VKD.vkDestroyDescriptorUpdateTemplate(grDevice->device, entry->updateTemplate, - NULL); + for (unsigned j = 0; j < grPipeline->updateTemplateSlotCounts[i]; j++) { + UpdateTemplateSlot* slot = &grPipeline->updateTemplateSlots[i][j]; + VKD.vkDestroyDescriptorUpdateTemplate(grDevice->device, slot->updateTemplate, NULL); } - free(grPipeline->updateTemplateEntries[i]); + free(grPipeline->updateTemplateSlots[i]); } } break; case GR_OBJ_TYPE_QUEUE_SEMAPHORE: { diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index 730dc5e1..b24690f7 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -38,9 +38,9 @@ static VkDescriptorUpdateTemplate getVkDescriptorUpdateTemplate( return descriptorUpdateTemplate; } -static void addDynamicUpdateTemplateEntry( - unsigned* updateTemplateEntryCount, - UpdateTemplateEntry** updateTemplateEntries, +static void addDynamicUpdateTemplateSlot( + unsigned* updateTemplateSlotCount, + UpdateTemplateSlot** updateTemplateSlots, const GrDevice* grDevice, unsigned stageCount, const Stage* stages, @@ -104,10 +104,10 @@ static void addDynamicUpdateTemplateEntry( getVkDescriptorUpdateTemplate(grDevice, descriptorUpdateEntryCount, descriptorUpdateEntries, descriptorSetLayout); - (*updateTemplateEntryCount)++; - *updateTemplateEntries = realloc(*updateTemplateEntries, - *updateTemplateEntryCount * 
sizeof(UpdateTemplateEntry)); - (*updateTemplateEntries)[*updateTemplateEntryCount - 1] = (UpdateTemplateEntry) { + (*updateTemplateSlotCount)++; + *updateTemplateSlots = realloc(*updateTemplateSlots, + *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { .updateTemplate = updateTemplate, .isDynamic = true, .pathDepth = 0, @@ -117,18 +117,18 @@ static void addDynamicUpdateTemplateEntry( .strideSlotIndexes = { 0 }, // Initialized below }; - memcpy((*updateTemplateEntries)[*updateTemplateEntryCount - 1].strideOffsets, + memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets, strideOffsets, strideCount * sizeof(unsigned)); - memcpy((*updateTemplateEntries)[*updateTemplateEntryCount - 1].strideSlotIndexes, + memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes, strideSlotIndexes, strideCount * sizeof(unsigned)); free(descriptorUpdateEntries); } } -static void addUpdateTemplateEntriesFromMapping( - unsigned* updateTemplateEntryCount, - UpdateTemplateEntry** updateTemplateEntries, +static void addUpdateTemplateSlotsFromMapping( + unsigned* updateTemplateSlotCount, + UpdateTemplateSlot** updateTemplateSlots, const GrDevice* grDevice, const GR_DESCRIPTOR_SET_MAPPING* mapping, unsigned bindingCount, @@ -159,10 +159,10 @@ static void addUpdateTemplateEntriesFromMapping( path[pathDepth] = i; // Build update template for the nested set - addUpdateTemplateEntriesFromMapping(updateTemplateEntryCount, updateTemplateEntries, - grDevice, slotInfo->pNextLevelSet, - bindingCount, bindings, descriptorSetLayout, - pathDepth + 1, path); + addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, + grDevice, slotInfo->pNextLevelSet, + bindingCount, bindings, descriptorSetLayout, + pathDepth + 1, path); continue; } @@ -233,10 +233,10 @@ static void addUpdateTemplateEntriesFromMapping( getVkDescriptorUpdateTemplate(grDevice, 
descriptorUpdateEntryCount, descriptorUpdateEntries, descriptorSetLayout); - (*updateTemplateEntryCount)++; - *updateTemplateEntries = realloc(*updateTemplateEntries, - *updateTemplateEntryCount * sizeof(UpdateTemplateEntry)); - (*updateTemplateEntries)[*updateTemplateEntryCount - 1] = (UpdateTemplateEntry) { + (*updateTemplateSlotCount)++; + *updateTemplateSlots = realloc(*updateTemplateSlots, + *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { .updateTemplate = updateTemplate, .isDynamic = false, .pathDepth = pathDepth, @@ -246,28 +246,28 @@ static void addUpdateTemplateEntriesFromMapping( .strideSlotIndexes = { 0 }, // Initialized below }; - memcpy((*updateTemplateEntries)[*updateTemplateEntryCount - 1].path, + memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].path, path, pathDepth * sizeof(unsigned)); - memcpy((*updateTemplateEntries)[*updateTemplateEntryCount - 1].strideOffsets, + memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets, strideOffsets, strideCount * sizeof(unsigned)); - memcpy((*updateTemplateEntries)[*updateTemplateEntryCount - 1].strideSlotIndexes, + memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes, strideSlotIndexes, strideCount * sizeof(unsigned)); free(descriptorUpdateEntries); } } -static void getDescriptorUpdateEntries( - unsigned* updateTemplateEntryCount, - UpdateTemplateEntry** updateTemplateEntries, +static void getUpdateTemplateSlots( + unsigned* updateTemplateSlotCount, + UpdateTemplateSlot** updateTemplateSlots, const GrDevice* grDevice, unsigned stageCount, const Stage* stages, unsigned mappingIndex, VkDescriptorSetLayout descriptorSetLayout) { - addDynamicUpdateTemplateEntry(updateTemplateEntryCount, updateTemplateEntries, grDevice, - stageCount, stages, descriptorSetLayout); + addDynamicUpdateTemplateSlot(updateTemplateSlotCount, updateTemplateSlots, grDevice, + stageCount, stages, 
descriptorSetLayout); for (unsigned i = 0; i < stageCount; i++) { const Stage* stage = &stages[i]; @@ -280,10 +280,10 @@ static void getDescriptorUpdateEntries( } // TODO merge entries across stages - addUpdateTemplateEntriesFromMapping(updateTemplateEntryCount, updateTemplateEntries, - grDevice, &shader->descriptorSetMapping[mappingIndex], - grShader->bindingCount, grShader->bindings, - descriptorSetLayout, 0, path); + addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, + grDevice, &shader->descriptorSetMapping[mappingIndex], + grShader->bindingCount, grShader->bindings, + descriptorSetLayout, 0, path); } } @@ -702,8 +702,8 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; VkShaderModule rectangleShaderModule = VK_NULL_HANDLE; unsigned dynamicOffsetCount = 0; - unsigned updateTemplateEntryCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; - UpdateTemplateEntry* updateTemplateEntries[GR_MAX_DESCRIPTOR_SETS] = { NULL }; + unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; + UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; GrShader* grShaderRefs[MAX_STAGE_COUNT] = { NULL }; VkResult vkRes; @@ -856,8 +856,8 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( } for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - getDescriptorUpdateEntries(&updateTemplateEntryCounts[i], &updateTemplateEntries[i], - grDevice, COUNT_OF(stages), stages, i, descriptorSetLayout); + getUpdateTemplateSlots(&updateTemplateSlotCounts[i], &updateTemplateSlots[i], + grDevice, COUNT_OF(stages), stages, i, descriptorSetLayout); } // TODO keep track of rectangle shader module @@ -873,15 +873,15 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( .stageCount = COUNT_OF(stages), .descriptorSetLayout = descriptorSetLayout, .dynamicOffsetCount = dynamicOffsetCount, - .updateTemplateEntryCounts = { 0 }, // Initialized below - .updateTemplateEntries = { NULL }, // Initialized below + 
.updateTemplateSlotCounts = { 0 }, // Initialized below + .updateTemplateSlots = { NULL }, // Initialized below }; memcpy(grPipeline->grShaderRefs, grShaderRefs, sizeof(grPipeline->grShaderRefs)); - memcpy(grPipeline->updateTemplateEntryCounts, updateTemplateEntryCounts, - sizeof(grPipeline->updateTemplateEntryCounts)); - memcpy(grPipeline->updateTemplateEntries, updateTemplateEntries, - sizeof(grPipeline->updateTemplateEntries)); + memcpy(grPipeline->updateTemplateSlotCounts, updateTemplateSlotCounts, + sizeof(grPipeline->updateTemplateSlotCounts)); + memcpy(grPipeline->updateTemplateSlots, updateTemplateSlots, + sizeof(grPipeline->updateTemplateSlots)); *pPipeline = (GR_PIPELINE)grPipeline; return GR_SUCCESS; @@ -906,8 +906,8 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; VkPipeline vkPipeline = VK_NULL_HANDLE; unsigned dynamicOffsetCount = 0; - unsigned updateTemplateEntryCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; - UpdateTemplateEntry* updateTemplateEntries[GR_MAX_DESCRIPTOR_SETS] = { NULL }; + unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; + UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; // TODO validate parameters @@ -946,8 +946,8 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( } for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - getDescriptorUpdateEntries(&updateTemplateEntryCounts[i], &updateTemplateEntries[i], - grDevice, 1, &stage, i, descriptorSetLayout); + getUpdateTemplateSlots(&updateTemplateSlotCounts[i], &updateTemplateSlots[i], + grDevice, 1, &stage, i, descriptorSetLayout); } const VkComputePipelineCreateInfo pipelineCreateInfo = { @@ -987,14 +987,14 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( .stageCount = 1, .descriptorSetLayout = descriptorSetLayout, .dynamicOffsetCount = dynamicOffsetCount, - .updateTemplateEntryCounts = { 0 }, // Initialized below - .updateTemplateEntries = { NULL }, // Initialized below + .updateTemplateSlotCounts = 
{ 0 }, // Initialized below + .updateTemplateSlots = { NULL }, // Initialized below }; - memcpy(grPipeline->updateTemplateEntryCounts, updateTemplateEntryCounts, - sizeof(grPipeline->updateTemplateEntryCounts)); - memcpy(grPipeline->updateTemplateEntries, updateTemplateEntries, - sizeof(grPipeline->updateTemplateEntries)); + memcpy(grPipeline->updateTemplateSlotCounts, updateTemplateSlotCounts, + sizeof(grPipeline->updateTemplateSlotCounts)); + memcpy(grPipeline->updateTemplateSlots, updateTemplateSlots, + sizeof(grPipeline->updateTemplateSlots)); *pPipeline = (GR_PIPELINE)grPipeline; return GR_SUCCESS; From 7af39f1aac1915b1d794d782465d89443a4b90ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 30 Oct 2022 11:19:18 -0700 Subject: [PATCH 02/14] merge descriptor update entries across stages lowers the number of update templates created. fixes #50 --- src/mantle/mantle_object.h | 2 +- src/mantle/mantle_shader_pipeline.c | 230 ++++++++++++++++------------ 2 files changed, 136 insertions(+), 96 deletions(-) diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 8e99844c..bf16eeea 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -10,7 +10,7 @@ #define MAX_STAGE_COUNT 5 // VS, HS, DS, GS, PS #define MAX_PATH_DEPTH 8 // Levels of nested descriptor sets -#define MAX_STRIDES 8 // Number of strides per descriptor update entry +#define MAX_STRIDES 8 // Number of buffer strides per update template slot #define UNIVERSAL_ATOMIC_COUNTERS_COUNT (512) #define COMPUTE_ATOMIC_COUNTERS_COUNT (1024) diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index b24690f7..5bdb7d75 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -41,17 +41,9 @@ static VkDescriptorUpdateTemplate getVkDescriptorUpdateTemplate( static void addDynamicUpdateTemplateSlot( unsigned* updateTemplateSlotCount, UpdateTemplateSlot** updateTemplateSlots, - const 
GrDevice* grDevice, unsigned stageCount, - const Stage* stages, - VkDescriptorSetLayout descriptorSetLayout) + const Stage* stages) { - unsigned descriptorUpdateEntryCount = 0; - VkDescriptorUpdateTemplateEntry* descriptorUpdateEntries = NULL; - unsigned strideCount = 0; - unsigned strideOffsets[MAX_STRIDES]; - unsigned strideSlotIndexes[MAX_STRIDES]; - // Find all dynamic memory view descriptors across all stages, // to be updated in a single update template for (unsigned i = 0; i < stageCount; i++) { @@ -71,12 +63,9 @@ static void addDynamicUpdateTemplateSlot( binding->ilIndex == dynamicSlotInfo->shaderEntityIndex && binding->type == ILC_BINDING_RESOURCE) { // Found a dynamic memory view descriptor - descriptorUpdateEntryCount++; - descriptorUpdateEntries = realloc(descriptorUpdateEntries, - descriptorUpdateEntryCount * - sizeof(VkDescriptorUpdateTemplateEntry)); - descriptorUpdateEntries[descriptorUpdateEntryCount - 1] = - (VkDescriptorUpdateTemplateEntry) { + VkDescriptorUpdateTemplateEntry* entry = + malloc(sizeof(VkDescriptorUpdateTemplateEntry)); + *entry = (VkDescriptorUpdateTemplateEntry) { .dstBinding = binding->vkIndex, .dstArrayElement = 0, .descriptorCount = 1, @@ -85,64 +74,41 @@ static void addDynamicUpdateTemplateSlot( .stride = 0, }; + (*updateTemplateSlotCount)++; + *updateTemplateSlots = realloc(*updateTemplateSlots, + *updateTemplateSlotCount * + sizeof(UpdateTemplateSlot)); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { + .updateTemplate = (VkDescriptorUpdateTemplate)entry, // Stuff the entry here + .isDynamic = true, + .pathDepth = 0, + .path = { 0 }, + .strideCount = 0, // Initialized below + .strideOffsets = { 0 }, // Initialized below + .strideSlotIndexes = { 0 }, // Initialized below + }; + if (binding->strideIndex >= 0) { - if (strideCount >= MAX_STRIDES) { - LOGE("exceeded max strides of %d\n", MAX_STRIDES); - assert(false); - } - - strideCount++; - strideOffsets[strideCount - 1] = binding->strideIndex * 
sizeof(uint32_t); - strideSlotIndexes[strideCount - 1] = 0; + unsigned strideOffset = binding->strideIndex * sizeof(uint32_t); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = + strideOffset; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = 0; } } } } - - if (descriptorUpdateEntryCount > 0) { - VkDescriptorUpdateTemplate updateTemplate = - getVkDescriptorUpdateTemplate(grDevice, descriptorUpdateEntryCount, - descriptorUpdateEntries, descriptorSetLayout); - - (*updateTemplateSlotCount)++; - *updateTemplateSlots = realloc(*updateTemplateSlots, - *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { - .updateTemplate = updateTemplate, - .isDynamic = true, - .pathDepth = 0, - .path = { 0 }, - .strideCount = strideCount, - .strideOffsets = { 0 }, // Initialized below - .strideSlotIndexes = { 0 }, // Initialized below - }; - - memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets, - strideOffsets, strideCount * sizeof(unsigned)); - memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes, - strideSlotIndexes, strideCount * sizeof(unsigned)); - - free(descriptorUpdateEntries); - } } static void addUpdateTemplateSlotsFromMapping( unsigned* updateTemplateSlotCount, UpdateTemplateSlot** updateTemplateSlots, - const GrDevice* grDevice, const GR_DESCRIPTOR_SET_MAPPING* mapping, unsigned bindingCount, const IlcBinding* bindings, - VkDescriptorSetLayout descriptorSetLayout, unsigned pathDepth, unsigned* path) { - unsigned descriptorUpdateEntryCount = 0; - VkDescriptorUpdateTemplateEntry* descriptorUpdateEntries = NULL; - unsigned strideCount = 0; - unsigned strideOffsets[MAX_STRIDES]; - unsigned strideSlotIndexes[MAX_STRIDES]; - for (unsigned i = 0; i < mapping->descriptorCount; i++) { const GR_DESCRIPTOR_SLOT_INFO* slotInfo = 
&mapping->pDescriptorInfo[i]; const IlcBinding* binding = NULL; @@ -158,10 +124,9 @@ static void addUpdateTemplateSlotsFromMapping( // Mark path path[pathDepth] = i; - // Build update template for the nested set + // Add slots from the nested set addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, - grDevice, slotInfo->pNextLevelSet, - bindingCount, bindings, descriptorSetLayout, + slotInfo->pNextLevelSet, bindingCount, bindings, pathDepth + 1, path); continue; } @@ -202,12 +167,8 @@ static void addUpdateTemplateSlotsFromMapping( assert(false); } - descriptorUpdateEntryCount++; - descriptorUpdateEntries = realloc(descriptorUpdateEntries, - descriptorUpdateEntryCount * - sizeof(VkDescriptorUpdateTemplateEntry)); - descriptorUpdateEntries[descriptorUpdateEntryCount - 1] = - (VkDescriptorUpdateTemplateEntry) { + VkDescriptorUpdateTemplateEntry* entry = malloc(sizeof(VkDescriptorUpdateTemplateEntry)); + *entry = (VkDescriptorUpdateTemplateEntry) { .dstBinding = binding->vkIndex, .dstArrayElement = 0, .descriptorCount = 1, @@ -216,44 +177,122 @@ static void addUpdateTemplateSlotsFromMapping( .stride = 0, }; - if (binding->strideIndex >= 0) { - if (strideCount >= MAX_STRIDES) { - LOGE("exceeded max strides of %d\n", MAX_STRIDES); - assert(false); - } - - strideCount++; - strideOffsets[strideCount - 1] = binding->strideIndex * sizeof(uint32_t); - strideSlotIndexes[strideCount - 1] = i; - } - } - - if (descriptorUpdateEntryCount > 0) { - VkDescriptorUpdateTemplate updateTemplate = - getVkDescriptorUpdateTemplate(grDevice, descriptorUpdateEntryCount, - descriptorUpdateEntries, descriptorSetLayout); - (*updateTemplateSlotCount)++; *updateTemplateSlots = realloc(*updateTemplateSlots, *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { - .updateTemplate = updateTemplate, + .updateTemplate = (VkDescriptorUpdateTemplate)entry, // Stuff the entry here .isDynamic = false, 
.pathDepth = pathDepth, .path = { 0 }, // Initialized below - .strideCount = strideCount, + .strideCount = 0, // Initialized below .strideOffsets = { 0 }, // Initialized below .strideSlotIndexes = { 0 }, // Initialized below }; memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].path, path, pathDepth * sizeof(unsigned)); - memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets, - strideOffsets, strideCount * sizeof(unsigned)); - memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes, - strideSlotIndexes, strideCount * sizeof(unsigned)); + if (binding->strideIndex >= 0) { + unsigned strideOffset = binding->strideIndex * sizeof(uint32_t); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = strideOffset; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = i; + } + } +} + +static int compareUpdateTemplateSlots( + const void* a, + const void* b) +{ + const UpdateTemplateSlot* slotA = a; + const UpdateTemplateSlot* slotB = b; + + // Make slots with the same path adjacent + if (slotA->isDynamic != slotB->isDynamic) { + return (int)slotA->isDynamic - (int)slotB->isDynamic; + } + if (slotA->pathDepth != slotB->pathDepth) { + return (int)slotA->pathDepth - (int)slotB->pathDepth; + } + return memcmp(slotA->path, slotB->path, slotA->pathDepth * sizeof(slotA->path[0])); +} + +static void mergeUpdateTemplateSlots( + unsigned* updateTemplateSlotCount, + UpdateTemplateSlot** updateTemplateSlots, + const GrDevice* grDevice, + VkDescriptorSetLayout descriptorSetLayout) +{ + // Group slots by path + qsort(*updateTemplateSlots, *updateTemplateSlotCount, sizeof(UpdateTemplateSlot), + compareUpdateTemplateSlots); + + unsigned descriptorUpdateEntryCount = 0; + VkDescriptorUpdateTemplateEntry* descriptorUpdateEntries = NULL; + + for (unsigned i = 0; i < *updateTemplateSlotCount; i++) { + bool isLastSlot = (i + 1) == 
*updateTemplateSlotCount; + UpdateTemplateSlot* slot = &(*updateTemplateSlots)[i]; + UpdateTemplateSlot* nextSlot = &(*updateTemplateSlots)[i + 1]; + + // Add new entry + VkDescriptorUpdateTemplateEntry* entry = + (VkDescriptorUpdateTemplateEntry*)slot->updateTemplate; + + descriptorUpdateEntryCount++; + descriptorUpdateEntries = realloc(descriptorUpdateEntries, + descriptorUpdateEntryCount * + sizeof(VkDescriptorUpdateTemplateEntry)); + descriptorUpdateEntries[descriptorUpdateEntryCount - 1] = *entry; + free(entry); + + if (!isLastSlot && + slot->isDynamic == nextSlot->isDynamic && + slot->pathDepth == nextSlot->pathDepth && + memcmp(slot->path, nextSlot->path, slot->pathDepth * sizeof(slot->path[0])) == 0) { + // Can't merge yet + continue; + } + + unsigned mergedIdx = i - descriptorUpdateEntryCount + 1; + UpdateTemplateSlot* mergedSlot = &(*updateTemplateSlots)[mergedIdx]; + + mergedSlot->updateTemplate = + getVkDescriptorUpdateTemplate(grDevice, descriptorUpdateEntryCount, + descriptorUpdateEntries, descriptorSetLayout); free(descriptorUpdateEntries); + + // TODO deduplicate strides + for (unsigned j = mergedIdx + 1; j <= i; j++) { + UpdateTemplateSlot* slotToMerge = &(*updateTemplateSlots)[j]; + + if (slotToMerge->strideCount == 1) { + if (mergedSlot->strideCount >= MAX_STRIDES) { + LOGE("exceeded max strides of %d\n", MAX_STRIDES); + assert(false); + } + + mergedSlot->strideCount++; + mergedSlot->strideOffsets[mergedSlot->strideCount - 1] = + slotToMerge->strideOffsets[0]; + mergedSlot->strideSlotIndexes[mergedSlot->strideCount - 1] = + slotToMerge->strideSlotIndexes[0]; + } + } + + // Drop temporary slots + memmove(mergedSlot + 1, nextSlot, + (*updateTemplateSlotCount - i - 1) * sizeof(UpdateTemplateSlot)); + *updateTemplateSlotCount -= descriptorUpdateEntryCount - 1; + *updateTemplateSlots = realloc(*updateTemplateSlots, + *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); + + // Update state + i = mergedIdx; + descriptorUpdateEntryCount = 0; + 
descriptorUpdateEntries = NULL; } } @@ -266,8 +305,8 @@ static void getUpdateTemplateSlots( unsigned mappingIndex, VkDescriptorSetLayout descriptorSetLayout) { - addDynamicUpdateTemplateSlot(updateTemplateSlotCount, updateTemplateSlots, grDevice, - stageCount, stages, descriptorSetLayout); + // TODO move to loop below + addDynamicUpdateTemplateSlot(updateTemplateSlotCount, updateTemplateSlots, stageCount, stages); for (unsigned i = 0; i < stageCount; i++) { const Stage* stage = &stages[i]; @@ -279,12 +318,13 @@ static void getUpdateTemplateSlots( continue; } - // TODO merge entries across stages addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, - grDevice, &shader->descriptorSetMapping[mappingIndex], - grShader->bindingCount, grShader->bindings, - descriptorSetLayout, 0, path); + &shader->descriptorSetMapping[mappingIndex], + grShader->bindingCount, grShader->bindings, 0, path); } + + mergeUpdateTemplateSlots(updateTemplateSlotCount, updateTemplateSlots, grDevice, + descriptorSetLayout); } static VkDescriptorSetLayout getVkDescriptorSetLayout( From 3b53c17c079e07aaae1ab9f7b3767c56c186cde3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 30 Oct 2022 16:34:10 -0700 Subject: [PATCH 03/14] move addDynamicUpdateTemplateSlots to loop we can do this now that everything gets merged after the fact. 
--- src/mantle/mantle_shader_pipeline.c | 98 +++++++++++++---------------- 1 file changed, 43 insertions(+), 55 deletions(-) diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index 5bdb7d75..c4dfdaf1 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -38,63 +38,51 @@ static VkDescriptorUpdateTemplate getVkDescriptorUpdateTemplate( return descriptorUpdateTemplate; } -static void addDynamicUpdateTemplateSlot( +static void addDynamicUpdateTemplateSlots( unsigned* updateTemplateSlotCount, UpdateTemplateSlot** updateTemplateSlots, - unsigned stageCount, - const Stage* stages) + const GR_DYNAMIC_MEMORY_VIEW_SLOT_INFO* dynamicMapping, + unsigned bindingCount, + const IlcBinding* bindings) { - // Find all dynamic memory view descriptors across all stages, - // to be updated in a single update template - for (unsigned i = 0; i < stageCount; i++) { - const Stage* stage = &stages[i]; - const GR_PIPELINE_SHADER* shader = stage->shader; - const GR_DYNAMIC_MEMORY_VIEW_SLOT_INFO* dynamicSlotInfo = &shader->dynamicMemoryViewMapping; - const GrShader* grShader = shader->shader; - - if (grShader == NULL) { - continue; - } + for (unsigned i = 0; i < bindingCount; i++) { + const IlcBinding* binding = &bindings[i]; + + if (dynamicMapping->slotObjectType != GR_SLOT_UNUSED && + binding->ilIndex == dynamicMapping->shaderEntityIndex && + binding->type == ILC_BINDING_RESOURCE) { + // Found a dynamic memory view descriptor + VkDescriptorUpdateTemplateEntry* entry = + malloc(sizeof(VkDescriptorUpdateTemplateEntry)); + *entry = (VkDescriptorUpdateTemplateEntry) { + .dstBinding = binding->vkIndex, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, + .offset = OFFSET_OF_UNION(DescriptorSetSlot, buffer, bufferInfo), + .stride = 0, + }; - for (unsigned j = 0; j < grShader->bindingCount; j++) { - const IlcBinding* binding = &grShader->bindings[j]; + 
(*updateTemplateSlotCount)++; + *updateTemplateSlots = realloc(*updateTemplateSlots, + *updateTemplateSlotCount * + sizeof(UpdateTemplateSlot)); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { + .updateTemplate = (VkDescriptorUpdateTemplate)entry, // Stuff the entry here + .isDynamic = true, + .pathDepth = 0, + .path = { 0 }, + .strideCount = 0, // Initialized below + .strideOffsets = { 0 }, // Initialized below + .strideSlotIndexes = { 0 }, // Initialized below + }; - if (dynamicSlotInfo->slotObjectType != GR_SLOT_UNUSED && - binding->ilIndex == dynamicSlotInfo->shaderEntityIndex && - binding->type == ILC_BINDING_RESOURCE) { - // Found a dynamic memory view descriptor - VkDescriptorUpdateTemplateEntry* entry = - malloc(sizeof(VkDescriptorUpdateTemplateEntry)); - *entry = (VkDescriptorUpdateTemplateEntry) { - .dstBinding = binding->vkIndex, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, - .offset = OFFSET_OF_UNION(DescriptorSetSlot, buffer, bufferInfo), - .stride = 0, - }; - - (*updateTemplateSlotCount)++; - *updateTemplateSlots = realloc(*updateTemplateSlots, - *updateTemplateSlotCount * - sizeof(UpdateTemplateSlot)); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { - .updateTemplate = (VkDescriptorUpdateTemplate)entry, // Stuff the entry here - .isDynamic = true, - .pathDepth = 0, - .path = { 0 }, - .strideCount = 0, // Initialized below - .strideOffsets = { 0 }, // Initialized below - .strideSlotIndexes = { 0 }, // Initialized below - }; - - if (binding->strideIndex >= 0) { - unsigned strideOffset = binding->strideIndex * sizeof(uint32_t); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = - strideOffset; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = 0; - } + if (binding->strideIndex >= 0) { + unsigned 
strideOffset = binding->strideIndex * sizeof(uint32_t); + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = + strideOffset; + (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = 0; } } } @@ -305,9 +293,6 @@ static void getUpdateTemplateSlots( unsigned mappingIndex, VkDescriptorSetLayout descriptorSetLayout) { - // TODO move to loop below - addDynamicUpdateTemplateSlot(updateTemplateSlotCount, updateTemplateSlots, stageCount, stages); - for (unsigned i = 0; i < stageCount; i++) { const Stage* stage = &stages[i]; const GR_PIPELINE_SHADER* shader = stage->shader; @@ -318,6 +303,9 @@ static void getUpdateTemplateSlots( continue; } + addDynamicUpdateTemplateSlots(updateTemplateSlotCount, updateTemplateSlots, + &shader->dynamicMemoryViewMapping, + grShader->bindingCount, grShader->bindings); addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, &shader->descriptorSetMapping[mappingIndex], grShader->bindingCount, grShader->bindings, 0, path); From cab0d092d9202f581016e971556015f31460ce17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 30 Oct 2022 18:16:15 -0700 Subject: [PATCH 04/14] delete workaround for linear transfer-only images used by Star Swarm and RADV doesn't support them, but it runs regardless. 
--- src/mantle/mantle_cmd_buf.c | 107 ++++++++------------------- src/mantle/mantle_image_sample.c | 120 +------------------------------ src/mantle/mantle_object.h | 18 ----- src/mantle/mantle_object_man.c | 17 +---- 4 files changed, 33 insertions(+), 229 deletions(-) diff --git a/src/mantle/mantle_cmd_buf.c b/src/mantle/mantle_cmd_buf.c index 6ea37ce2..81f050f6 100644 --- a/src/mantle/mantle_cmd_buf.c +++ b/src/mantle/mantle_cmd_buf.c @@ -824,87 +824,38 @@ GR_VOID GR_STDCALL grCmdCopyImage( grCmdBufferEndRenderPass(grCmdBuffer); - if (grSrcImage->image != VK_NULL_HANDLE) { - STACK_ARRAY(VkImageCopy, vkRegions, 128, regionCount); - - for (unsigned i = 0; i < regionCount; i++) { - const GR_IMAGE_COPY* region = &pRegions[i]; - - vkRegions[i] = (VkImageCopy) { - .srcSubresource = getVkImageSubresourceLayers(region->srcSubresource), - .srcOffset = { - region->srcOffset.x * srcTileSize, - region->srcOffset.y * srcTileSize, - region->srcOffset.z, - }, - .dstSubresource = getVkImageSubresourceLayers(region->destSubresource), - .dstOffset = { - region->destOffset.x * dstTileSize, - region->destOffset.y * dstTileSize, - region->destOffset.z, - }, - .extent = { - region->extent.width * extentTileSize, - region->extent.height * extentTileSize, - region->extent.depth, - }, - }; - } - - VKD.vkCmdCopyImage(grCmdBuffer->commandBuffer, - grSrcImage->image, getVkImageLayout(GR_IMAGE_STATE_DATA_TRANSFER), - grDstImage->image, getVkImageLayout(GR_IMAGE_STATE_DATA_TRANSFER), - regionCount, vkRegions); - - STACK_ARRAY_FINISH(vkRegions); - } else { - STACK_ARRAY(VkBufferImageCopy, vkRegions, 128, regionCount); - - for (unsigned i = 0; i < regionCount; i++) { - const GR_IMAGE_COPY* region = &pRegions[i]; - - if (region->srcSubresource.aspect != GR_IMAGE_ASPECT_COLOR) { - LOGW("unhandled non-color aspect 0x%X\n", region->srcSubresource.aspect); - } - if (region->srcOffset.x != 0 || region->srcOffset.y != 0 || region->srcOffset.z != 0) { - LOGW("unhandled region offset %u %u %u for 
buffer\n", - region->srcOffset.x, region->srcOffset.y, region->srcOffset.z); - } - - const VkExtent3D srcTexelExtent = { - grSrcImage->extent.width * srcTileSize, - grSrcImage->extent.height * srcTileSize, - grSrcImage->extent.depth, - }; + STACK_ARRAY(VkImageCopy, vkRegions, 128, regionCount); - vkRegions[i] = (VkBufferImageCopy) { - .bufferOffset = grImageGetBufferOffset(srcTexelExtent, grSrcImage->format, - region->srcSubresource.arraySlice, - grSrcImage->arrayLayers, - region->srcSubresource.mipLevel), - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = getVkImageSubresourceLayers(region->destSubresource), - .imageOffset = { - region->destOffset.x * dstTileSize, - region->destOffset.y * dstTileSize, - region->destOffset.z, - }, - .imageExtent = { - region->extent.width * dstTileSize, - region->extent.height * dstTileSize, - region->extent.depth, - }, - }; - } + for (unsigned i = 0; i < regionCount; i++) { + const GR_IMAGE_COPY* region = &pRegions[i]; + + vkRegions[i] = (VkImageCopy) { + .srcSubresource = getVkImageSubresourceLayers(region->srcSubresource), + .srcOffset = { + region->srcOffset.x * srcTileSize, + region->srcOffset.y * srcTileSize, + region->srcOffset.z, + }, + .dstSubresource = getVkImageSubresourceLayers(region->destSubresource), + .dstOffset = { + region->destOffset.x * dstTileSize, + region->destOffset.y * dstTileSize, + region->destOffset.z, + }, + .extent = { + region->extent.width * extentTileSize, + region->extent.height * extentTileSize, + region->extent.depth, + }, + }; + } - VKD.vkCmdCopyBufferToImage(grCmdBuffer->commandBuffer, - grSrcImage->buffer, grDstImage->image, - getVkImageLayout(GR_IMAGE_STATE_DATA_TRANSFER), - regionCount, vkRegions); + VKD.vkCmdCopyImage(grCmdBuffer->commandBuffer, + grSrcImage->image, getVkImageLayout(GR_IMAGE_STATE_DATA_TRANSFER), + grDstImage->image, getVkImageLayout(GR_IMAGE_STATE_DATA_TRANSFER), + regionCount, vkRegions); - STACK_ARRAY_FINISH(vkRegions); - } + 
STACK_ARRAY_FINISH(vkRegions); } GR_VOID GR_STDCALL grCmdCopyMemoryToImage( diff --git a/src/mantle/mantle_image_sample.c b/src/mantle/mantle_image_sample.c index cddf6a5c..02d8dd01 100644 --- a/src/mantle/mantle_image_sample.c +++ b/src/mantle/mantle_image_sample.c @@ -21,57 +21,6 @@ static bool isMsaaSupported( return formatProps.sampleCounts > VK_SAMPLE_COUNT_1_BIT; } -unsigned grImageGetBufferOffset( - VkExtent3D extent, - VkFormat format, - unsigned arraySlice, - unsigned arraySize, - unsigned mipLevel) -{ - unsigned offset = 0; - - // Find mipmap base offset - for (unsigned i = 0; i < mipLevel; i++) { - for (unsigned j = 0; j < arraySize; j++) { - offset += grImageGetBufferDepthPitch(extent, format, i) * - MIP(extent.depth, i); - } - } - - // Find slice offset - for (unsigned i = 0; i < arraySlice; i++) { - offset += grImageGetBufferDepthPitch(extent, format, mipLevel) * - MIP(extent.depth, i); - } - - return offset; -} - -unsigned grImageGetBufferRowPitch( - VkExtent3D extent, - VkFormat format, - unsigned mipLevel) -{ - unsigned texelSize = getVkFormatTexelSize(format); - unsigned tileSize = getVkFormatTileSize(format); - - return texelSize * - MIP(CEILDIV(extent.width, tileSize), mipLevel); -} - -unsigned grImageGetBufferDepthPitch( - VkExtent3D extent, - VkFormat format, - unsigned mipLevel) -{ - unsigned texelSize = getVkFormatTexelSize(format); - unsigned tileSize = getVkFormatTileSize(format); - - return texelSize * - MIP(CEILDIV(extent.width, tileSize), mipLevel) * - MIP(CEILDIV(extent.height, tileSize), mipLevel); -} - // Image and Sample Functions GR_RESULT GR_STDCALL grGetFormatInfo( @@ -210,51 +159,6 @@ GR_RESULT GR_STDCALL grCreateImage( : VK_IMAGE_LAYOUT_PREINITIALIZED, }; - // Use a buffer for linear transfer-only images - if (pCreateInfo->samples <= 1 && - pCreateInfo->tiling == GR_LINEAR_TILING && - pCreateInfo->usage == 0) { - VkBuffer vkBuffer = VK_NULL_HANDLE; - - unsigned size = grImageGetBufferOffset(createInfo.extent, createInfo.format, 
- 0, createInfo.arrayLayers, createInfo.mipLevels); - - const VkBufferCreateInfo bufferCreateInfo = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .size = size, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = NULL, - }; - - vkRes = VKD.vkCreateBuffer(grDevice->device, &bufferCreateInfo, NULL, &vkBuffer); - if (vkRes != VK_SUCCESS) { - LOGE("vkCreateBuffer failed (%d)\n", vkRes); - return getGrResult(vkRes); - } - - GrImage* grImage = malloc(sizeof(GrImage)); - *grImage = (GrImage) { - .grObj = { GR_OBJ_TYPE_IMAGE, grDevice }, - .image = VK_NULL_HANDLE, - .buffer = vkBuffer, - .imageType = createInfo.imageType, - .extent = createInfo.extent, - .arrayLayers = createInfo.arrayLayers, - .format = createInfo.format, - .usage = createInfo.usage, - .multiplyCubeLayers = false, - .isOpaque = false, - }; - - *pImage = (GR_IMAGE)grImage; - return GR_SUCCESS; - } - VkImageFormatProperties imageFormatProperties; vkRes = vki.vkGetPhysicalDeviceImageFormatProperties(grDevice->physicalDevice, createInfo.format, createInfo.imageType, @@ -279,7 +183,6 @@ GR_RESULT GR_STDCALL grCreateImage( *grImage = (GrImage) { .grObj = { GR_OBJ_TYPE_IMAGE, grDevice }, .image = vkImage, - .buffer = VK_NULL_HANDLE, .imageType = createInfo.imageType, .extent = createInfo.extent, .arrayLayers = createInfo.arrayLayers, @@ -331,28 +234,7 @@ GR_RESULT GR_STDCALL grGetImageSubresourceInfo( return GR_SUCCESS; } - if (grImage->buffer != VK_NULL_HANDLE) { - if (pSubresource->aspect != GR_IMAGE_ASPECT_COLOR) { - LOGE("unhandled non-color aspect 0x%X\n", pSubresource->aspect); - assert(false); - } - - unsigned offset = grImageGetBufferOffset(grImage->extent, grImage->format, - pSubresource->arraySlice, grImage->arrayLayers, - pSubresource->mipLevel); - unsigned size = grImageGetBufferOffset(grImage->extent, grImage->format, - 
pSubresource->arraySlice + 1, grImage->arrayLayers, - pSubresource->mipLevel) - offset; - - *(GR_SUBRESOURCE_LAYOUT*)pData = (GR_SUBRESOURCE_LAYOUT) { - .offset = offset, - .size = size, - .rowPitch = grImageGetBufferRowPitch(grImage->extent, grImage->format, - pSubresource->mipLevel), - .depthPitch = grImageGetBufferDepthPitch(grImage->extent, grImage->format, - pSubresource->mipLevel), - }; - } else if (grImage->isOpaque) { + if (grImage->isOpaque) { // Mantle spec: "For opaque images, the returned pitch values are zero." *(GR_SUBRESOURCE_LAYOUT*)pData = (GR_SUBRESOURCE_LAYOUT) { .offset = 0, diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index bf16eeea..49460151 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -268,7 +268,6 @@ typedef struct _GrGpuMemory { typedef struct _GrImage { GrObject grObj; VkImage image; - VkBuffer buffer; VkImageType imageType; VkExtent3D extent; unsigned arrayLayers; @@ -380,23 +379,6 @@ void grCmdBufferEndRenderPass( void grCmdBufferResetState( GrCmdBuffer* grCmdBuffer); -unsigned grImageGetBufferOffset( - VkExtent3D extent, - VkFormat format, - unsigned arraySlice, - unsigned arraySize, - unsigned mipLevel); - -unsigned grImageGetBufferRowPitch( - VkExtent3D extent, - VkFormat format, - unsigned mipLevel); - -unsigned grImageGetBufferDepthPitch( - VkExtent3D extent, - VkFormat format, - unsigned mipLevel); - VkPipeline grPipelineFindOrCreateVkPipeline( GrPipeline* grPipeline, const GrColorBlendStateObject* grColorBlendState, diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index a73233ec..91afbd73 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -61,7 +61,6 @@ GR_RESULT GR_STDCALL grDestroyObject( GrImage* grImage = (GrImage*)grObject; VKD.vkDestroyImage(grDevice->device, grImage->image, NULL); - VKD.vkDestroyBuffer(grDevice->device, grImage->buffer, NULL); grQueueRemoveInitialImage(grImage); grWsiDestroyImage(grImage); @@ 
-185,12 +184,7 @@ GR_RESULT GR_STDCALL grGetObjectInfo( GrImage* grImage = (GrImage*)grBaseObject; GrDevice* grDevice = GET_OBJ_DEVICE(grBaseObject); - if (grImage->image != VK_NULL_HANDLE) { - VKD.vkGetImageMemoryRequirements(grDevice->device, grImage->image, &memReqs); - } else { - VKD.vkGetBufferMemoryRequirements(grDevice->device, grImage->buffer, &memReqs); - } - + VKD.vkGetImageMemoryRequirements(grDevice->device, grImage->image, &memReqs); *grMemReqs = getGrMemoryRequirements(memReqs); } break; case GR_OBJ_TYPE_BORDER_COLOR_PALETTE: @@ -308,13 +302,8 @@ GR_RESULT GR_STDCALL grBindObjectMemory( GrImage* grImage = (GrImage*)grObject; GrDevice* grDevice = GET_OBJ_DEVICE(grObject); - if (grImage->image != VK_NULL_HANDLE) { - vkRes = VKD.vkBindImageMemory(grDevice->device, grImage->image, - grGpuMemory->deviceMemory, offset); - } else { - vkRes = VKD.vkBindBufferMemory(grDevice->device, grImage->buffer, - grGpuMemory->deviceMemory, offset); - } + vkRes = VKD.vkBindImageMemory(grDevice->device, grImage->image, + grGpuMemory->deviceMemory, offset); } break; case GR_OBJ_TYPE_BORDER_COLOR_PALETTE: case GR_OBJ_TYPE_COLOR_TARGET_VIEW: From 5d446e157fcded4b069ecf46a18fd6a9e27f6c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 30 Oct 2022 18:22:40 -0700 Subject: [PATCH 05/14] add SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS quirk for RADV/Star Swarm only --- src/mantle/mantle_image_sample.c | 11 ++++++++--- src/mantle/quirk.c | 3 ++- src/mantle/quirk.h | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mantle/mantle_image_sample.c b/src/mantle/mantle_image_sample.c index 02d8dd01..35f3595a 100644 --- a/src/mantle/mantle_image_sample.c +++ b/src/mantle/mantle_image_sample.c @@ -165,9 +165,14 @@ GR_RESULT GR_STDCALL grCreateImage( createInfo.tiling, createInfo.usage, createInfo.flags, &imageFormatProperties); if (vkRes == VK_ERROR_FORMAT_NOT_SUPPORTED) { - LOGW("unsupported format %d for image type %d, tiling %d, 
usage 0x%X and flags 0x%X\n", - createInfo.format, createInfo.imageType, createInfo.tiling, createInfo.usage, - createInfo.flags); + if (!(quirkHas(QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS) && + pCreateInfo->samples <= 1 && + pCreateInfo->tiling == GR_LINEAR_TILING && + pCreateInfo->usage == 0)) { + LOGW("unsupported format %d for image type %d, tiling %d, usage 0x%X and flags 0x%X\n", + createInfo.format, createInfo.imageType, createInfo.tiling, createInfo.usage, + createInfo.flags); + } } else if (vkRes != VK_SUCCESS) { LOGE("vkGetPhysicalDeviceImageFormatProperties failed (%d)\n", vkRes); return getGrResult(vkRes); diff --git a/src/mantle/quirk.c b/src/mantle/quirk.c index bb41ec01..6401fc0c 100644 --- a/src/mantle/quirk.c +++ b/src/mantle/quirk.c @@ -11,7 +11,8 @@ void quirkInit( QUIRK_IMAGE_DATA_TRANSFER_STATE_FOR_RAW_CLEAR | QUIRK_COMPRESSED_IMAGE_COPY_IN_TEXELS | QUIRK_INVALID_CMD_BUFFER_RESET | - QUIRK_CUBEMAP_LAYER_DIV_6; + QUIRK_CUBEMAP_LAYER_DIV_6 | + QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS; } else if (!strcmp(appInfo->pEngineName, "CivTech")) { mQuirks = QUIRK_NON_ZERO_MEM_REQ | QUIRK_READ_ONLY_IMAGE_STATE_MISMATCH | diff --git a/src/mantle/quirk.h b/src/mantle/quirk.h index 0096982d..448c882f 100644 --- a/src/mantle/quirk.h +++ b/src/mantle/quirk.h @@ -24,6 +24,10 @@ typedef enum { // Not all objects are destroyed before the device gets destroyed QUIRK_KEEP_VK_DEVICE = 1 << 6, + + // RADV doesn't support linear transfer-only images used by Star Swarm, but it has no effect + // on rendering. Silence it. + QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS = 1 << 7, } QUIRK_FLAGS; void quirkInit( From 772042112de13b7f6a067e4b4d8b3d4776429406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 6 Nov 2022 12:28:28 -0800 Subject: [PATCH 06/14] mantle: filter out device coherent/uncached memory types causes performance issues and glitches on RDNA2. 
--- src/mantle/mantle_init_device.c | 22 ++++++++++++++++++++++ src/mantle/mantle_memory_man.c | 18 ++++++++++++------ src/mantle/mantle_object.h | 2 ++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index d2035fce..6da85692 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -730,6 +730,25 @@ GR_RESULT GR_STDCALL grCreateDevice( VkPhysicalDeviceMemoryProperties memoryProperties; vki.vkGetPhysicalDeviceMemoryProperties(grPhysicalGpu->physicalDevice, &memoryProperties); + // Build memory heap map + uint32_t memoryHeapMap[GR_MAX_MEMORY_HEAPS]; + unsigned memoryHeapCount = 0; + for (unsigned i = 0; i < memoryProperties.memoryTypeCount; i++) { + if (memoryProperties.memoryTypes[i].propertyFlags & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | + VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD)) { + LOGW("ignoring device coherent/uncached memory type %d\n", i); + continue; + } + + if (memoryHeapCount >= GR_MAX_MEMORY_HEAPS) { + LOGW("can't map memory type (exceeded %d Mantle heaps)\n", GR_MAX_MEMORY_HEAPS); + } + + memoryHeapCount++; + memoryHeapMap[memoryHeapCount - 1] = i; + } + GrDevice* grDevice = malloc(sizeof(GrDevice)); *grDevice = (GrDevice) { .grBaseObj = { GR_OBJ_TYPE_DEVICE }, @@ -737,6 +756,8 @@ GR_RESULT GR_STDCALL grCreateDevice( .device = vkDevice, .physicalDevice = grPhysicalGpu->physicalDevice, .memoryProperties = memoryProperties, + .memoryHeapMap = { 0 }, // Initialized below + .memoryHeapCount = memoryHeapCount, .atomicCounterSetLayout = VK_NULL_HANDLE, // Initialized below .grUniversalQueue = NULL, // Initialized below .grComputeQueue = NULL, // Initialized below @@ -748,6 +769,7 @@ GR_RESULT GR_STDCALL grCreateDevice( .grBorderColorPalette = NULL, }; + memcpy(grDevice->memoryHeapMap, memoryHeapMap, memoryHeapCount * sizeof(uint32_t)); grDevice->atomicCounterSetLayout = getAtomicCounterDescriptorSetLayout(grDevice); if 
(universalQueueFamilyIndex != INVALID_QUEUE_INDEX) { diff --git a/src/mantle/mantle_memory_man.c b/src/mantle/mantle_memory_man.c index 9830014a..5f931b2b 100644 --- a/src/mantle/mantle_memory_man.c +++ b/src/mantle/mantle_memory_man.c @@ -17,7 +17,7 @@ GR_RESULT GR_STDCALL grGetMemoryHeapCount( return GR_ERROR_INVALID_POINTER; } - *pCount = grDevice->memoryProperties.memoryTypeCount; + *pCount = grDevice->memoryHeapCount; return GR_SUCCESS; } @@ -43,7 +43,7 @@ GR_RESULT GR_STDCALL grGetMemoryHeapInfo( return GR_ERROR_INVALID_MEMORY_SIZE; } - if (heapId >= grDevice->memoryProperties.memoryTypeCount) { + if (heapId >= grDevice->memoryHeapCount) { return GR_ERROR_INVALID_ORDINAL; } @@ -53,8 +53,10 @@ GR_RESULT GR_STDCALL grGetMemoryHeapInfo( return GR_SUCCESS; } - VkMemoryPropertyFlags flags = grDevice->memoryProperties.memoryTypes[heapId].propertyFlags; - uint32_t vkHeapIndex = grDevice->memoryProperties.memoryTypes[heapId].heapIndex; + const VkPhysicalDeviceMemoryProperties* memoryProperties = &grDevice->memoryProperties; + uint32_t vkMemoryTypeIndex = grDevice->memoryHeapMap[heapId]; + VkMemoryPropertyFlags flags = memoryProperties->memoryTypes[vkMemoryTypeIndex].propertyFlags; + uint32_t vkHeapIndex = memoryProperties->memoryTypes[vkMemoryTypeIndex].heapIndex; bool deviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; bool hostVisible = (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; bool hostCoherent = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; @@ -67,7 +69,7 @@ GR_RESULT GR_STDCALL grGetMemoryHeapInfo( // https://gpuopen.com/learn/vulkan-device-memory/ *(GR_MEMORY_HEAP_PROPERTIES*)pData = (GR_MEMORY_HEAP_PROPERTIES) { .heapMemoryType = deviceLocal ? GR_HEAP_MEMORY_LOCAL : GR_HEAP_MEMORY_REMOTE, - .heapSize = grDevice->memoryProperties.memoryHeaps[vkHeapIndex].size, + .heapSize = memoryProperties->memoryHeaps[vkHeapIndex].size, .pageSize = 65536, // 19.4.3 .flags = (hostVisible ? GR_MEMORY_HEAP_CPU_VISIBLE : 0) | (hostCoherent ? 
GR_MEMORY_HEAP_CPU_GPU_COHERENT : 0) | @@ -121,11 +123,15 @@ GR_RESULT GR_STDCALL grAllocMemory( // Try to allocate from the best heap vkRes = VK_ERROR_UNKNOWN; for (int i = 0; i < pAllocInfo->heapCount; i++) { + if (pAllocInfo->heaps[i] >= grDevice->memoryHeapCount) { + return GR_ERROR_INVALID_ORDINAL; + } + const VkMemoryAllocateInfo allocateInfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = NULL, .allocationSize = pAllocInfo->size, - .memoryTypeIndex = pAllocInfo->heaps[i], + .memoryTypeIndex = grDevice->memoryHeapMap[pAllocInfo->heaps[i]], }; vkRes = VKD.vkAllocateMemory(grDevice->device, &allocateInfo, NULL, &vkMemory); diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 49460151..8bb94cdb 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -232,6 +232,8 @@ typedef struct _GrDevice { VkDevice device; VkPhysicalDevice physicalDevice; VkPhysicalDeviceMemoryProperties memoryProperties; + uint32_t memoryHeapMap[GR_MAX_MEMORY_HEAPS]; + unsigned memoryHeapCount; VkDescriptorSetLayout atomicCounterSetLayout; GrQueue* grUniversalQueue; GrQueue* grComputeQueue; From a11ae6c10b47b6c97b304b16e5bf40b4a5b7d3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Gu=C3=A9rin?= Date: Sun, 6 Nov 2022 17:29:24 -0800 Subject: [PATCH 07/14] mantle: bind atomic set once --- src/mantle/mantle_cmd_buf.c | 13 ++++++++++++- src/mantle/mantle_object.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/mantle/mantle_cmd_buf.c b/src/mantle/mantle_cmd_buf.c index 81f050f6..02ba382c 100644 --- a/src/mantle/mantle_cmd_buf.c +++ b/src/mantle/mantle_cmd_buf.c @@ -189,11 +189,22 @@ static void grCmdBufferBindDescriptorSet( const BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; const GrPipeline* grPipeline = bindPoint->grPipeline; + unsigned descriptorSetCount = 1; const VkDescriptorSet descriptorSets[] = { bindPoint->descriptorSet, grCmdBuffer->atomicCounterSet, }; + if (vkBindPoint == 
VK_PIPELINE_BIND_POINT_GRAPHICS && + !grCmdBuffer->isGraphicsAtomicSetBound) { + descriptorSetCount = 2; + grCmdBuffer->isGraphicsAtomicSetBound = true; + } else if (vkBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE && + !grCmdBuffer->isComputeAtomicSetBound) { + descriptorSetCount = 2; + grCmdBuffer->isComputeAtomicSetBound = true; + } + uint32_t dynamicOffsets[MAX_STAGE_COUNT]; for (unsigned i = 0; i < grPipeline->dynamicOffsetCount; i++) { @@ -201,7 +212,7 @@ static void grCmdBufferBindDescriptorSet( } VKD.vkCmdBindDescriptorSets(grCmdBuffer->commandBuffer, vkBindPoint, grPipeline->pipelineLayout, - 0, COUNT_OF(descriptorSets), descriptorSets, + 0, descriptorSetCount, descriptorSets, grPipeline->dynamicOffsetCount, dynamicOffsets); } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 8bb94cdb..8e6a68dd 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -165,6 +165,8 @@ typedef struct _GrCmdBuffer { // NOTE: grCmdBufferResetState resets everything past that point bool isBuilding; bool isRendering; + bool isGraphicsAtomicSetBound; + bool isComputeAtomicSetBound; int descriptorPoolIndex; GrFence* submitFence; // Graphics and compute bind points From 45b0c90c6df3406a964be35ee147fe25d8125f2f Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 11 Oct 2021 19:08:37 +0300 Subject: [PATCH 08/14] amdilc: add SPIRV instructions for function parameters and function call --- src/amdilc/amdilc_spirv.c | 33 +++++++++++++++++++++++++++++++++ src/amdilc/amdilc_spirv.h | 11 +++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/amdilc/amdilc_spirv.c b/src/amdilc/amdilc_spirv.c index a3e8915e..61ce7437 100644 --- a/src/amdilc/amdilc_spirv.c +++ b/src/amdilc/amdilc_spirv.c @@ -522,6 +522,19 @@ void ilcSpvPutFunction( putWord(buffer, type); } +IlcSpvId ilcSpvPutFunctionParameter( + IlcSpvModule* module, + IlcSpvId resultType) +{ + IlcSpvBuffer* buffer = &module->buffer[ID_CODE]; + 
IlcSpvId id = ilcSpvAllocId(module); + putInstr(buffer, SpvOpFunctionParameter, 3); + putWord(buffer, resultType); + putWord(buffer, id); + return id; +} + + void ilcSpvPutFunctionEnd( IlcSpvModule* module) { @@ -530,6 +543,26 @@ void ilcSpvPutFunctionEnd( putInstr(buffer, SpvOpFunctionEnd, 1); } +IlcSpvId ilcSpvPutFunctionCall( + IlcSpvModule* module, + IlcSpvId resultTypeId, + IlcSpvId functionId, + unsigned paramCount, + IlcSpvId* parameters) +{ + IlcSpvBuffer* buffer = &module->buffer[ID_CODE]; + IlcSpvId id = ilcSpvAllocId(module); + + putInstr(buffer, SpvOpFunctionCall, 4 + paramCount); + putWord(buffer, resultTypeId); + putWord(buffer, id); + putWord(buffer, functionId); + for (unsigned i = 0; i < paramCount; ++i) { + putWord(buffer, parameters[i]); + } + return id; +} + IlcSpvId ilcSpvPutVariable( IlcSpvModule* module, IlcSpvId resultTypeId, diff --git a/src/amdilc/amdilc_spirv.h b/src/amdilc/amdilc_spirv.h index 811ed111..3aefb3d9 100644 --- a/src/amdilc/amdilc_spirv.h +++ b/src/amdilc/amdilc_spirv.h @@ -173,9 +173,20 @@ void ilcSpvPutFunction( SpvFunctionControlMask control, IlcSpvId type); +IlcSpvId ilcSpvPutFunctionParameter( + IlcSpvModule* module, + IlcSpvId resultType); + void ilcSpvPutFunctionEnd( IlcSpvModule* module); +IlcSpvId ilcSpvPutFunctionCall( + IlcSpvModule* module, + IlcSpvId resultTypeId, + IlcSpvId functionId, + unsigned paramCount, + IlcSpvId* parameters); + IlcSpvId ilcSpvPutVariable( IlcSpvModule* module, IlcSpvId resultTypeId, From 06ec72fae6b6e4a4996e3aace8037ee62937ce9d Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 11 Oct 2021 19:45:34 +0300 Subject: [PATCH 09/14] amdilc: add stage-specific function alongside main --- src/amdilc/amdilc_compiler.c | 56 +++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index a8f3491d..55aa3919 100644 --- a/src/amdilc/amdilc_compiler.c +++ 
b/src/amdilc/amdilc_compiler.c @@ -105,6 +105,7 @@ typedef struct { unsigned inputCount; IlcInput* inputs; IlcSpvId entryPointId; + IlcSpvId stageFunctionId; IlcSpvId uintId; IlcSpvId uint4Id; IlcSpvId intId; @@ -3354,6 +3355,7 @@ static void emitInstr( if (compiler->isInFunction) { ilcSpvPutFunctionEnd(compiler->module); compiler->isInFunction = false; + compiler->isAfterReturn = false; } break; case IL_OP_ENDIF: @@ -3597,6 +3599,7 @@ IlcShader ilcCompileKernel( .inputCount = 0, .inputs = NULL, .entryPointId = ilcSpvAllocId(&module), + .stageFunctionId = (compiler.kernel->shaderType != IL_SHADER_HULL && compiler.kernel->shaderType != IL_SHADER_DOMAIN) ? ilcSpvAllocId(&module) : 0, .uintId = uintId, .uint4Id = ilcSpvPutVectorType(&module, uintId, 4), .intId = intId, @@ -3614,22 +3617,67 @@ IlcShader ilcCompileKernel( .samplers = NULL, .controlFlowBlockCount = 0, .controlFlowBlocks = NULL, - .isInFunction = true, + .isInFunction = false, .isAfterReturn = false, }; emitImplicitInputs(&compiler); - emitFunc(&compiler, compiler.entryPointId); if (compiler.kernel->shaderType == IL_SHADER_HULL || compiler.kernel->shaderType == IL_SHADER_DOMAIN) { LOGW("unhandled hull/domain shader type\n"); - ilcSpvPutReturn(compiler.module); - ilcSpvPutFunctionEnd(compiler.module); } else { + compiler.isInFunction = true; + emitFunc(&compiler, compiler.stageFunctionId); + const char* stageFunctionName = "stage_main"; + switch (compiler.kernel->shaderType) { + case IL_SHADER_VERTEX: + stageFunctionName = "vs_main"; + break; + case IL_SHADER_GEOMETRY: + stageFunctionName = "gs_main"; + break; + case IL_SHADER_PIXEL: + stageFunctionName = "ps_main"; + break; + case IL_SHADER_COMPUTE: + stageFunctionName = "cs_main"; + break; + default: + break; + } + ilcSpvPutName(compiler.module, compiler.stageFunctionId, stageFunctionName); + for (int i = 0; i < kernel->instrCount; i++) { emitInstr(&compiler, &kernel->instrs[i]); } + // close stage main function if not yet ended + if 
(compiler.isInFunction) { + if (!compiler.isAfterReturn) { + ilcSpvPutReturn(compiler.module); + compiler.isAfterReturn = true; + } + ilcSpvPutFunctionEnd(compiler.module); + compiler.isInFunction = false; + } + } + + compiler.isInFunction = true; + compiler.isAfterReturn = false; + emitFunc(&compiler, compiler.entryPointId); + if (compiler.stageFunctionId != 0) { + IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler.module); + // call stage main + ilcSpvPutFunctionCall(compiler.module, voidTypeId, compiler.stageFunctionId, 0, NULL); + } + // close real main function + if (compiler.isInFunction) { + if (!compiler.isAfterReturn) { + ilcSpvPutReturn(compiler.module); + compiler.isAfterReturn = true; + } + ilcSpvPutFunctionEnd(compiler.module); + compiler.isInFunction = false; } emitEntryPoint(&compiler); From 548efbdbedd0e9447c976c0c12a56c4e9d2a88f8 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 11 Oct 2021 19:51:54 +0300 Subject: [PATCH 10/14] amdilc: add vertex shader finalization --- src/amdilc/amdilc_compiler.c | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 55aa3919..2cd4b51a 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -399,6 +399,22 @@ static const IlcRegister* findRegister( return NULL; } +static const IlcRegister* findRegisterByType( + IlcCompiler* compiler, + uint32_t type, + uint32_t importUsage) +{ + for (int i = 0; i < compiler->regCount; i++) { + const IlcRegister* reg = &compiler->regs[i]; + + if (reg->ilType == type && reg->ilImportUsage == importUsage) { + return reg; + } + } + + return NULL; +} + static const IlcRegister* findOrCreateRegister( IlcCompiler* compiler, uint32_t type, @@ -3213,6 +3229,34 @@ static void emitStructuredSrvLoad( storeDestination(compiler, dst, resId, compiler->float4Id); } + +static void finalizeVertexStage( + IlcCompiler* compiler) +{ + 
const IlcRegister* posReg = findRegisterByType(compiler, IL_REGTYPE_OUTPUT, IL_IMPORTUSAGE_POS); + if (posReg != NULL) { + IlcSpvId outputId = emitVariable(compiler, posReg->typeId, SpvStorageClassOutput); + IlcSpvId locationIdx = posReg->ilNum; + ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationLocation, 1, &locationIdx); + ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationInvariant, 0, NULL); + IlcSpvId loadedPosId = ilcSpvPutLoad(compiler->module, posReg->typeId, posReg->id); + ilcSpvPutStore(compiler->module, outputId, loadedPosId); + const IlcRegister reg = { + .id = outputId, + .interfaceId = outputId, + .typeId = posReg->typeId, + .componentTypeId = posReg->componentTypeId, + .componentCount = posReg->componentCount, + .ilType = IL_REGTYPE_OUTPUT, + .ilNum = posReg->ilNum,//idk what to place here (not needed :) ) + .ilImportUsage = IL_IMPORTUSAGE_GENERIC, + .ilInterpMode = 0, + }; + + addRegister(compiler, ®, "oPos"); + } +} + static void emitImplicitInput( IlcCompiler* compiler, SpvBuiltIn spvBuiltIn, @@ -3670,6 +3714,9 @@ IlcShader ilcCompileKernel( // call stage main ilcSpvPutFunctionCall(compiler.module, voidTypeId, compiler.stageFunctionId, 0, NULL); } + if (compiler.kernel->shaderType == IL_SHADER_VERTEX) { + finalizeVertexStage(&compiler); + } // close real main function if (compiler.isInFunction) { if (!compiler.isAfterReturn) { From be41b1c30cdcad7a27a93c720f5ac44a5a586d06 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 11 Oct 2021 20:18:45 +0300 Subject: [PATCH 11/14] amdilc: add emitting inputs and outputs of a shader --- src/amdilc/amdilc.h | 2 ++ src/amdilc/amdilc_compiler.c | 19 +++++++++++++++++++ src/mantle/mantle_object.h | 2 ++ src/mantle/mantle_object_man.c | 1 + src/mantle/mantle_shader_pipeline.c | 3 +++ 5 files changed, 27 insertions(+) diff --git a/src/amdilc/amdilc.h b/src/amdilc/amdilc.h index 8fed7302..8044760d 100644 --- a/src/amdilc/amdilc.h +++ 
b/src/amdilc/amdilc.h @@ -36,6 +36,8 @@ typedef struct _IlcShader { IlcBinding* bindings; unsigned inputCount; IlcInput* inputs; + unsigned outputCount; + uint32_t* outputLocations; char* name; } IlcShader; diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 2cd4b51a..5e48c178 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -104,6 +104,8 @@ typedef struct { IlcBinding* bindings; unsigned inputCount; IlcInput* inputs; + unsigned outputCount; + uint32_t* outputLocations; IlcSpvId entryPointId; IlcSpvId stageFunctionId; IlcSpvId uintId; @@ -981,6 +983,16 @@ static void emitLiteral( addRegister(compiler, ®, "l"); } +static void emitGenericOutputInfo( + IlcCompiler* compiler, + uint32_t location) +{ + // emit output info + compiler->outputCount++; + compiler->outputLocations = realloc(compiler->outputLocations, compiler->outputCount * sizeof(uint32_t)); + compiler->outputLocations[compiler->outputCount - 1] = location; +} + static void emitOutput( IlcCompiler* compiler, const Instruction* instr) @@ -1041,6 +1053,7 @@ static void emitOutput( } else if (importUsage == IL_IMPORTUSAGE_GENERIC) { IlcSpvWord locationIdx = dst->registerNum; ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationLocation, 1, &locationIdx); + emitGenericOutputInfo(compiler, locationIdx); } else { LOGW("unhandled import usage %d\n", importUsage); } @@ -3254,6 +3267,8 @@ static void finalizeVertexStage( }; addRegister(compiler, ®, "oPos"); + // store output location + emitGenericOutputInfo(compiler, locationIdx); } } @@ -3642,6 +3657,8 @@ IlcShader ilcCompileKernel( .bindings = NULL, .inputCount = 0, .inputs = NULL, + .outputCount = 0, + .outputLocations = NULL, .entryPointId = ilcSpvAllocId(&module), .stageFunctionId = (compiler.kernel->shaderType != IL_SHADER_HULL && compiler.kernel->shaderType != IL_SHADER_DOMAIN) ? 
ilcSpvAllocId(&module) : 0, .uintId = uintId, @@ -3742,6 +3759,8 @@ IlcShader ilcCompileKernel( .bindings = compiler.bindings, .inputCount = compiler.inputCount, .inputs = compiler.inputs, + .outputCount = compiler.outputCount, + .outputLocations = compiler.outputLocations, .name = strdup(name), }; } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 8e6a68dd..79819479 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -343,6 +343,8 @@ typedef struct _GrShader { IlcBinding* bindings; unsigned inputCount; IlcInput* inputs; + unsigned outputCount; + uint32_t* outputLocations; char* name; } GrShader; diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index 91afbd73..3582b65b 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -121,6 +121,7 @@ GR_RESULT GR_STDCALL grDestroyObject( VKD.vkDestroyShaderModule(grDevice->device, grShader->shaderModule, NULL); free(grShader->bindings); free(grShader->inputs); + free(grShader->outputLocations); free(grShader->name); } break; case GR_OBJ_TYPE_QUERY_POOL: { diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index c4dfdaf1..c40775e5 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -696,6 +696,7 @@ GR_RESULT GR_STDCALL grCreateShader( free(ilcShader.code); free(ilcShader.bindings); free(ilcShader.inputs); + free(ilcShader.outputLocations); free(ilcShader.name); return getGrResult(res); } @@ -711,6 +712,8 @@ GR_RESULT GR_STDCALL grCreateShader( .bindings = ilcShader.bindings, .inputCount = ilcShader.inputCount, .inputs = ilcShader.inputs, + .outputCount = ilcShader.outputCount, + .outputLocations = ilcShader.outputLocations, .name = ilcShader.name, }; From c5131bd3e434bb270752d92347cd93701b2e04fa Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Fri, 1 Oct 2021 08:57:10 +0300 Subject: [PATCH 12/14] 
amdilc: fixed immediate modifier reading if ADDR_REG_RELATIVE addressing is being used --- src/amdilc/amdilc_decoder.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/amdilc/amdilc_decoder.c b/src/amdilc/amdilc_decoder.c index f43aeb08..6deca24b 100644 --- a/src/amdilc/amdilc_decoder.c +++ b/src/amdilc/amdilc_decoder.c @@ -277,14 +277,20 @@ static unsigned decodeDestination( } else if (relativeAddress == IL_ADDR_REG_RELATIVE) { dst->relativeSrcCount = dimension ? 2 : 1; dst->relativeSrcs = malloc(dst->relativeSrcCount * sizeof(Source)); - for (unsigned i = 0; i < dst->relativeSrcCount; i++) { - idx += decodeSource(&dst->relativeSrcs[i], &token[idx]); + idx += decodeSource(&dst->relativeSrcs[0], &token[idx]); + // the immediate after the first addr reg + if (dst->hasImmediate) { + dst->immediate = token[idx]; + idx++; + } + if (dst->relativeSrcCount > 1) { + idx += decodeSource(&dst->relativeSrcs[1], &token[idx]); } } else { assert(false); } - if (dst->hasImmediate) { + if (dst->hasImmediate && relativeAddress != IL_ADDR_REG_RELATIVE) { dst->immediate = token[idx]; idx++; } @@ -355,14 +361,20 @@ static unsigned decodeSource( } else if (relativeAddress == IL_ADDR_REG_RELATIVE) { src->srcCount = dimension ? 
2 : 1; src->srcs = malloc(src->srcCount * sizeof(Source)); - for (unsigned i = 0; i < src->srcCount; i++) { - idx += decodeSource(&src->srcs[i], &token[idx]); + idx += decodeSource(&src->srcs[0], &token[idx]); + // the immediate after the first addr reg + if (src->hasImmediate) { + src->immediate = token[idx]; + idx++; + } + if (src->srcCount > 1) { + idx += decodeSource(&src->srcs[1], &token[idx]); } } else { assert(false); } - if (src->hasImmediate) { + if (src->hasImmediate && relativeAddress != IL_ADDR_REG_RELATIVE) { src->immediate = token[idx]; idx++; } From 213045498c293675e4ac590cad88ab2f864349d7 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 11 Oct 2021 23:50:12 +0300 Subject: [PATCH 13/14] amdilc: add arrayed registers type handling --- src/amdilc/amdilc_compiler.c | 123 +++++++++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 27 deletions(-) diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 5e48c178..45925ec3 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -37,8 +37,11 @@ typedef struct { IlcSpvId id; IlcSpvId interfaceId; IlcSpvId typeId; + IlcSpvId vecTypeId; IlcSpvId componentTypeId; unsigned componentCount; + unsigned arrayItemCount; + unsigned arrayItemOffset;//for use with arrayed registers like InnerTessLevel, which has offsets uint32_t ilType; // ILRegType uint32_t ilNum; uint8_t ilImportUsage; // Input/output only @@ -374,10 +377,11 @@ static const IlcRegister* addRegister( const IlcRegister* reg, const char* identifier) { - char name[32]; - snprintf(name, sizeof(name), "%s%u", identifier, reg->ilNum); - ilcSpvPutName(compiler->module, reg->id, name); - + if (reg->interfaceId != 0) { + char name[32]; + snprintf(name, sizeof(name), "%s%u", identifier, reg->ilNum); + ilcSpvPutName(compiler->module, reg->id, name); + } compiler->regCount++; compiler->regs = realloc(compiler->regs, sizeof(IlcRegister) * compiler->regCount); 
compiler->regs[compiler->regCount - 1] = *reg; @@ -433,8 +437,10 @@ static const IlcRegister* findOrCreateRegister( .id = tempId, .interfaceId = tempId, .typeId = tempTypeId, + .vecTypeId = tempTypeId, .componentTypeId = compiler->floatId, .componentCount = 4, + .arrayItemCount = 0, .ilType = type, .ilNum = num, .ilImportUsage = 0, @@ -593,19 +599,28 @@ static IlcSpvId loadSource( { const IlcRegister* reg; + uint32_t regNum = src->registerNum;// = src->srcCount == 0 ? src->registerNum : src->srcs[src->srcCount - 1].registerNum; + if (src->srcCount > 0) { + //idk how to select register here + for (unsigned i = 0; i < src->srcCount; ++i) { + if (src->srcs[i].registerType == src->registerType) { + regNum = src->srcs[i].registerNum; + } + } + } if ((src->swizzle[0] == IL_COMPSEL_0 || src->swizzle[0] == IL_COMPSEL_1) && (src->swizzle[1] == IL_COMPSEL_0 || src->swizzle[1] == IL_COMPSEL_1) && (src->swizzle[2] == IL_COMPSEL_0 || src->swizzle[2] == IL_COMPSEL_1) && (src->swizzle[3] == IL_COMPSEL_0 || src->swizzle[3] == IL_COMPSEL_1)) { // We're reading only 0 or 1s so it's safe to create a temporary register // Example: r4096.0001 as seen in 3DMark shader - reg = findOrCreateRegister(compiler, src->registerType, src->registerNum); + reg = findOrCreateRegister(compiler, src->registerType, regNum); } else { - reg = findRegister(compiler, src->registerType, src->registerNum); + reg = findRegister(compiler, src->registerType, regNum); } if (reg == NULL) { - LOGE("source register %d %d not found\n", src->registerType, src->registerNum); + LOGE("source register %d %d not found\n", src->registerType, regNum); return 0; } @@ -614,7 +629,7 @@ static IlcSpvId loadSource( src->registerType == IL_REGTYPE_IMMED_CONST_BUFF) { // 1D arrays IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassPrivate, - reg->typeId); + reg->vecTypeId); IlcSpvId indexId = ilcSpvPutConstant(compiler->module, compiler->intId, src->hasImmediate ? 
src->immediate : 0); if (src->srcCount > 0) { @@ -635,7 +650,7 @@ static IlcSpvId loadSource( ptrId = reg->id; } - IlcSpvId varId = ilcSpvPutLoad(compiler->module, reg->typeId, ptrId); + IlcSpvId varId = ilcSpvPutLoad(compiler->module, reg->vecTypeId, ptrId); IlcSpvId componentTypeId = 0; if (reg->componentTypeId == compiler->boolId) { @@ -747,10 +762,11 @@ static void storeDestination( return; } - if (typeId != reg->typeId && reg->componentCount == 4) { + if (typeId != reg->vecTypeId && reg->componentCount == 4) { // Need to cast to the expected type - varId = ilcSpvPutBitcast(compiler->module, reg->typeId, varId); + varId = ilcSpvPutBitcast(compiler->module, reg->vecTypeId, varId); } + //TODO: cast to other types IlcSpvId ptrId = 0; if (dst->registerType == IL_REGTYPE_ITEMP) { @@ -772,16 +788,34 @@ static void storeDestination( } ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 1, &indexId); } else { + if (dst->hasImmediate) { + LOGW("unhandled immediate\n"); + } if (dst->absoluteSrc != NULL) { LOGW("unhandled absolute source\n"); } if (dst->relativeSrcCount > 0) { - LOGW("unhandled relative source (%u)\n", dst->relativeSrcCount); - } - if (dst->hasImmediate) { - LOGW("unhandled immediate\n"); + if (dst->relativeSrcCount == 1 && reg->arrayItemCount > 0) { + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, reg->vecTypeId); + + IlcSpvId indexId = loadSource(compiler, &dst->relativeSrcs[0], COMP_MASK_XYZW, compiler->int4Id); + indexId = emitVectorTrim(compiler, indexId, compiler->int4Id, COMP_INDEX_X, 1); + // HACK: helps to handle the cases with dcl_array of gl_TessLevel stuff + ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, (reg->vecTypeId == reg->typeId) ? 
reg->interfaceId : reg->id, 1, &indexId); + } else { + if (reg->arrayItemCount == 0) { + LOGW("expected to have arrayed type on register %d\n", dst->registerNum); + } + ptrId = reg->id; + LOGW("unhandled relative source (%u) for type %u\n", dst->relativeSrcCount, dst->registerType); + } + } else if (reg->arrayItemCount > 0 && reg->vecTypeId != reg->typeId) { + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, reg->vecTypeId); + IlcSpvId indexId = ilcSpvPutConstant(compiler->module, compiler->intId, reg->arrayItemOffset); + ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 1, &indexId); + } else { + ptrId = reg->id; } - ptrId = reg->id; } if (dst->shiftScale != IL_SHIFT_NONE) { @@ -830,7 +864,7 @@ static void storeDestination( (dst->component[2] == IL_MODCOMP_0 || dst->component[2] == IL_MODCOMP_1) || (dst->component[3] == IL_MODCOMP_0 || dst->component[3] == IL_MODCOMP_1)) { // Select components from {x, y, z, w, 0.f, 1.f} - IlcSpvId zeroOneId = emitZeroOneVector(compiler, compiler->floatId); + IlcSpvId zeroOneId = emitZeroOneVector(compiler, reg->componentTypeId); const IlcSpvWord components[] = { dst->component[0] == IL_MODCOMP_0 ? 4 : (dst->component[0] == IL_MODCOMP_1 ? 
5 : 0), @@ -905,9 +939,11 @@ static void emitConstBuffer( const IlcRegister constBufferReg = { .id = arrayId, .interfaceId = arrayId, - .typeId = typeId, + .typeId = arrayTypeId, + .vecTypeId = typeId, .componentTypeId = compiler->floatId, .componentCount = 4, + .arrayItemCount = arraySize, .ilType = IL_REGTYPE_IMMED_CONST_BUFF, .ilNum = 0, .ilImportUsage = 0, @@ -935,8 +971,10 @@ static void emitIndexedTempArray( .id = arrayId, .interfaceId = arrayId, .typeId = compiler->float4Id, + .vecTypeId = compiler->float4Id, .componentTypeId = compiler->floatId, .componentCount = 4, + .arrayItemCount = arraySize, .ilType = src->registerType, .ilNum = src->registerNum, .ilImportUsage = 0, @@ -965,15 +1003,16 @@ static void emitLiteral( }; IlcSpvId compositeId = ilcSpvPutConstantComposite(compiler->module, literalTypeId, 4, consistuentIds); - ilcSpvPutStore(compiler->module, literalId, compositeId); const IlcRegister reg = { .id = literalId, .interfaceId = literalId, .typeId = literalTypeId, + .vecTypeId = literalTypeId, .componentTypeId = compiler->floatId, .componentCount = 4, + .arrayItemCount = 0, .ilType = src->registerType, .ilNum = src->registerNum, .ilImportUsage = 0, @@ -1016,8 +1055,11 @@ static void emitOutput( IlcSpvId outputId = 0; IlcSpvId outputInterfaceId = 0; IlcSpvId outputComponentTypeId = 0; + IlcSpvId outputVectorTypeId = 0; IlcSpvId outputTypeId = 0; unsigned outputComponentCount = 0; + unsigned outputArrayItemCount = 0; + unsigned outputArrayItemOffset = 0; const char* outputPrefix = NULL; if (dst->registerType == IL_REGTYPE_OUTPUT && importUsage == IL_IMPORTUSAGE_CLIPDISTANCE) { @@ -1041,6 +1083,7 @@ static void emitOutput( outputPrefix = "o"; } else if (dst->registerType == IL_REGTYPE_OUTPUT) { outputTypeId = compiler->float4Id; + outputVectorTypeId = outputTypeId; outputId = emitVariable(compiler, outputTypeId, SpvStorageClassOutput); outputInterfaceId = outputId; outputComponentTypeId = compiler->floatId; @@ -1089,18 +1132,21 @@ static void 
emitOutput( outputComponentCount = 1; outputPrefix = "oMask"; } else { - LOGW("unhandled output register type %u\n", dst->registerType); + LOGE("unhandled output register type %d %d\n", dst->registerType, importUsage); assert(false); } const IlcRegister reg = { .id = outputId, - .interfaceId = outputInterfaceId, + .interfaceId = (dupeReg != NULL) ? 0 : outputInterfaceId, .typeId = outputTypeId, + .vecTypeId = outputVectorTypeId == 0 ? outputTypeId : outputVectorTypeId, .componentTypeId = outputComponentTypeId, .componentCount = outputComponentCount, + .arrayItemCount = outputArrayItemCount, + .arrayItemOffset = outputArrayItemOffset, .ilType = dst->registerType, - .ilNum = dst->registerNum, + .ilNum = dst->absoluteSrc == NULL ? dst->registerNum : dst->absoluteSrc->registerNum, .ilImportUsage = importUsage, .ilInterpMode = 0, }; @@ -1117,7 +1163,9 @@ static void emitInput( IlcSpvId inputId = 0; IlcSpvId inputTypeId = 0; IlcSpvId inputComponentTypeId = 0; + unsigned inputArrayItemCount = 0; unsigned inputComponentCount = 0; + IlcSpvId inputVecTypeId = 0; assert(instr->dstCount == 1 && instr->srcCount == 0 && @@ -1130,6 +1178,7 @@ static void emitInput( dst->shiftScale == IL_SHIFT_NONE); const IlcRegister* dupeReg = findRegister(compiler, dst->registerType, dst->registerNum); + const char* name = "v"; if (dupeReg != NULL) { // Inputs are allowed to be redeclared with different components. // Can be safely ignored as long as the import usage and interp mode are equivalent. @@ -1225,17 +1274,20 @@ static void emitInput( const IlcRegister reg = { .id = inputId, - .interfaceId = inputId, + .interfaceId = (dupeReg != NULL) ? 0 : inputId, .typeId = inputTypeId, + .vecTypeId = inputVecTypeId == 0 ? 
inputTypeId : inputVecTypeId, .componentTypeId = inputComponentTypeId, .componentCount = inputComponentCount, + .arrayItemCount = inputArrayItemCount, + .arrayItemOffset = 0, .ilType = dst->registerType, .ilNum = dst->registerNum, .ilImportUsage = importUsage, .ilInterpMode = interpMode, }; - addRegister(compiler, ®, "v"); + addRegister(compiler, ®, name); } static void emitResource( @@ -3242,7 +3294,6 @@ static void emitStructuredSrvLoad( storeDestination(compiler, dst, resId, compiler->float4Id); } - static void finalizeVertexStage( IlcCompiler* compiler) { @@ -3258,8 +3309,11 @@ static void finalizeVertexStage( .id = outputId, .interfaceId = outputId, .typeId = posReg->typeId, + .vecTypeId = posReg->vecTypeId, .componentTypeId = posReg->componentTypeId, .componentCount = posReg->componentCount, + .arrayItemCount = 0, + .arrayItemOffset = 0, .ilType = IL_REGTYPE_OUTPUT, .ilNum = posReg->ilNum,//idk what to place here (not needed :) ) .ilImportUsage = IL_IMPORTUSAGE_GENERIC, @@ -3295,8 +3349,10 @@ static void emitImplicitInput( .id = inputId, .interfaceId = inputId, .typeId = inputTypeId, + .vecTypeId = inputTypeId, .componentTypeId = componentTypeId, .componentCount = componentCount, + .arrayItemCount = 0, .ilType = ilType, .ilNum = 0, .ilImportUsage = 0, @@ -3602,25 +3658,38 @@ static void emitEntryPoint( for (int i = 0; i < compiler->regCount; i++) { const IlcRegister* reg = &compiler->regs[i]; - + if (reg->interfaceId == 0) { + continue; + } interfaces[interfaceIndex] = reg->interfaceId; interfaceIndex++; + if (compiler->regs[i].id == 0) { + LOGW("got empty reg %d %d %d\n", compiler->regs[i].ilNum, compiler->regs[i].ilType, compiler->regs[i].ilImportUsage); + } + } for (int i = 0; i < compiler->resourceCount; i++) { const IlcResource* resource = &compiler->resources[i]; interfaces[interfaceIndex] = resource->id; + if (compiler->resources[i].id == 0) { + LOGW("got empty sampler %d\n", resource->id); + } + interfaceIndex++; } for (int i = 0; i < 
compiler->samplerCount; i++) { const IlcSampler* sampler = &compiler->samplers[i]; interfaces[interfaceIndex] = sampler->id; + if (compiler->regs[i].id == 0) { + LOGW("got empty sampler %d\n", sampler->id); + } interfaceIndex++; } ilcSpvPutEntryPoint(compiler->module, compiler->entryPointId, execution, name, - interfaceCount, interfaces); + interfaceIndex, interfaces); ilcSpvPutName(compiler->module, compiler->entryPointId, name); switch (compiler->kernel->shaderType) { From dfed637e03a6e3fe43fe3d659a77f90eb0d7f268 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Tue, 12 Oct 2021 00:24:38 +0300 Subject: [PATCH 14/14] amdilc, mantle: handle tesselation shaders translation --- src/amdilc/amdilc.c | 9 + src/amdilc/amdilc.h | 11 + src/amdilc/amdilc_compiler.c | 681 +++++++++++++++++++++-- src/amdilc/amdilc_dump.c | 2 +- src/amdilc/amdilc_internal.h | 6 + src/amdilc/amdilc_passthrough_compiler.c | 278 +++++++++ src/amdilc/amdilc_spirv.c | 41 ++ src/amdilc/amdilc_spirv.h | 6 + src/amdilc/meson.build | 1 + src/mantle/mantle_object.h | 4 + src/mantle/mantle_object_man.c | 6 + src/mantle/mantle_shader_pipeline.c | 41 +- 12 files changed, 1028 insertions(+), 58 deletions(-) create mode 100644 src/amdilc/amdilc_passthrough_compiler.c diff --git a/src/amdilc/amdilc.c b/src/amdilc/amdilc.c index 74dd3def..e0ee73e1 100644 --- a/src/amdilc/amdilc.c +++ b/src/amdilc/amdilc.c @@ -165,3 +165,12 @@ void ilcDisassembleShader( freeKernel(kernel); free(kernel); } + +IlcRecompiledShader ilcRecompileShader( + const void* code, + unsigned size, + const unsigned* inputPassthroughLocations, + unsigned passthroughCount) +{ + return ilcRecompileKernel(code, size / sizeof(uint32_t), inputPassthroughLocations, passthroughCount); +} diff --git a/src/amdilc/amdilc.h b/src/amdilc/amdilc.h index 8044760d..70a723a3 100644 --- a/src/amdilc/amdilc.h +++ b/src/amdilc/amdilc.h @@ -41,10 +41,21 @@ typedef struct _IlcShader { char* name; } IlcShader; +typedef 
struct _IlcRecompiledShader { + unsigned codeSize; + uint32_t* code; +} IlcRecompiledShader; + IlcShader ilcCompileShader( const void* code, unsigned size); +IlcRecompiledShader ilcRecompileShader( + const void* code, + unsigned size, + const unsigned* inputPassthroughLocations, + unsigned passthroughCount); + IlcShader ilcCompileRectangleGeometryShader( unsigned psInputCount, const IlcInput* psInputs); diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 45925ec3..3639a372 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -20,6 +20,8 @@ #define COMP_MASK_XYZW (COMP_MASK_XYZ | COMP_MASK_W) #define NO_STRIDE_INDEX (-1) +#define V_ARRAY_REG_ID 0xFEFFFFFF + typedef enum { RES_TYPE_GENERIC, RES_TYPE_LDS, @@ -33,6 +35,12 @@ typedef enum { BLOCK_SWITCH_CASE = 4, } IlcControlFlowBlockType; +typedef enum { + CONTROL_POINT, + FORK, + JOIN +} IlcHullPhase; + typedef struct { IlcSpvId id; IlcSpvId interfaceId; @@ -64,12 +72,14 @@ typedef struct { } IlcSampler; typedef struct { + IlcSpvId labelBeginId; IlcSpvId labelElseId; IlcSpvId labelEndId; bool hasElseBlock; } IlcIfElseBlock; typedef struct { + IlcSpvId labelBeginId; IlcSpvId labelHeaderId; IlcSpvId labelContinueId; IlcSpvId labelBreakId; @@ -100,6 +110,13 @@ typedef struct { }; } IlcControlFlowBlock; +typedef struct { + IlcHullPhase type; + unsigned phaseId; + IlcSpvId functionId; + unsigned invocationCount; +} IlcHullPhaseBlock; + typedef struct { const Kernel* kernel; IlcSpvModule* module; @@ -109,6 +126,7 @@ typedef struct { IlcInput* inputs; unsigned outputCount; uint32_t* outputLocations; + bool emitHullOutputFinalize; IlcSpvId entryPointId; IlcSpvId stageFunctionId; IlcSpvId uintId; @@ -120,6 +138,8 @@ typedef struct { IlcSpvId boolId; IlcSpvId bool4Id; unsigned currentStrideIndex; + unsigned inputArraySize; + IlcSpvWord inputControlPointSize; unsigned regCount; IlcRegister* regs; unsigned resourceCount; @@ -128,6 +148,16 @@ typedef struct { IlcSampler* 
samplers; unsigned controlFlowBlockCount; IlcControlFlowBlock* controlFlowBlocks; + IlcSpvId currentFunctionLabelId; + unsigned forkPhaseCount; + IlcHullPhaseBlock* forkPhases; + unsigned joinPhaseCount; + IlcHullPhaseBlock* joinPhases; + IlcHullPhaseBlock controlPointPhase; + struct { + IlcHullPhase type; + unsigned phaseId; + } currentHullPhase; bool isInFunction; bool isAfterReturn; } IlcCompiler; @@ -377,11 +407,9 @@ static const IlcRegister* addRegister( const IlcRegister* reg, const char* identifier) { - if (reg->interfaceId != 0) { - char name[32]; - snprintf(name, sizeof(name), "%s%u", identifier, reg->ilNum); - ilcSpvPutName(compiler->module, reg->id, name); - } + char name[32]; + snprintf(name, sizeof(name), "%s%u", identifier, reg->ilNum); + ilcSpvPutName(compiler->module, reg->id, name); compiler->regCount++; compiler->regs = realloc(compiler->regs, sizeof(IlcRegister) * compiler->regCount); compiler->regs[compiler->regCount - 1] = *reg; @@ -413,7 +441,7 @@ static const IlcRegister* findRegisterByType( for (int i = 0; i < compiler->regCount; i++) { const IlcRegister* reg = &compiler->regs[i]; - if (reg->ilType == type && reg->ilImportUsage == importUsage) { + if (reg->ilType == type && reg->ilImportUsage == importUsage && reg->interfaceId != 0) { return reg; } } @@ -431,6 +459,7 @@ static const IlcRegister* findOrCreateRegister( if (reg == NULL && type == IL_REGTYPE_TEMP) { // Create temporary register IlcSpvId tempTypeId = compiler->float4Id; + IlcSpvId tempId = emitVariable(compiler, tempTypeId, SpvStorageClassPrivate); const IlcRegister tempReg = { @@ -640,6 +669,31 @@ static IlcSpvId loadSource( indexId = ilcSpvPutOp2(compiler->module, SpvOpIAdd, compiler->intId, indexId, relId); } ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 1, &indexId); + } else if (src->registerType == IL_REGTYPE_INPUTCP) { + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassInput, + reg->vecTypeId); + IlcSpvId indexId; + if 
(src->srcCount > 1) {//last source is always the attribute + IlcSpvId rel4Id = loadSource(compiler, &src->srcs[0], COMP_MASK_XYZW, + compiler->int4Id); + indexId = emitVectorTrim(compiler, rel4Id, compiler->int4Id, 0, 1); + } else if (src->srcCount == 1 && src->srcs[0].registerType == IL_REGTYPE_INPUTCP) { + indexId = ilcSpvPutConstant(compiler->module, compiler->intId, src->registerNum); + } else { + LOGE("unhandled source count %d\n", src->srcCount); + assert(false); + } + if (src->hasImmediate) { + IlcSpvId immId = ilcSpvPutConstant(compiler->module, compiler->intId, src->immediate); + indexId = ilcSpvPutOp2(compiler->module, SpvOpIAdd, compiler->intId, indexId, immId); + } + if (compiler->kernel->shaderType == IL_SHADER_DOMAIN) { + IlcSpvId indexIds[] = { indexId, ilcSpvPutConstant(compiler->module, compiler->intId, reg->ilNum) }; + ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 2, indexIds); + } else { + //extract the first register num + ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, reg->id, 1, &indexId); + } } else { if (src->hasImmediate) { LOGW("unhandled immediate\n"); @@ -1040,6 +1094,7 @@ static void emitOutput( const Destination* dst = &instr->dsts[0]; const IlcRegister* dupeReg = findRegister(compiler, dst->registerType, dst->registerNum); + if (dupeReg != NULL) { // Outputs are allowed to be redeclared with different components. // Can be safely ignored as long as the import usage is equivalent. 
@@ -1084,19 +1139,59 @@ static void emitOutput( } else if (dst->registerType == IL_REGTYPE_OUTPUT) { outputTypeId = compiler->float4Id; outputVectorTypeId = outputTypeId; - outputId = emitVariable(compiler, outputTypeId, SpvStorageClassOutput); - outputInterfaceId = outputId; outputComponentTypeId = compiler->floatId; outputComponentCount = 4; + if (importUsage == IL_IMPORTUSAGE_EDGE_TESSFACTOR || importUsage == IL_IMPORTUSAGE_INSIDE_TESSFACTOR) { + dupeReg = findRegisterByType(compiler, dst->registerType, importUsage); + outputComponentTypeId = compiler->floatId; + outputVectorTypeId = outputComponentTypeId; + outputTypeId = outputVectorTypeId; + outputComponentCount = 1; + if (dupeReg != NULL) { + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, + outputTypeId); + IlcSpvId indexId = ilcSpvPutConstant(compiler->module, compiler->intId, dst->registerNum - dupeReg->ilNum); + outputId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, dupeReg->interfaceId, 1, &indexId); + } else { + outputArrayItemCount = (importUsage == IL_IMPORTUSAGE_INSIDE_TESSFACTOR) ? 
2 : 4; + IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->intId, outputArrayItemCount); + IlcSpvId arrayTypeId = ilcSpvPutArrayType(compiler->module, outputComponentTypeId, lengthId); + outputInterfaceId = emitVariable(compiler, arrayTypeId, SpvStorageClassOutput); + + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, + outputTypeId); + IlcSpvId indexId = ilcSpvPutConstant(compiler->module, compiler->intId, 0); + outputId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, outputInterfaceId, 1, &indexId); + } + } else { + outputTypeId = compiler->float4Id; + outputId = emitVariable(compiler, outputTypeId, SpvStorageClassOutput); + outputInterfaceId = outputId; + outputComponentCount = 4; + } outputPrefix = "o"; - + if (dupeReg != NULL) { + goto register_create; + } if (importUsage == IL_IMPORTUSAGE_POS) { IlcSpvWord builtInType = SpvBuiltInPosition; - ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationBuiltIn, 1, &builtInType); + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationBuiltIn, 1, &builtInType); } else if (importUsage == IL_IMPORTUSAGE_GENERIC) { IlcSpvWord locationIdx = dst->registerNum; - ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationLocation, 1, &locationIdx); + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationLocation, 1, &locationIdx); emitGenericOutputInfo(compiler, locationIdx); + } else if (importUsage == IL_IMPORTUSAGE_EDGE_TESSFACTOR) { + //glLevelOuter + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationPatch, 0, NULL); + IlcSpvWord builtInType = SpvBuiltInTessLevelOuter; + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationBuiltIn, 1, &builtInType); + outputPrefix = "oTessLevelOuter"; + } else if (importUsage == IL_IMPORTUSAGE_INSIDE_TESSFACTOR) { + //glLevelOuter + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationPatch, 0, NULL); + IlcSpvWord builtInType = 
SpvBuiltInTessLevelInner; + ilcSpvPutDecoration(compiler->module, outputInterfaceId, SpvDecorationBuiltIn, 1, &builtInType); + outputPrefix = "oTessLevelInner"; } else { LOGW("unhandled import usage %d\n", importUsage); } @@ -1135,7 +1230,7 @@ static void emitOutput( LOGE("unhandled output register type %d %d\n", dst->registerType, importUsage); assert(false); } - +register_create: const IlcRegister reg = { .id = outputId, .interfaceId = (dupeReg != NULL) ? 0 : outputInterfaceId, @@ -1154,6 +1249,38 @@ static void emitOutput( addRegister(compiler, ®, outputPrefix); } + +static IlcSpvId createInvocationId( + IlcCompiler* compiler) +{ + const IlcRegister* existingInvocationIdReg = findRegister(compiler, IL_REGTYPE_OUTPUTCP, 0); + if (existingInvocationIdReg != NULL) { + return existingInvocationIdReg->id; + } + //just for the moment there is no invocationId + IlcSpvId invocationId = emitVariable(compiler, compiler->intId, SpvStorageClassInput); + + IlcSpvWord builtInType = SpvBuiltInInvocationId; + ilcSpvPutDecoration(compiler->module, invocationId, SpvDecorationBuiltIn, 1, &builtInType); + const IlcRegister invocationIdReg = { + .id = invocationId, + .interfaceId = invocationId, + .typeId = compiler->intId, + .vecTypeId = compiler->intId, + .componentTypeId = compiler->intId, + .componentCount = 1, + .arrayItemCount = 0, + .arrayItemOffset = 0, + .ilType = IL_REGTYPE_OUTPUTCP, + .ilNum = 0,//idk what to place here (not needed :) ) + .ilImportUsage = IL_IMPORTUSAGE_GENERIC, + .ilInterpMode = 0, + }; + + addRegister(compiler, &invocationIdReg, "invocationId"); + return invocationId; +} + static void emitInput( IlcCompiler* compiler, const Instruction* instr) @@ -1173,12 +1300,19 @@ static void emitInput( const Destination* dst = &instr->dsts[0]; - assert(dst->registerType == IL_REGTYPE_INPUT && + if (!((dst->registerType == IL_REGTYPE_INPUT || dst->registerType == IL_REGTYPE_SHADER_INSTANCE_ID || + dst->registerType == IL_REGTYPE_INPUTCP || dst->registerType == 
IL_REGTYPE_PATCHCONST || + dst->registerType == IL_REGTYPE_DOMAINLOCATION) && + !dst->clamp && + dst->shiftScale == IL_SHIFT_NONE)) { + LOGE("unhandled input data %d %d\n",dst->registerType, dst->shiftScale); + } + assert((dst->registerType == IL_REGTYPE_INPUT || dst->registerType == IL_REGTYPE_SHADER_INSTANCE_ID || dst->registerType == IL_REGTYPE_INPUTCP || + dst->registerType == IL_REGTYPE_PATCHCONST || dst->registerType == IL_REGTYPE_DOMAINLOCATION) && !dst->clamp && dst->shiftScale == IL_SHIFT_NONE); - const IlcRegister* dupeReg = findRegister(compiler, dst->registerType, dst->registerNum); - const char* name = "v"; + const IlcRegister* dupeReg = findRegister(compiler, dst->registerType, dst->absoluteSrc == NULL ? dst->registerNum : dst->absoluteSrc->registerNum); if (dupeReg != NULL) { // Inputs are allowed to be redeclared with different components. // Can be safely ignored as long as the import usage and interp mode are equivalent. @@ -1192,8 +1326,91 @@ static void emitInput( assert(false); } } - - if (importUsage == IL_IMPORTUSAGE_POS) { + const char* name = "v"; + if (dst->registerType == IL_REGTYPE_SHADER_INSTANCE_ID) { + inputTypeId = compiler->intId; + inputComponentTypeId = compiler->intId; + inputComponentCount = 1; + inputId = emitVariable(compiler, inputTypeId, SpvStorageClassPrivate); + + IlcSpvId invocationVarId = createInvocationId(compiler); + IlcSpvId vInvocationId = ilcSpvPutLoad(compiler->module, compiler->intId, invocationVarId); + ilcSpvPutStore(compiler->module, inputId, vInvocationId); + } else if (dst->registerType == IL_REGTYPE_INPUTCP) { + name = "vicp"; + if (compiler->kernel->shaderType == IL_SHADER_DOMAIN) { + dupeReg = findRegister(compiler, IL_REGTYPE_INPUTCP, V_ARRAY_REG_ID); + if (dupeReg == NULL) { + LOGE("failed to find vertex input array for domain shader\n"); + assert(false); + } + inputComponentTypeId = compiler->floatId; + inputVecTypeId = compiler->float4Id; + inputComponentCount = 4; + inputArrayItemCount = 
dst->absoluteSrc == NULL ? 4 : dst->registerNum; + IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->intId, dupeReg->arrayItemCount); + inputTypeId = ilcSpvPutArrayType(compiler->module, inputVecTypeId, lengthId); + inputId = dupeReg->id; + } else { + inputComponentTypeId = compiler->floatId; + inputVecTypeId = compiler->float4Id; + inputComponentCount = 4; + inputArrayItemCount = dst->absoluteSrc == NULL ? 4 : dst->registerNum; + IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->intId, inputArrayItemCount); + inputTypeId = ilcSpvPutArrayType(compiler->module, inputVecTypeId, lengthId); + inputId = emitVariable(compiler, inputTypeId, SpvStorageClassInput); + if (importUsage == IL_IMPORTUSAGE_GENERIC) { + IlcSpvWord locationIdx = (dst->absoluteSrc == NULL ? dst->registerNum : dst->absoluteSrc->registerNum); + ilcSpvPutDecoration(compiler->module, inputId, SpvDecorationLocation, 1, &locationIdx); + } else { + LOGW("unhandled import usage for vicp: %d\n", importUsage); + } + } + } else if (dst->registerType == IL_REGTYPE_PATCHCONST) { + //TODO: just search it by output and then loadSource (unimplemented for tess level outer - and other arrayed types) + //TODO: also implement support for this for domain (tess eval) sahder + if (compiler->kernel->shaderType == IL_SHADER_DOMAIN) { + LOGW("support for patch constants for domain shaders isn't implemented\n"); + } else if (compiler->kernel->shaderType != IL_SHADER_HULL) { + LOGE("unexpected shader type for patchconst (vpc): %d\n", compiler->kernel->shaderType); + } + const IlcRegister* edgeReg = findRegisterByType(compiler, IL_REGTYPE_OUTPUT, IL_IMPORTUSAGE_EDGE_TESSFACTOR); + if (edgeReg == NULL) { + LOGW("failed to find vTessLevelOuter\n"); + assert(false); + } + // since patchconst is just tessfactor, let's just store it here + //this is just tess level outer + inputComponentTypeId = compiler->floatId; + inputComponentCount = 4; + inputTypeId = compiler->float4Id; + inputId = 
emitVariable(compiler, inputTypeId, SpvStorageClassPrivate); + name = "vpc"; + //TODO: change stuff here + if (importUsage != IL_IMPORTUSAGE_GENERIC) { + //there can be collisions + //IlcSpvWord locationIdx = (dst->absoluteSrc == NULL ? dst->registerNum : dst->absoluteSrc->registerNum); + //ilcSpvPutDecoration(compiler->module, inputId, SpvDecorationLocation, 1, &locationIdx); + //} else { + LOGW("unhandled import usage for vpc: %d\n", importUsage); + } + //TODO: use loadSource function + IlcSpvId indexId = ilcSpvPutConstant(compiler->module, compiler->intId, dst->registerNum); + IlcSpvId zeroFloatId = ilcSpvPutConstant(compiler->module, compiler->floatId, 0.0f); + IlcSpvId refId = ilcSpvPutAccessChain(compiler->module, ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, edgeReg->componentTypeId), edgeReg->interfaceId, 1, &indexId); + IlcSpvId valueId = ilcSpvPutLoad(compiler->module, edgeReg->componentTypeId, refId); + const IlcSpvWord constituents[] = { valueId, zeroFloatId, zeroFloatId, zeroFloatId }; + IlcSpvId vecId = ilcSpvPutCompositeConstruct(compiler->module, compiler->float4Id, 4, constituents); + ilcSpvPutStore(compiler->module, inputId, vecId); + } else if (dst->registerType == IL_REGTYPE_DOMAINLOCATION) { + inputComponentTypeId = compiler->floatId; + inputComponentCount = 3; + inputTypeId = inputVecTypeId = ilcSpvPutVectorType(compiler->module, inputComponentTypeId, inputComponentCount); + inputId = emitVariable(compiler, inputTypeId, SpvStorageClassInput); + name = "tessCoord"; + IlcSpvWord builtInType = SpvBuiltInTessCoord; + ilcSpvPutDecoration(compiler->module, inputId, SpvDecorationBuiltIn, 1, &builtInType); + } else if (importUsage == IL_IMPORTUSAGE_POS) { inputComponentTypeId = compiler->floatId; inputComponentCount = 4; inputTypeId = compiler->float4Id; @@ -1282,7 +1499,7 @@ static void emitInput( .arrayItemCount = inputArrayItemCount, .arrayItemOffset = 0, .ilType = dst->registerType, - .ilNum = dst->registerNum, + .ilNum = 
dst->absoluteSrc == NULL ? dst->registerNum : dst->absoluteSrc->registerNum, .ilImportUsage = importUsage, .ilInterpMode = interpMode, }; @@ -1607,7 +1824,121 @@ static void emitFunc( IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler->module); IlcSpvId funcTypeId = ilcSpvPutFunctionType(compiler->module, voidTypeId, 0, NULL); ilcSpvPutFunction(compiler->module, voidTypeId, id, SpvFunctionControlMaskNone, funcTypeId); - ilcSpvPutLabel(compiler->module, 0); + compiler->currentFunctionLabelId = ilcSpvPutLabel(compiler->module, 0); +} + +static IlcHullPhaseBlock emitHullForkJoinFunction( + IlcCompiler* compiler) +{ + IlcSpvId funcId = ilcSpvAllocId(compiler->module); + IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler->module); + IlcSpvId funcTypeId = ilcSpvPutFunctionType(compiler->module, voidTypeId, 0, NULL); + ilcSpvPutFunction(compiler->module, voidTypeId, funcId, SpvFunctionControlMaskNone, funcTypeId); + compiler->currentFunctionLabelId = ilcSpvPutLabel(compiler->module, 0);//TODO: handle IL_REGTYPE_SHADER_INSTANCE_ID + compiler->isInFunction = true; + return (IlcHullPhaseBlock) { + .functionId = funcId, + }; +} + +static void emitHullShaderPhase( + IlcCompiler* compiler, + const Instruction* instr) +{ + switch (instr->opcode) { + case IL_OP_HS_FORK_PHASE: { + compiler->forkPhaseCount++; + compiler->forkPhases = realloc(compiler->forkPhases, compiler->forkPhaseCount * sizeof(IlcHullPhaseBlock)); + compiler->forkPhases[compiler->forkPhaseCount - 1] = emitHullForkJoinFunction(compiler); + compiler->forkPhases[compiler->forkPhaseCount - 1].type = FORK; + compiler->forkPhases[compiler->forkPhaseCount - 1].phaseId = compiler->forkPhaseCount - 1; + compiler->forkPhases[compiler->forkPhaseCount - 1].invocationCount = GET_BITS(instr->control, 0, 15); + if (compiler->forkPhases[compiler->forkPhaseCount - 1].invocationCount == 0) { + compiler->forkPhases[compiler->forkPhaseCount - 1].invocationCount = 1; + } + + char name[32]; + snprintf(name, sizeof(name), "hs_fork%u", 
compiler->forkPhaseCount - 1); + ilcSpvPutName(compiler->module, compiler->forkPhases[compiler->forkPhaseCount - 1].functionId, name); + compiler->isInFunction = true; + compiler->currentHullPhase.type = FORK; + compiler->currentHullPhase.phaseId = compiler->forkPhaseCount - 1; + } break; + case IL_OP_HS_JOIN_PHASE: { + compiler->joinPhaseCount++; + compiler->joinPhases = realloc(compiler->joinPhases, compiler->joinPhaseCount * sizeof(IlcHullPhaseBlock)); + compiler->joinPhases[compiler->joinPhaseCount - 1] = emitHullForkJoinFunction(compiler); + compiler->joinPhases[compiler->joinPhaseCount - 1].type = JOIN; + compiler->joinPhases[compiler->joinPhaseCount - 1].phaseId = compiler->joinPhaseCount - 1; + compiler->joinPhases[compiler->joinPhaseCount - 1].invocationCount = GET_BITS(instr->control, 0, 15); + if (compiler->joinPhases[compiler->joinPhaseCount - 1].invocationCount == 0) { + compiler->joinPhases[compiler->joinPhaseCount - 1].invocationCount = 1; + } + char name[32]; + snprintf(name, sizeof(name), "hs_join%u", compiler->forkPhaseCount - 1); + ilcSpvPutName(compiler->module, compiler->joinPhases[compiler->joinPhaseCount - 1].functionId, name); + + compiler->isInFunction = true; + compiler->currentHullPhase.type = JOIN; + compiler->currentHullPhase.phaseId = compiler->joinPhaseCount - 1; + } break; + case IL_OP_HS_CP_PHASE: { + IlcSpvId functionId = ilcSpvAllocId(compiler->module); + IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler->module); + IlcSpvId funcTypeId = ilcSpvPutFunctionType(compiler->module, voidTypeId, 0, NULL); + ilcSpvPutFunction(compiler->module, voidTypeId, functionId, SpvFunctionControlMaskNone, funcTypeId); + compiler->currentFunctionLabelId = ilcSpvPutLabel(compiler->module, 0); + compiler->isInFunction = true; + compiler->controlPointPhase = (IlcHullPhaseBlock){ + .type = CONTROL_POINT, + .phaseId = 0, + .functionId = functionId, + }; + + compiler->currentHullPhase.type = CONTROL_POINT; + compiler->currentHullPhase.phaseId = 0; + + } 
break; + } +} + +static void emitDomainShaderInit( + IlcCompiler* compiler) +{ + IlcSpvId inputComponentTypeId = compiler->floatId; + IlcSpvId inputVecTypeId = compiler->float4Id; + unsigned inputComponentCount = 4; + //TODO: handle OCP + IlcSpvId inputArrayItemCount = compiler->inputControlPointSize; + //TODO: just use runtime arrays per vertex + // vertex count + IlcSpvId vertexCountId = ilcSpvPutConstant(compiler->module, compiler->intId, inputArrayItemCount); + // length per vertex + IlcSpvId inputLengthId = ilcSpvPutConstant(compiler->module, compiler->intId, compiler->inputArraySize); + + IlcSpvId arrayTypeId = ilcSpvPutArrayType(compiler->module, inputVecTypeId, inputLengthId); + + IlcSpvId inputTypeId = ilcSpvPutArrayType(compiler->module, arrayTypeId, vertexCountId); + IlcSpvId inputId = emitVariable(compiler, inputTypeId, SpvStorageClassInput); + IlcSpvWord locationIdx = 0;//TODO: handle patches + ilcSpvPutDecoration(compiler->module, inputId, SpvDecorationLocation, 1, &locationIdx); + const IlcRegister reg = { + .id = inputId, + .interfaceId = inputId, + .typeId = inputTypeId, + .vecTypeId = inputVecTypeId == 0 ? 
inputTypeId : inputVecTypeId, + .componentTypeId = inputComponentTypeId, + .componentCount = inputComponentCount, + .arrayItemCount = inputArrayItemCount, + .arrayItemOffset = 0, + .ilType = IL_REGTYPE_INPUTCP, + .ilNum = V_ARRAY_REG_ID, + .ilImportUsage = IL_IMPORTUSAGE_GENERIC, + .ilInterpMode = 0, + }; + + const char* name = "vIArray"; + addRegister(compiler, ®, name); } static void emitFloatOp( @@ -2108,6 +2439,97 @@ static void emitNumThreadPerGroup( 3, sizes); } +static void emitTessDomain( + IlcCompiler* compiler, + const Instruction* instr) +{ + enum ILTsDomain domainType = (enum ILTsDomain)GET_BITS(instr->control, 0, 15); + IlcSpvWord spvDomainType; + switch (domainType) { + case IL_TS_DOMAIN_ISOLINE: + spvDomainType = SpvExecutionModeIsolines; + break; + case IL_TS_DOMAIN_TRI: + spvDomainType = SpvExecutionModeTriangles; + break; + case IL_TS_DOMAIN_QUAD: + spvDomainType = SpvExecutionModeQuads; + break; + default: + LOGE("undefined domain type %d\n", domainType); + break; + } + ilcSpvPutExecMode(compiler->module, compiler->entryPointId, spvDomainType, + 0, NULL); +} + +static void emitTessPartition( + IlcCompiler* compiler, + const Instruction* instr) +{ + enum ILTsPartition partType = (enum ILTsPartition)GET_BITS(instr->control, 0, 15); + IlcSpvWord spvPartType; + switch (partType) { + case IL_TS_PARTITION_INTEGER: + spvPartType = SpvExecutionModeSpacingEqual; + break; + case IL_TS_PARTITION_POW2: + LOGE("unhandled partition type pow2\n"); + break; + case IL_TS_PARTITION_FRACTIONAL_ODD: + spvPartType = SpvExecutionModeSpacingFractionalOdd; + break; + case IL_TS_PARTITION_FRACTIONAL_EVEN: + spvPartType = SpvExecutionModeSpacingFractionalEven; + break; + default: + LOGE("undefined partition type %d\n", partType); + break; + } + ilcSpvPutExecMode(compiler->module, compiler->entryPointId, spvPartType, 0, NULL); +} + +static void emitTessOutputPrimitive( + IlcCompiler* compiler, + const Instruction* instr) +{ + enum ILTsOutputPrimitive primType = (enum 
ILTsOutputPrimitive)GET_BITS(instr->control, 0, 15); + IlcSpvWord spvPrimType; + switch (primType) { + case IL_TS_OUTPUT_TRIANGLE_CW: + spvPrimType = SpvExecutionModeVertexOrderCw; + break; + case IL_TS_OUTPUT_TRIANGLE_CCW: + spvPrimType = SpvExecutionModeVertexOrderCcw; + break; + case IL_TS_OUTPUT_POINT: + spvPrimType = SpvExecutionModePointMode; + break; + case IL_TS_OUTPUT_LINE: + LOGE("unhandled line output primitive type\n", primType); + break; + default: + LOGE("undefined primitive type %d\n", primType); + } + ilcSpvPutExecMode(compiler->module, compiler->entryPointId, spvPrimType, 0, NULL); +} + +static void emitNumInputControlPoints( + IlcCompiler* compiler, + const Instruction* instr) +{ + IlcSpvWord inputPoints = instr->extras[0]; + LOGW("unhandled number input control points %d\n", inputPoints); +} + +static void emitNumOutputControlPoints( + IlcCompiler* compiler, + const Instruction* instr) +{ + IlcSpvWord outputPoints = instr->extras[0]; + ilcSpvPutExecMode(compiler->module, compiler->entryPointId, SpvExecutionModeOutputVertices, 1, &outputPoints); +} + static IlcSpvId emitConditionCheck( IlcCompiler* compiler, IlcSpvId srcId, @@ -2124,17 +2546,17 @@ static void emitIf( const Instruction* instr) { const IlcIfElseBlock ifElseBlock = { + .labelBeginId = ilcSpvAllocId(compiler->module), .labelElseId = ilcSpvAllocId(compiler->module), .labelEndId = ilcSpvAllocId(compiler->module), .hasElseBlock = false, }; IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); - IlcSpvId labelBeginId = ilcSpvAllocId(compiler->module); IlcSpvId condId = emitConditionCheck(compiler, srcId, instr->opcode == IL_OP_IF_LOGICALNZ); ilcSpvPutSelectionMerge(compiler->module, ifElseBlock.labelEndId); - ilcSpvPutBranchConditional(compiler->module, condId, labelBeginId, ifElseBlock.labelElseId); - ilcSpvPutLabel(compiler->module, labelBeginId); + ilcSpvPutBranchConditional(compiler->module, condId, ifElseBlock.labelBeginId, 
ifElseBlock.labelElseId); + ilcSpvPutLabel(compiler->module, ifElseBlock.labelBeginId); const IlcControlFlowBlock block = { .type = BLOCK_IF_ELSE, @@ -2197,6 +2619,7 @@ static void emitWhile( const Instruction* instr) { const IlcLoopBlock loopBlock = { + .labelBeginId = ilcSpvAllocId(compiler->module), .labelHeaderId = ilcSpvAllocId(compiler->module), .labelContinueId = ilcSpvAllocId(compiler->module), .labelBreakId = ilcSpvAllocId(compiler->module), @@ -2207,9 +2630,8 @@ static void emitWhile( ilcSpvPutLoopMerge(compiler->module, loopBlock.labelBreakId, loopBlock.labelContinueId); - IlcSpvId labelBeginId = ilcSpvAllocId(compiler->module); - ilcSpvPutBranch(compiler->module, labelBeginId); - ilcSpvPutLabel(compiler->module, labelBeginId); + ilcSpvPutBranch(compiler->module, loopBlock.labelBeginId); + ilcSpvPutLabel(compiler->module, loopBlock.labelBeginId); const IlcControlFlowBlock block = { .type = BLOCK_LOOP, @@ -2505,6 +2927,11 @@ static void emitLoad( operandsMask |= SpvImageOperandsSampleMask; operandIds[0] = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_W, 1); operandIdCount++; + } else if (resource->ilType != IL_USAGE_PIXTEX_BUFFER && resource->ilType != IL_USAGE_PIXTEX_UNKNOWN) { + // otherwise it is LOD + operandsMask |= SpvImageOperandsLodMask; + operandIds[0] = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_W, 1); + operandIdCount++; } IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); @@ -3326,6 +3753,80 @@ static void finalizeVertexStage( } } +static void finalizeHullShader( + IlcCompiler* compiler, + IlcSpvId invocationId) +{ + if (!compiler->emitHullOutputFinalize) { + return; + } + //TODO: also handle patches + int maxVertRegIndex = -1; + unsigned maxVertexDimension = compiler->inputControlPointSize; + if (maxVertexDimension == 0) { + LOGW("unhandled input control count\n"); + maxVertexDimension = 3; + } + IlcSpvId componentTypeId = compiler->floatId; + unsigned componentCount = 4; + 
IlcSpvId vecTypeId = compiler->float4Id; + + for (unsigned i = 0; i < compiler->regCount; ++i) { + if (compiler->regs[i].ilType == IL_REGTYPE_INPUTCP && (int)compiler->regs[i].ilNum > maxVertRegIndex) { + if (compiler->regs[i].id == 0) { + LOGW("got empty reg %d %d %d\n", compiler->regs[i].ilNum, compiler->regs[i].ilType, compiler->regs[i].ilImportUsage); + } + maxVertRegIndex = compiler->regs[i].ilNum; + } + } + + if (maxVertRegIndex >= 0) { + IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->intId, maxVertRegIndex + 1); + IlcSpvId vertexDimId = ilcSpvPutConstant(compiler->module, compiler->intId, maxVertexDimension); + IlcSpvId arrayTypeId = ilcSpvPutArrayType(compiler->module, vecTypeId, lengthId); + IlcSpvId arrayWrapperId = ilcSpvPutArrayType(compiler->module, arrayTypeId, vertexDimId); + + IlcSpvId outputId = emitVariable(compiler, arrayWrapperId, SpvStorageClassOutput); + IlcSpvId locationIdx = 0; + ilcSpvPutDecoration(compiler->module, outputId, SpvDecorationLocation, 1, &locationIdx); + + IlcSpvId inputPtrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassInput, vecTypeId); + IlcSpvId outputPtrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassOutput, vecTypeId); + for (unsigned i = 0; i < compiler->regCount; ++i) { + if (compiler->regs[i].ilType != IL_REGTYPE_INPUTCP) { + continue; + } + IlcSpvId inputIndexId = ilcSpvPutConstant(compiler->module, compiler->intId, i); + IlcSpvId invocationValueId = ilcSpvPutLoad(compiler->module, compiler->intId, invocationId); + + IlcSpvId inputPtrId = ilcSpvPutAccessChain(compiler->module, inputPtrTypeId, compiler->regs[i].id, 1, &invocationValueId); + IlcSpvId loadedInputId = ilcSpvPutLoad(compiler->module, vecTypeId, inputPtrId); + IlcSpvId indexesId[] = { + invocationValueId, inputIndexId + }; + IlcSpvId dstId = ilcSpvPutAccessChain(compiler->module, outputPtrTypeId, outputId, 2, indexesId ); + ilcSpvPutStore(compiler->module, dstId, loadedInputId); + } + + const IlcRegister reg 
= { + .id = outputId, + .interfaceId = outputId, + .typeId = arrayWrapperId, + .vecTypeId = vecTypeId, + .componentTypeId = componentTypeId, + .componentCount = componentCount, + .arrayItemCount = maxVertexDimension, + .arrayItemOffset = 0, + .ilType = IL_REGTYPE_OUTPUT, + .ilNum = 0,//idk what to place here (not needed :) ) + .ilImportUsage = IL_IMPORTUSAGE_GENERIC, + .ilInterpMode = 0, + }; + + addRegister(compiler, ®, "vert_out"); + } +} + static void emitImplicitInput( IlcCompiler* compiler, SpvBuiltIn spvBuiltIn, @@ -3465,6 +3966,12 @@ static void emitInstr( case IL_OP_ELSE: emitElse(compiler, instr); break; + case IL_OP_HS_CP_PHASE: + case IL_OP_HS_FORK_PHASE: + case IL_OP_HS_JOIN_PHASE: + emitHullShaderPhase(compiler, instr); + break; + case IL_OP_ENDPHASE: case IL_OP_END: case IL_OP_ENDMAIN: if (compiler->isInFunction) { @@ -3555,6 +4062,21 @@ static void emitInstr( case IL_OP_DCL_NUM_THREAD_PER_GROUP: emitNumThreadPerGroup(compiler, instr); break; + case IL_DCL_TS_DOMAIN: + emitTessDomain(compiler, instr); + break; + case IL_DCL_TS_OUTPUT_PRIMITIVE: + emitTessOutputPrimitive(compiler, instr); + break; + case IL_DCL_TS_PARTITION: + emitTessPartition(compiler, instr); + break; + case IL_DCL_NUM_ICP: + emitNumInputControlPoints(compiler, instr); + break; + case IL_DCL_NUM_OCP: + emitNumOutputControlPoints(compiler, instr); + break; case IL_OP_FENCE: emitFence(compiler, instr); break; @@ -3728,8 +4250,9 @@ IlcShader ilcCompileKernel( .inputs = NULL, .outputCount = 0, .outputLocations = NULL, + .emitHullOutputFinalize = 0, .entryPointId = ilcSpvAllocId(&module), - .stageFunctionId = (compiler.kernel->shaderType != IL_SHADER_HULL && compiler.kernel->shaderType != IL_SHADER_DOMAIN) ? ilcSpvAllocId(&module) : 0, + .stageFunctionId = compiler.kernel->shaderType != IL_SHADER_HULL ? 
ilcSpvAllocId(&module) : 0, .uintId = uintId, .uint4Id = ilcSpvPutVectorType(&module, uintId, 4), .intId = intId, @@ -3739,6 +4262,8 @@ IlcShader ilcCompileKernel( .boolId = boolId, .bool4Id = ilcSpvPutVectorType(&module, boolId, 4), .currentStrideIndex = 0, + .inputArraySize = 0, + .inputControlPointSize = 0, .regCount = 0, .regs = NULL, .resourceCount = 0, @@ -3747,16 +4272,34 @@ IlcShader ilcCompileKernel( .samplers = NULL, .controlFlowBlockCount = 0, .controlFlowBlocks = NULL, + .currentFunctionLabelId = 0, + .forkPhaseCount = 0, + .forkPhases = NULL, + .forkPhaseCount = 0, + .joinPhases = NULL, + .joinPhaseCount = 0, + .controlPointPhase = {}, + .currentHullPhase = {}, .isInFunction = false, .isAfterReturn = false, }; - emitImplicitInputs(&compiler); + // analyze shader + for (int i = 0; i < kernel->instrCount; i++) { + if (compiler.kernel->shaderType == IL_SHADER_DOMAIN && + kernel->instrs[i].opcode == IL_DCL_INPUT && + kernel->instrs[i].dstCount > 0 && + kernel->instrs[i].dsts[0].registerType == IL_REGTYPE_INPUTCP) { + unsigned regNum = (kernel->instrs[i].dsts[0].absoluteSrc != NULL ? kernel->instrs[i].dsts[0].absoluteSrc->registerNum : kernel->instrs[i].dsts[0].registerNum) + 1; + compiler.inputArraySize = compiler.inputArraySize < regNum ? 
regNum : compiler.inputArraySize; + } else if (kernel->instrs[i].opcode == IL_DCL_NUM_ICP && + kernel->instrs[i].extraCount > 0) { + compiler.inputControlPointSize = kernel->instrs[i].extras[0]; + } + } - if (compiler.kernel->shaderType == IL_SHADER_HULL || - compiler.kernel->shaderType == IL_SHADER_DOMAIN) { - LOGW("unhandled hull/domain shader type\n"); - } else { + emitImplicitInputs(&compiler); + if (compiler.kernel->shaderType != IL_SHADER_HULL) { compiler.isInFunction = true; emitFunc(&compiler, compiler.stageFunctionId); const char* stageFunctionName = "stage_main"; @@ -3764,6 +4307,9 @@ IlcShader ilcCompileKernel( case IL_SHADER_VERTEX: stageFunctionName = "vs_main"; break; + case IL_SHADER_DOMAIN: + stageFunctionName = "ds_main"; + break; case IL_SHADER_GEOMETRY: stageFunctionName = "gs_main"; break; @@ -3777,33 +4323,66 @@ IlcShader ilcCompileKernel( break; } ilcSpvPutName(compiler.module, compiler.stageFunctionId, stageFunctionName); - - for (int i = 0; i < kernel->instrCount; i++) { - emitInstr(&compiler, &kernel->instrs[i]); - } - // close stage main function if not yet ended - if (compiler.isInFunction) { - if (!compiler.isAfterReturn) { - ilcSpvPutReturn(compiler.module); - compiler.isAfterReturn = true; - } - ilcSpvPutFunctionEnd(compiler.module); - compiler.isInFunction = false; - } + } + if (compiler.kernel->shaderType == IL_SHADER_DOMAIN) { + emitDomainShaderInit(&compiler); + } + for (int i = 0; i < kernel->instrCount; i++) { + emitInstr(&compiler, &kernel->instrs[i]); } compiler.isInFunction = true; compiler.isAfterReturn = false; emitFunc(&compiler, compiler.entryPointId); - if (compiler.stageFunctionId != 0) { - IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler.module); + IlcSpvId voidTypeId = ilcSpvPutVoidType(compiler.module); + if (compiler.kernel->shaderType == IL_SHADER_HULL) { + IlcSpvId invocationVarId = createInvocationId(&compiler); + + IlcSpvId scopeId = ilcSpvPutConstant(compiler.module, compiler.uintId, SpvScopeWorkgroup); + 
IlcSpvId memScopeId = ilcSpvPutConstant(compiler.module, compiler.uintId, SpvScopeInvocation); + IlcSpvId semanticsId = ilcSpvPutConstant(compiler.module, compiler.uintId, SpvMemorySemanticsMaskNone); + for (int i = 0; i < compiler.forkPhaseCount; ++i) { + IlcSpvId invocationId = ilcSpvPutLoad(compiler.module, compiler.intId, invocationVarId); + IlcSpvId invocationCount = ilcSpvPutConstant(compiler.module, compiler.intId, compiler.forkPhases[i].invocationCount); + IlcSpvId condId = ilcSpvPutOp2(compiler.module, SpvOpULessThan, compiler.boolId, invocationId, invocationCount); + IlcSpvId invocationBlockBegin = ilcSpvAllocId(compiler.module); + IlcSpvId invocationBlockEnd = ilcSpvAllocId(compiler.module); + ilcSpvPutSelectionMerge(compiler.module, invocationBlockEnd); + ilcSpvPutBranchConditional(compiler.module, condId, invocationBlockBegin, invocationBlockEnd); + ilcSpvPutLabel(compiler.module, invocationBlockBegin); + + ilcSpvPutFunctionCall(compiler.module, voidTypeId, compiler.forkPhases[i].functionId, 0, NULL); + + ilcSpvPutBranch(compiler.module, invocationBlockEnd); + ilcSpvPutLabel(compiler.module, invocationBlockEnd); + } + ilcSpvPutControlBarrier(compiler.module, scopeId, memScopeId, semanticsId); + for (int i = 0; i < compiler.joinPhaseCount; ++i) { + IlcSpvId invocationId = ilcSpvPutLoad(compiler.module, compiler.intId, invocationVarId); + IlcSpvId invocationCount = ilcSpvPutConstant(compiler.module, compiler.intId, compiler.joinPhases[i].invocationCount); + IlcSpvId condId = ilcSpvPutOp2(compiler.module, SpvOpULessThan, compiler.boolId, invocationId, invocationCount); + IlcSpvId invocationBlockBegin = ilcSpvAllocId(compiler.module); + IlcSpvId invocationBlockEnd = ilcSpvAllocId(compiler.module); + ilcSpvPutSelectionMerge(compiler.module, invocationBlockEnd); + ilcSpvPutBranchConditional(compiler.module, condId, invocationBlockBegin, invocationBlockEnd); + ilcSpvPutLabel(compiler.module, invocationBlockBegin); + + ilcSpvPutFunctionCall(compiler.module, 
voidTypeId, compiler.joinPhases[i].functionId, 0, NULL); + ilcSpvPutBranch(compiler.module, invocationBlockEnd); + ilcSpvPutLabel(compiler.module, invocationBlockEnd); + } + ilcSpvPutControlBarrier(compiler.module, scopeId, memScopeId, semanticsId); + finalizeHullShader(&compiler, invocationVarId); + } else { // call stage main ilcSpvPutFunctionCall(compiler.module, voidTypeId, compiler.stageFunctionId, 0, NULL); + // finalize + if (compiler.kernel->shaderType == IL_SHADER_DOMAIN || compiler.kernel->shaderType == IL_SHADER_VERTEX) { + finalizeVertexStage(&compiler); + } } - if (compiler.kernel->shaderType == IL_SHADER_VERTEX) { - finalizeVertexStage(&compiler); - } - // close real main function + + // close stage main function if not yet ended if (compiler.isInFunction) { if (!compiler.isAfterReturn) { ilcSpvPutReturn(compiler.module); @@ -3819,6 +4398,8 @@ IlcShader ilcCompileKernel( free(compiler.resources); free(compiler.samplers); free(compiler.controlFlowBlocks); + free(compiler.forkPhases); + free(compiler.joinPhases); ilcSpvFinish(&module); return (IlcShader) { diff --git a/src/amdilc/amdilc_dump.c b/src/amdilc/amdilc_dump.c index 06bd42c9..b673f30f 100644 --- a/src/amdilc/amdilc_dump.c +++ b/src/amdilc/amdilc_dump.c @@ -79,7 +79,7 @@ static const char* mIlRegTypeNames[IL_REGTYPE_LAST] = { "vicp", "vpc", "vDomain", - "51?", + "vocp", "52?", "vInstanceID", "54?", diff --git a/src/amdilc/amdilc_internal.h b/src/amdilc/amdilc_internal.h index b19904fc..bb126844 100644 --- a/src/amdilc/amdilc_internal.h +++ b/src/amdilc/amdilc_internal.h @@ -91,4 +91,10 @@ IlcShader ilcCompileKernel( const Kernel* kernel, const char* name); +IlcRecompiledShader ilcRecompileKernel( + const uint32_t* spirvWords, + unsigned wordCount, + const unsigned* inputPassthroughLocations, + unsigned passthroughCount); + #endif // AMDILC_INTERNAL_H_ diff --git a/src/amdilc/amdilc_passthrough_compiler.c b/src/amdilc/amdilc_passthrough_compiler.c new file mode 100644 index 00000000..bcb2b10d 
--- /dev/null
+++ b/src/amdilc/amdilc_passthrough_compiler.c
@@ -0,0 +1,300 @@
+#include "amdilc_spirv.h"
+#include "amdilc_internal.h"
+
+// Input variable of the source module together with its Location decoration
+typedef struct {
+    IlcSpvId varId;
+    IlcSpvWord location;
+} IlcInputRegister;
+
+// State collected while scanning the source module
+typedef struct {
+    IlcSpvBuffer sourceBuffer;
+    IlcSpvModule* module;
+    IlcSpvId entryPointId;
+    SpvExecutionModel execModel;
+    const char* entryPointName;
+    IlcSpvId* interfaces;
+    unsigned interfaceCount;
+    IlcInputRegister* existingInputRegisters;
+    unsigned existingInputCount;
+    unsigned outputPointsCount;
+    IlcSpvId invocationVarId;
+    IlcSpvId floatId;
+    IlcSpvId float4Id;
+    IlcSpvId intId;
+    bool isInFunction;
+    bool isAfterReturn;
+} IlcRecompiler;
+
+// Appends an ID to the interface list of the rewritten entry point
+static void addInterface(
+    IlcRecompiler* recompiler,
+    IlcSpvId id)
+{
+    recompiler->interfaces = realloc(recompiler->interfaces,
+                                     (recompiler->interfaceCount + 1) * sizeof(IlcSpvId));
+    recompiler->interfaces[recompiler->interfaceCount] = id;
+    recompiler->interfaceCount++;
+}
+
+// Returns the source input variable decorated with the given location, or 0 if there is none
+static IlcSpvId findInputVariable(
+    const IlcRecompiler* recompiler,
+    IlcSpvWord location)
+{
+    IlcSpvId varId = 0;
+    // No early exit: when several variables share a location, the last one is used
+    for (unsigned i = 0; i < recompiler->existingInputCount; ++i) {
+        if (recompiler->existingInputRegisters[i].location == location) {
+            varId = recompiler->existingInputRegisters[i].varId;
+        }
+    }
+    return varId;
+}
+
+// Rewrites a compiled SPIR-V module so that its entry point forwards extra inputs.
+// The source instructions are copied up to the first OpReturn of the entry point function,
+// then passthrough code for inputPassthroughLocations and a new OpEntryPoint are appended.
+// Tessellation control shaders copy each location into the per-vertex "vertex_out" output
+// array, indexed by the invocation ID. Other stages get a float4 input/output variable pair
+// for every location that the shader doesn't already declare as an input.
+// The returned code buffer is not freed here.
+IlcRecompiledShader ilcRecompileKernel(
+    const IlcSpvWord* spirvWords,
+    unsigned wordCount,
+    const unsigned* inputPassthroughLocations,
+    unsigned passthroughCount)
+{
+    IlcSpvModule module;
+    module.currentId = 0;
+    for (int i = 0; i < ID_MAX; i++) {
+        module.buffer[i] = (IlcSpvBuffer) { 0, NULL };
+    }
+
+    IlcRecompiler recompiler = (IlcRecompiler){
+        .module = &module,
+        .entryPointId = 0,
+        .execModel = 0,
+        .entryPointName = NULL,
+        .interfaces = NULL,
+        .interfaceCount = 0,
+        .existingInputRegisters = NULL,
+        .existingInputCount = 0,
+        .outputPointsCount = 0,
+        .invocationVarId = 0,
+        .floatId = 0,
+        .float4Id = 0,
+        .intId = 0,
+        .isInFunction = false,
+        .isAfterReturn = false,
+    };
+    // The 5-word module header is skipped, it will be inserted at finish
+    unsigned bufferIndex = ID_CAPABILITIES;
+    unsigned bufferStart = 5;
+    for (unsigned i = 5; i < wordCount;) {
+        SpvOp opCode = spirvWords[i] & SpvOpCodeMask;
+        unsigned instrWordCount = spirvWords[i] >> SpvWordCountShift;
+        if (instrWordCount == 0 || instrWordCount > wordCount - i) {
+            // A zero word count would loop forever, an oversized one would read past the input
+            LOGW("malformed SPIR-V instruction word count\n");
+            break;
+        }
+        unsigned newBufferIndex = getBufferIndex(opCode);
+        if (newBufferIndex != bufferIndex) {
+            if (bufferIndex != ID_ENTRY_POINTS && bufferIndex != ID_CODE) {
+                // skip the entry point as it will be rewritten
+                ilcSpvUnwrapBuffer(&module.buffer[bufferIndex], &spirvWords[bufferStart], i - bufferStart);
+            }
+            bufferIndex = newBufferIndex;
+            bufferStart = i;
+        }
+        bool finishProcessing = false;
+        switch (bufferIndex) {
+        case ID_TYPES:
+        case ID_TYPES_WITH_CONSTANTS:
+            if (opCode == SpvOpTypeFloat && spirvWords[i + 2] == 32) {
+                recompiler.floatId = spirvWords[i + 1];
+            } else if (opCode == SpvOpTypeVector && spirvWords[i + 2] == recompiler.floatId && spirvWords[i + 3] == 4) {
+                recompiler.float4Id = spirvWords[i + 1];
+            } else if (opCode == SpvOpTypeInt && spirvWords[i + 2] == 32 && spirvWords[i + 3]) {
+                recompiler.intId = spirvWords[i + 1];
+            }
+            module.currentId = module.currentId < spirvWords[i + 1] ? spirvWords[i + 1] : module.currentId;
+            break;
+        case ID_ENTRY_POINTS:
+            if (opCode == SpvOpEntryPoint) {
+                recompiler.execModel = spirvWords[i + 1];
+                recompiler.entryPointId = spirvWords[i + 2];
+                recompiler.entryPointName = (const char*)&spirvWords[i + 3];
+                unsigned nameLength = (strlen(recompiler.entryPointName) + 4) / sizeof(IlcSpvWord);
+                recompiler.interfaceCount = instrWordCount - nameLength - 3;
+                // Only one entry point is kept, don't leak the interface list of a previous one
+                free(recompiler.interfaces);
+                recompiler.interfaces = malloc(recompiler.interfaceCount * sizeof(IlcSpvWord));
+                memcpy(recompiler.interfaces, &spirvWords[i + 3 + nameLength], recompiler.interfaceCount * sizeof(IlcSpvWord));
+            }
+            break;
+        case ID_EXEC_MODES:
+            if (opCode == SpvOpExecutionMode && spirvWords[i + 2] == SpvExecutionModeOutputVertices) {
+                recompiler.outputPointsCount = spirvWords[i + 3];
+            }
+            break;
+        case ID_VARIABLES:
+            if (opCode == SpvOpVariable && spirvWords[i + 3] == SpvStorageClassInput) {
+                bool foundLocation = false;
+                IlcSpvWord locationIdx = 0;
+                IlcSpvId varId = spirvWords[i + 2];
+                // Look for a Location or InvocationId decoration among those copied so far
+                for (unsigned j = 0; !foundLocation && j < module.buffer[ID_DECORATIONS].wordCount;) {
+                    SpvOp decorOpCode = module.buffer[ID_DECORATIONS].words[j] & SpvOpCodeMask;
+                    unsigned decorInstrWordCount = module.buffer[ID_DECORATIONS].words[j] >> SpvWordCountShift;
+                    if (decorOpCode == SpvOpDecorate && module.buffer[ID_DECORATIONS].words[j + 1] == varId &&
+                        module.buffer[ID_DECORATIONS].words[j + 2] == SpvDecorationLocation) {
+                        locationIdx = module.buffer[ID_DECORATIONS].words[j + 3];
+                        foundLocation = true;
+                    } else if (decorOpCode == SpvOpDecorate && module.buffer[ID_DECORATIONS].words[j + 1] == varId &&
+                               module.buffer[ID_DECORATIONS].words[j + 2] == SpvDecorationBuiltIn &&
+                               module.buffer[ID_DECORATIONS].words[j + 3] == SpvBuiltInInvocationId) {
+                        recompiler.invocationVarId = varId;
+                    }
+                    j += decorInstrWordCount;
+                }
+                if (foundLocation) {
+                    recompiler.existingInputRegisters = realloc(recompiler.existingInputRegisters, (1 + recompiler.existingInputCount) * sizeof(IlcInputRegister));
+                    recompiler.existingInputRegisters[recompiler.existingInputCount] = (IlcInputRegister) {
+                        .varId = varId,
+                        .location = locationIdx,
+                    };
+                    recompiler.existingInputCount++;
+                }
+            }
+            break;
+        case ID_CODE:
+            if (opCode == SpvOpFunction && spirvWords[i + 2] == recompiler.entryPointId) {
+                recompiler.isInFunction = true;
+            } else if (opCode == SpvOpStore) {
+                module.currentId = module.currentId < spirvWords[i + 1] ? spirvWords[i + 1] : module.currentId;
+            } else if (opCode == SpvOpLoad) {
+                module.currentId = module.currentId < spirvWords[i + 2] ? spirvWords[i + 2] : module.currentId;
+            }
+            if (opCode == SpvOpReturn && recompiler.isInFunction) {
+                finishProcessing = true;
+            } else {
+                // copy the code over
+                ilcSpvUnwrapBuffer(&module.buffer[ID_CODE], &spirvWords[i], instrWordCount);
+            }
+            break;
+        default:
+            break;
+        }
+        if (finishProcessing) {
+            break;
+        }
+        i += instrWordCount;
+    }
+    // HACK: just add offset to avoid collision
+    module.currentId += 65536;
+    //TODO: handle outputs checking
+    IlcSpvId float4InputPtrTypeId = ilcSpvPutPointerType(&module, SpvStorageClassInput, recompiler.float4Id);
+    IlcSpvId float4OutputPtrTypeId = ilcSpvPutPointerType(&module, SpvStorageClassOutput, recompiler.float4Id);
+    if (recompiler.execModel == SpvExecutionModelTessellationControl) {
+        if (recompiler.invocationVarId == 0) {
+            IlcSpvId intPtrInputId = ilcSpvPutPointerType(&module, SpvStorageClassInput, recompiler.intId);
+            recompiler.invocationVarId = ilcSpvPutVariable(&module, intPtrInputId, SpvStorageClassInput);
+            IlcSpvWord builtInType = SpvBuiltInInvocationId;
+            ilcSpvPutDecoration(&module, recompiler.invocationVarId, SpvDecorationBuiltIn, 1, &builtInType);
+            ilcSpvPutName(&module, recompiler.invocationVarId, "invocationId");
+            addInterface(&recompiler, recompiler.invocationVarId);
+        }
+        int maxArraySize = -1;
+        for (unsigned i = 0; i < passthroughCount; ++i) {
+            maxArraySize = maxArraySize < (int)inputPassthroughLocations[i] ? inputPassthroughLocations[i] : maxArraySize;
+        }
+        for (unsigned i = 0; i < recompiler.existingInputCount; ++i) {
+            maxArraySize = maxArraySize < (int)recompiler.existingInputRegisters[i].location ? recompiler.existingInputRegisters[i].location : maxArraySize;
+        }
+        maxArraySize++;
+        if (maxArraySize <= 0) {
+            goto finish;
+        }
+        //vertex count
+        if (recompiler.outputPointsCount == 0) {
+            LOGW("didn't handle output control point count\n");
+            recompiler.outputPointsCount = 3;
+        }
+        IlcSpvId vertexLengthId = ilcSpvPutConstant(&module, recompiler.intId, recompiler.outputPointsCount);
+
+        //TODO: check input/output vertex count
+        IlcSpvId inputArrTypeId = ilcSpvPutArrayType(&module, recompiler.float4Id, vertexLengthId);
+        IlcSpvId inputVarTypeId = ilcSpvPutPointerType(&module, SpvStorageClassInput, inputArrTypeId);
+
+        IlcSpvId outputLengthId = ilcSpvPutConstant(&module, recompiler.intId, maxArraySize);
+        // array of registers per vertex
+        IlcSpvId outputArrTypeId = ilcSpvPutArrayType(&module, recompiler.float4Id, outputLengthId);
+        // array of registers per primitive
+        IlcSpvId outputVArrTypeId = ilcSpvPutArrayType(&module, outputArrTypeId, vertexLengthId);
+        IlcSpvId outputVArrPtrTypeId = ilcSpvPutPointerType(&module, SpvStorageClassOutput, outputVArrTypeId);
+        IlcSpvId outputVArrId = ilcSpvPutVariable(&module, outputVArrPtrTypeId, SpvStorageClassOutput);
+        ilcSpvPutName(&module, outputVArrId, "vertex_out");
+
+        addInterface(&recompiler, outputVArrId);
+        IlcSpvWord outputLocationIdx = 0;
+        ilcSpvPutDecoration(&module, outputVArrId, SpvDecorationLocation, 1, &outputLocationIdx);
+        for (unsigned i = 0; i < passthroughCount; ++i) {
+            IlcSpvId inputVariableId = findInputVariable(&recompiler, inputPassthroughLocations[i]);
+            if (inputVariableId == 0) {
+                char name[32];
+                snprintf(name, sizeof(name), "vicp_patched%u", inputPassthroughLocations[i]);
+                inputVariableId = ilcSpvPutVariable(&module, inputVarTypeId, SpvStorageClassInput);
+                ilcSpvPutName(&module, inputVariableId, name);
+                ilcSpvPutDecoration(&module, inputVariableId, SpvDecorationLocation, 1, &inputPassthroughLocations[i]);
+
+                addInterface(&recompiler, inputVariableId);
+            }
+            IlcSpvId inputIndexId = ilcSpvPutConstant(&module, recompiler.intId, inputPassthroughLocations[i]);
+            IlcSpvId invocationValueId = ilcSpvPutLoad(&module, recompiler.intId, recompiler.invocationVarId);
+
+            IlcSpvId inputPtrId = ilcSpvPutAccessChain(&module, float4InputPtrTypeId, inputVariableId, 1, &invocationValueId);
+            IlcSpvId loadedInputId = ilcSpvPutLoad(&module, recompiler.float4Id, inputPtrId);
+            IlcSpvId indexesId[] = {
+                invocationValueId,
+                inputIndexId,
+            };
+            IlcSpvId dstId = ilcSpvPutAccessChain(&module, float4OutputPtrTypeId, outputVArrId, 2, indexesId);
+            ilcSpvPutStore(&module, dstId, loadedInputId);
+        }
+    } else {
+        for (unsigned i = 0; i < passthroughCount; ++i) {
+            if (findInputVariable(&recompiler, inputPassthroughLocations[i]) != 0) {
+                // no need to passthrough
+                continue;
+            }
+            IlcSpvId inputVariableId = ilcSpvPutVariable(&module, float4InputPtrTypeId, SpvStorageClassInput);
+            IlcSpvId outputVariableId = ilcSpvPutVariable(&module, float4OutputPtrTypeId, SpvStorageClassOutput);
+            ilcSpvPutDecoration(&module, inputVariableId, SpvDecorationLocation, 1, &inputPassthroughLocations[i]);
+            ilcSpvPutDecoration(&module, outputVariableId, SpvDecorationLocation, 1, &inputPassthroughLocations[i]);
+
+            IlcSpvId valueId = ilcSpvPutLoad(&module, recompiler.float4Id, inputVariableId);
+            ilcSpvPutStore(&module, outputVariableId, valueId);
+
+            addInterface(&recompiler, outputVariableId);
+            addInterface(&recompiler, inputVariableId);
+        }
+    }
+finish:
+    ilcSpvPutReturn(&module);
+    ilcSpvPutFunctionEnd(&module);
+    recompiler.isInFunction = false;
+    ilcSpvPutEntryPoint(&module, recompiler.entryPointId, recompiler.execModel, recompiler.entryPointName,
+                        recompiler.interfaceCount, recompiler.interfaces);
+    //inject some code
+    ilcSpvFinish(&module);
+    free(recompiler.existingInputRegisters);
+    free(recompiler.interfaces);
+
+    return (IlcRecompiledShader) {
+        .codeSize = sizeof(IlcSpvWord) * module.buffer[ID_MAIN].wordCount,
+        .code = module.buffer[ID_MAIN].words,
+    };
+}
diff --git a/src/amdilc/amdilc_spirv.c b/src/amdilc/amdilc_spirv.c
index 61ce7437..495083b1 100644
--- a/src/amdilc/amdilc_spirv.c
+++ b/src/amdilc/amdilc_spirv.c
@@ -1168,3 +1168,47 @@ void ilcSpvPutDemoteToHelperInvocation(
 
     putInstr(buffer, SpvOpDemoteToHelperInvocationEXT, 1);
 }
+
+// Wraps raw SPIR-V words in a temporary IlcSpvBuffer and passes them to putBuffer
+void ilcSpvUnwrapBuffer(
+    IlcSpvBuffer* buffer,
+    const IlcSpvWord* src,
+    unsigned wordCount)
+{
+    IlcSpvBuffer srcBuffer = (IlcSpvBuffer) {
+        .words = src,
+        .wordCount = wordCount,
+    };
+    putBuffer(buffer, &srcBuffer);
+}
+
+// Returns the index of the module buffer that an instruction with this opcode belongs to;
+// any opcode that isn't listed here is treated as function code
+unsigned getBufferIndex(SpvOp opCode) {
+    if (opCode == SpvOpCapability) {
+        return ID_CAPABILITIES;
+    } else if (opCode == SpvOpExtension) {
+        return ID_EXTENSIONS;
+    } else if (opCode == SpvOpExtInstImport) {
+        return ID_EXT_INST_IMPORTS;
+    } else if (opCode == SpvOpMemoryModel) {
+        return ID_MEMORY_MODEL;
+    } else if (opCode == SpvOpEntryPoint) {
+        return ID_ENTRY_POINTS;
+    } else if (opCode == SpvOpExecutionMode) {
+        return ID_EXEC_MODES;
+    } else if (opCode == SpvOpSource || opCode == SpvOpName || opCode == SpvOpString) {
+        return ID_DEBUG;
+    } else if (opCode == SpvOpDecorate || opCode == SpvOpMemberDecorate) {
+        return ID_DECORATIONS;
+    } else if (opCode == SpvOpTypeStruct || opCode == SpvOpTypeArray || opCode == SpvOpTypePointer) {
+        return ID_TYPES_WITH_CONSTANTS;
+    } else if ((opCode >= SpvOpTypeVoid && opCode <= SpvOpTypeForwardPointer) || opCode == SpvOpTypeNamedBarrier || opCode == SpvOpTypePipeStorage) {
+        return ID_TYPES;
+    } else if ((opCode >= SpvOpConstantTrue && opCode <= SpvOpSpecConstantOp) || opCode == SpvOpUndef) {
+        return ID_CONSTANTS;
+    } else if (opCode == SpvOpVariable) {
+        return ID_VARIABLES;
+    }
+    return ID_CODE;
+}
diff --git a/src/amdilc/amdilc_spirv.h b/src/amdilc/amdilc_spirv.h
index 3aefb3d9..e38d506f 100644
--- a/src/amdilc/amdilc_spirv.h
+++ b/src/amdilc/amdilc_spirv.h
@@ -412,4 +412,10 @@ IlcSpvId ilcSpvPutGLSLOp(
 void ilcSpvPutDemoteToHelperInvocation(
     IlcSpvModule* module);
 
+void ilcSpvUnwrapBuffer(
+    IlcSpvBuffer* buffer,
+    const IlcSpvWord* src,
+    unsigned wordCount);
+
+unsigned getBufferIndex(SpvOp opCode);
 #endif // AMDILC_SPIRV_H_
diff --git a/src/amdilc/meson.build b/src/amdilc/meson.build
index 1a35cb93..e08243ab 100644
--- a/src/amdilc/meson.build
+++ b/src/amdilc/meson.build
@@ -1,6 +1,7 @@
 amdilc_src = [
   'amdilc.c',
   'amdilc_compiler.c',
+  'amdilc_passthrough_compiler.c',
   'amdilc_decoder.c',
   'amdilc_dump.c',
   'amdilc_rect_gs_compiler.c',
diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h
index 79819479..5b7e6e46 100644
--- a/src/mantle/mantle_object.h
+++ b/src/mantle/mantle_object.h
@@ -312,6 +312,8 @@ typedef struct _GrPipeline {
     unsigned dynamicOffsetCount;
     unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS];
     UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS];
+    VkShaderModule tessellationModule;
+    VkShaderModule rectangleShaderModule;
 } GrPipeline;
 
 typedef struct _GrQueueSemaphore {
@@ -339,6 +341,8 @@ typedef struct _GrShader {
GrObject grObj; unsigned refCount; VkShaderModule shaderModule; + uint32_t* code; + unsigned codeSize; unsigned bindingCount; IlcBinding* bindings; unsigned inputCount; diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index 3582b65b..765aad6c 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -97,6 +97,12 @@ GR_RESULT GR_STDCALL grDestroyObject( } free(grPipeline->updateTemplateSlots[i]); } + if (grPipeline->tessellationModule != VK_NULL_HANDLE) { + VKD.vkDestroyShaderModule(grDevice->device, grPipeline->tessellationModule, NULL); + } + if (grPipeline->rectangleShaderModule != VK_NULL_HANDLE) { + VKD.vkDestroyShaderModule(grDevice->device, grPipeline->rectangleShaderModule, NULL); + } } break; case GR_OBJ_TYPE_QUEUE_SEMAPHORE: { GrQueueSemaphore* grQueueSemaphore = (GrQueueSemaphore*)grObject; diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index c40775e5..c280e7d1 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -701,13 +701,14 @@ GR_RESULT GR_STDCALL grCreateShader( return getGrResult(res); } - free(ilcShader.code); - + // TODO: check if the shader needs to be patched in order to free the code memory here GrShader* grShader = malloc(sizeof(GrShader)); *grShader = (GrShader) { .grObj = { GR_OBJ_TYPE_SHADER, grDevice }, .refCount = 1, .shaderModule = vkShaderModule, + .code = ilcShader.code, + .codeSize = ilcShader.codeSize, .bindingCount = ilcShader.bindingCount, .bindings = ilcShader.bindings, .inputCount = ilcShader.inputCount, @@ -732,6 +733,7 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; VkShaderModule rectangleShaderModule = VK_NULL_HANDLE; + VkShaderModule tessellationControlShader = VK_NULL_HANDLE; unsigned dynamicOffsetCount = 0; unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; 
UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; @@ -768,16 +770,36 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( } GrShader* grShader = (GrShader*)stage->shader->shader; - - grShaderRefs[i] = grShader; - grShader->refCount++; - + VkShaderModule shaderModule = grShader->shaderModule; + + if (stage->flags == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT && stages[0].shader->shader != GR_NULL_HANDLE) { + GrShader* grVertexShader = (GrShader*)stages[0].shader->shader; + IlcRecompiledShader recompiledShader = ilcRecompileShader(grShader->code, grShader->codeSize, + grVertexShader->outputLocations, grVertexShader->outputCount); + const VkShaderModuleCreateInfo tessShaderCreateInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .codeSize = recompiledShader.codeSize, + .pCode = recompiledShader.code, + }; + VkResult vkRes = VKD.vkCreateShaderModule(grDevice->device, &tessShaderCreateInfo, NULL, &tessellationControlShader); + free(recompiledShader.code); + if (vkRes != VK_SUCCESS) { + res = getGrResult(vkRes); + goto bail; + } + shaderModule = tessellationControlShader; + } else { + grShaderRefs[i] = grShader; + grShader->refCount++; + } shaderStageCreateInfo[stageCount] = (VkPipelineShaderStageCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = NULL, .flags = 0, .stage = stage->flags, - .module = grShader->shaderModule, + .module = shaderModule, .pName = "main", .pSpecializationInfo = NULL, }; @@ -906,6 +928,8 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( .dynamicOffsetCount = dynamicOffsetCount, .updateTemplateSlotCounts = { 0 }, // Initialized below .updateTemplateSlots = { NULL }, // Initialized below + .tessellationModule = tessellationControlShader, + .rectangleShaderModule = rectangleShaderModule, }; memcpy(grPipeline->grShaderRefs, grShaderRefs, sizeof(grPipeline->grShaderRefs)); @@ -919,6 +943,9 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( bail: 
VKD.vkDestroyDescriptorSetLayout(grDevice->device, descriptorSetLayout, NULL); + if (tessellationControlShader != VK_NULL_HANDLE) { + VKD.vkDestroyShaderModule(grDevice->device, tessellationControlShader, NULL); + } VKD.vkDestroyPipelineLayout(grDevice->device, pipelineLayout, NULL); VKD.vkDestroyShaderModule(grDevice->device, rectangleShaderModule, NULL); return res;