graphics.go (30854B)
1 // Copyright 2018 The Ebiten Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build darwin 16 17 package metal 18 19 import ( 20 "fmt" 21 "strings" 22 "unsafe" 23 24 "github.com/hajimehoshi/ebiten/v2/internal/affine" 25 "github.com/hajimehoshi/ebiten/v2/internal/driver" 26 "github.com/hajimehoshi/ebiten/v2/internal/graphics" 27 "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/metal/ca" 28 "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/metal/mtl" 29 "github.com/hajimehoshi/ebiten/v2/internal/shaderir" 30 "github.com/hajimehoshi/ebiten/v2/internal/thread" 31 ) 32 33 // #cgo CFLAGS: -x objective-c 34 // #cgo !ios CFLAGS: -mmacosx-version-min=10.12 35 // #cgo LDFLAGS: -framework Foundation 36 // 37 // #import <Foundation/Foundation.h> 38 // 39 // static void* allocAutoreleasePool() { 40 // return [[NSAutoreleasePool alloc] init]; 41 // } 42 // 43 // static void releaseAutoreleasePool(void* pool) { 44 // [(NSAutoreleasePool*)pool release]; 45 // } 46 import "C" 47 48 const source = `#include <metal_stdlib> 49 50 #define FILTER_NEAREST {{.FilterNearest}} 51 #define FILTER_LINEAR {{.FilterLinear}} 52 #define FILTER_SCREEN {{.FilterScreen}} 53 54 #define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}} 55 #define ADDRESS_REPEAT {{.AddressRepeat}} 56 #define ADDRESS_UNSAFE {{.AddressUnsafe}} 57 58 using namespace metal; 59 60 struct VertexIn { 61 packed_float2 position; 62 packed_float2 tex; 63 packed_float4 color; 64 }; 65 66 struct VertexOut { 67 float4 position [[position]]; 68 float2 tex; 69 float4 color; 70 }; 71 72 vertex VertexOut VertexShader( 73 uint vid [[vertex_id]], 74 const device VertexIn* vertices [[buffer(0)]], 75 constant float2& viewport_size [[buffer(1)]] 76 ) { 77 float4x4 projectionMatrix = float4x4( 78 float4(2.0 / viewport_size.x, 0, 0, 0), 79 float4(0, 2.0 / viewport_size.y, 0, 0), 80 float4(0, 0, 1, 0), 81 float4(-1, -1, 0, 1) 82 ); 83 84 VertexIn in = vertices[vid]; 85 VertexOut out = { 86 .position = projectionMatrix * float4(in.position, 0, 1), 87 .tex = in.tex, 88 .color = in.color, 89 }; 90 91 return out; 92 } 93 94 float FloorMod(float x, float y) { 95 if (x < 0.0) { 96 return y - (-x - y * floor(-x/y)); 97 } 98 return x - y * floor(x/y); 99 } 100 101 template<uint8_t address> 102 float2 AdjustTexelByAddress(float2 p, float4 source_region); 103 104 template<> 105 inline float2 AdjustTexelByAddress<ADDRESS_CLAMP_TO_ZERO>(float2 p, float4 source_region) { 106 return p; 107 } 108 109 template<> 110 inline float2 AdjustTexelByAddress<ADDRESS_REPEAT>(float2 p, float4 source_region) { 111 float2 o = float2(source_region[0], source_region[1]); 112 float2 size = float2(source_region[2] - source_region[0], source_region[3] - source_region[1]); 113 return float2(FloorMod((p.x - o.x), size.x) + o.x, FloorMod((p.y - o.y), size.y) + o.y); 114 } 115 116 template<uint8_t filter, uint8_t address> 117 struct ColorFromTexel; 118 119 constexpr sampler texture_sampler{filter::nearest}; 120 121 template<> 122 struct ColorFromTexel<FILTER_NEAREST, ADDRESS_UNSAFE> { 123 inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, float scale, constant float4& source_region) { 124 float2 p = v.tex; 125 return texture.sample(texture_sampler, p); 126 } 127 }; 128 129 template<uint8_t address> 130 struct ColorFromTexel<FILTER_NEAREST, address> { 131 inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, float scale, constant float4& source_region) { 132 float2 p = AdjustTexelByAddress<address>(v.tex, source_region); 133 if (source_region[0] <= p.x && 134 source_region[1] <= p.y && 135 p.x < source_region[2] && 136 p.y < source_region[3]) { 137 return texture.sample(texture_sampler, p); 138 } 139 return 0.0; 140 } 141 }; 142 143 template<> 144 struct ColorFromTexel<FILTER_LINEAR, ADDRESS_UNSAFE> { 145 inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, float scale, constant float4& source_region) { 146 const float2 texel_size = 1 / source_size; 147 148 // Shift 1/512 [texel] to avoid the tie-breaking issue. 149 // As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases. 150 float2 p0 = v.tex - texel_size / 2.0 + (texel_size / 512.0); 151 float2 p1 = v.tex + texel_size / 2.0 + (texel_size / 512.0); 152 153 float4 c0 = texture.sample(texture_sampler, p0); 154 float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y)); 155 float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y)); 156 float4 c3 = texture.sample(texture_sampler, p1); 157 158 float2 rate = fract(p0 * source_size); 159 return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); 160 } 161 }; 162 163 template<uint8_t address> 164 struct ColorFromTexel<FILTER_LINEAR, address> { 165 inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, float scale, constant float4& source_region) { 166 const float2 texel_size = 1 / source_size; 167 168 // Shift 1/512 [texel] to avoid the tie-breaking issue. 169 // As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases. 170 float2 p0 = v.tex - texel_size / 2.0 + (texel_size / 512.0); 171 float2 p1 = v.tex + texel_size / 2.0 + (texel_size / 512.0); 172 p0 = AdjustTexelByAddress<address>(p0, source_region); 173 p1 = AdjustTexelByAddress<address>(p1, source_region); 174 175 float4 c0 = texture.sample(texture_sampler, p0); 176 float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y)); 177 float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y)); 178 float4 c3 = texture.sample(texture_sampler, p1); 179 180 if (p0.x < source_region[0]) { 181 c0 = 0; 182 c2 = 0; 183 } 184 if (p0.y < source_region[1]) { 185 c0 = 0; 186 c1 = 0; 187 } 188 if (source_region[2] <= p1.x) { 189 c1 = 0; 190 c3 = 0; 191 } 192 if (source_region[3] <= p1.y) { 193 c2 = 0; 194 c3 = 0; 195 } 196 197 float2 rate = fract(p0 * source_size); 198 return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); 199 } 200 }; 201 202 template<uint8_t address> 203 struct ColorFromTexel<FILTER_SCREEN, address> { 204 inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, float scale, constant float4& source_region) { 205 const float2 texel_size = 1 / source_size; 206 207 float2 p0 = v.tex - texel_size / 2.0 / scale + (texel_size / 512.0); 208 float2 p1 = v.tex + texel_size / 2.0 / scale + (texel_size / 512.0); 209 210 float4 c0 = texture.sample(texture_sampler, p0); 211 float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y)); 212 float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y)); 213 float4 c3 = texture.sample(texture_sampler, p1); 214 215 float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale; 216 float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); 217 return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); 218 } 219 }; 220 221 template<bool useColorM, uint8_t filter, uint8_t address> 222 struct FragmentShaderImpl { 223 inline float4 Do( 224 VertexOut v, 225 texture2d<float> texture, 226 constant float2& source_size, 227 constant float4x4& color_matrix_body, 228 constant float4& color_matrix_translation, 229 constant float& scale, 230 constant float4& source_region) { 231 float4 c = ColorFromTexel<filter, address>().Do(v, texture, source_size, scale, source_region); 232 if (useColorM) { 233 c.rgb /= c.a + (1.0 - sign(c.a)); 234 c = (color_matrix_body * c) + color_matrix_translation; 235 c *= v.color; 236 c.rgb *= c.a; 237 } else { 238 float4 s = v.color; 239 c *= float4(s.r, s.g, s.b, 1.0) * s.a; 240 } 241 c = min(c, c.a); 242 return c; 243 } 244 }; 245 246 template<bool useColorM, uint8_t address> 247 struct FragmentShaderImpl<useColorM, FILTER_SCREEN, address> { 248 inline float4 Do( 249 VertexOut v, 250 texture2d<float> texture, 251 constant float2& source_size, 252 constant float4x4& color_matrix_body, 253 constant float4& color_matrix_translation, 254 constant float& scale, 255 constant float4& source_region) { 256 return ColorFromTexel<FILTER_SCREEN, address>().Do(v, texture, source_size, scale, source_region); 257 } 258 }; 259 260 // Define Foo and FooCp macros to force macro replacement. 261 // See "6.10.3.1 Argument substitution" in ISO/IEC 9899. 262 263 #define FragmentShaderFunc(useColorM, filter, address) \ 264 FragmentShaderFuncCp(useColorM, filter, address) 265 266 #define FragmentShaderFuncCp(useColorM, filter, address) \ 267 fragment float4 FragmentShader_##useColorM##_##filter##_##address( \ 268 VertexOut v [[stage_in]], \ 269 texture2d<float> texture [[texture(0)]], \ 270 constant float2& source_size [[buffer(2)]], \ 271 constant float4x4& color_matrix_body [[buffer(3)]], \ 272 constant float4& color_matrix_translation [[buffer(4)]], \ 273 constant float& scale [[buffer(5)]], \ 274 constant float4& source_region [[buffer(6)]]) { \ 275 return FragmentShaderImpl<useColorM, filter, address>().Do( \ 276 v, texture, source_size, color_matrix_body, color_matrix_translation, scale, source_region); \ 277 } 278 279 FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO) 280 FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO) 281 FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_REPEAT) 282 FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_REPEAT) 283 FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_UNSAFE) 284 FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_UNSAFE) 285 FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO) 286 FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO) 287 FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_REPEAT) 288 FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT) 289 FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_UNSAFE) 290 FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_UNSAFE) 291 292 FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_UNSAFE) 293 294 #undef FragmentShaderFuncName 295 ` 296 297 type rpsKey struct { 298 useColorM bool 299 filter driver.Filter 300 address driver.Address 301 compositeMode driver.CompositeMode 302 screen bool 303 } 304 305 type Graphics struct { 306 view view 307 308 screenRPS mtl.RenderPipelineState 309 rpss map[rpsKey]mtl.RenderPipelineState 310 cq mtl.CommandQueue 311 cb mtl.CommandBuffer 312 313 screenDrawable ca.MetalDrawable 314 315 vb mtl.Buffer 316 ib mtl.Buffer 317 318 images map[driver.ImageID]*Image 319 nextImageID driver.ImageID 320 321 shaders map[driver.ShaderID]*Shader 322 nextShaderID driver.ShaderID 323 324 src *Image 325 dst *Image 326 327 transparent bool 328 maxImageSize int 329 tmpTexture mtl.Texture 330 331 t *thread.Thread 332 333 pool unsafe.Pointer 334 } 335 336 var theGraphics Graphics 337 338 func Get() *Graphics { 339 return &theGraphics 340 } 341 342 func (g *Graphics) SetThread(thread *thread.Thread) { 343 g.t = thread 344 } 345 346 func (g *Graphics) Begin() { 347 g.t.Call(func() error { 348 // NSAutoreleasePool is required to release drawable correctly (#847). 349 // https://developer.apple.com/library/archive/documentation/3DDrawing/Conceptual/MTLBestPracticesGuide/Drawables.html 350 g.pool = C.allocAutoreleasePool() 351 return nil 352 }) 353 } 354 355 func (g *Graphics) End() { 356 g.flushIfNeeded(false, true) 357 g.t.Call(func() error { 358 g.screenDrawable = ca.MetalDrawable{} 359 C.releaseAutoreleasePool(g.pool) 360 g.pool = nil 361 return nil 362 }) 363 } 364 365 func (g *Graphics) SetWindow(window uintptr) { 366 g.t.Call(func() error { 367 // Note that [NSApp mainWindow] returns nil when the window is borderless. 368 // Then the window is needed to be given explicitly. 369 g.view.setWindow(window) 370 return nil 371 }) 372 } 373 374 func (g *Graphics) SetUIView(uiview uintptr) { 375 // TODO: Should this be called on the main thread? 376 g.view.setUIView(uiview) 377 } 378 379 func (g *Graphics) SetVertices(vertices []float32, indices []uint16) { 380 g.t.Call(func() error { 381 if g.vb != (mtl.Buffer{}) { 382 g.vb.Release() 383 } 384 if g.ib != (mtl.Buffer{}) { 385 g.ib.Release() 386 } 387 g.vb = g.view.getMTLDevice().MakeBufferWithBytes(unsafe.Pointer(&vertices[0]), unsafe.Sizeof(vertices[0])*uintptr(len(vertices)), resourceStorageMode) 388 g.ib = g.view.getMTLDevice().MakeBufferWithBytes(unsafe.Pointer(&indices[0]), unsafe.Sizeof(indices[0])*uintptr(len(indices)), resourceStorageMode) 389 return nil 390 }) 391 } 392 393 func (g *Graphics) flushIfNeeded(wait bool, present bool) { 394 g.t.Call(func() error { 395 if g.cb == (mtl.CommandBuffer{}) { 396 return nil 397 } 398 399 if present && g.screenDrawable != (ca.MetalDrawable{}) { 400 g.cb.PresentDrawable(g.screenDrawable) 401 } 402 g.cb.Commit() 403 if wait { 404 g.cb.WaitUntilCompleted() 405 } 406 407 g.cb = mtl.CommandBuffer{} 408 409 return nil 410 }) 411 } 412 413 func (g *Graphics) checkSize(width, height int) { 414 if width < 1 { 415 panic(fmt.Sprintf("metal: width (%d) must be equal or more than %d", width, 1)) 416 } 417 if height < 1 { 418 panic(fmt.Sprintf("metal: height (%d) must be equal or more than %d", height, 1)) 419 } 420 m := g.MaxImageSize() 421 if width > m { 422 panic(fmt.Sprintf("metal: width (%d) must be less than or equal to %d", width, m)) 423 } 424 if height > m { 425 panic(fmt.Sprintf("metal: height (%d) must be less than or equal to %d", height, m)) 426 } 427 } 428 429 func (g *Graphics) genNextImageID() driver.ImageID { 430 id := g.nextImageID 431 g.nextImageID++ 432 return id 433 } 434 435 func (g *Graphics) InvalidImageID() driver.ImageID { 436 return -1 437 } 438 439 func (g *Graphics) genNextShaderID() driver.ShaderID { 440 id := g.nextShaderID 441 g.nextShaderID++ 442 return id 443 } 444 445 func (g *Graphics) NewImage(width, height int) (driver.Image, error) { 446 g.checkSize(width, height) 447 td := mtl.TextureDescriptor{ 448 TextureType: mtl.TextureType2D, 449 PixelFormat: mtl.PixelFormatRGBA8UNorm, 450 Width: graphics.InternalImageSize(width), 451 Height: graphics.InternalImageSize(height), 452 StorageMode: storageMode, 453 Usage: mtl.TextureUsageShaderRead | mtl.TextureUsageRenderTarget, 454 } 455 var t mtl.Texture 456 g.t.Call(func() error { 457 t = g.view.getMTLDevice().MakeTexture(td) 458 return nil 459 }) 460 i := &Image{ 461 id: g.genNextImageID(), 462 graphics: g, 463 width: width, 464 height: height, 465 texture: t, 466 } 467 g.addImage(i) 468 return i, nil 469 } 470 471 func (g *Graphics) NewScreenFramebufferImage(width, height int) (driver.Image, error) { 472 g.t.Call(func() error { 473 g.view.setDrawableSize(width, height) 474 return nil 475 }) 476 i := &Image{ 477 id: g.genNextImageID(), 478 graphics: g, 479 width: width, 480 height: height, 481 screen: true, 482 } 483 g.addImage(i) 484 return i, nil 485 } 486 487 func (g *Graphics) addImage(img *Image) { 488 if g.images == nil { 489 g.images = map[driver.ImageID]*Image{} 490 } 491 if _, ok := g.images[img.id]; ok { 492 panic(fmt.Sprintf("opengl: image ID %d was already registered", img.id)) 493 } 494 g.images[img.id] = img 495 } 496 497 func (g *Graphics) removeImage(img *Image) { 498 delete(g.images, img.id) 499 } 500 501 func (g *Graphics) SetTransparent(transparent bool) { 502 g.transparent = transparent 503 } 504 505 func operationToBlendFactor(c driver.Operation) mtl.BlendFactor { 506 switch c { 507 case driver.Zero: 508 return mtl.BlendFactorZero 509 case driver.One: 510 return mtl.BlendFactorOne 511 case driver.SrcAlpha: 512 return mtl.BlendFactorSourceAlpha 513 case driver.DstAlpha: 514 return mtl.BlendFactorDestinationAlpha 515 case driver.OneMinusSrcAlpha: 516 return mtl.BlendFactorOneMinusSourceAlpha 517 case driver.OneMinusDstAlpha: 518 return mtl.BlendFactorOneMinusDestinationAlpha 519 case driver.DstColor: 520 return mtl.BlendFactorDestinationColor 521 default: 522 panic(fmt.Sprintf("metal: invalid operation: %d", c)) 523 } 524 } 525 526 func (g *Graphics) Reset() error { 527 if err := g.t.Call(func() error { 528 if g.cq != (mtl.CommandQueue{}) { 529 g.cq.Release() 530 g.cq = mtl.CommandQueue{} 531 } 532 533 // TODO: Release existing rpss 534 if g.rpss == nil { 535 g.rpss = map[rpsKey]mtl.RenderPipelineState{} 536 } 537 538 if err := g.view.reset(); err != nil { 539 return err 540 } 541 if g.transparent { 542 g.view.ml.SetOpaque(false) 543 } 544 545 replaces := map[string]string{ 546 "{{.FilterNearest}}": fmt.Sprintf("%d", driver.FilterNearest), 547 "{{.FilterLinear}}": fmt.Sprintf("%d", driver.FilterLinear), 548 "{{.FilterScreen}}": fmt.Sprintf("%d", driver.FilterScreen), 549 "{{.AddressClampToZero}}": fmt.Sprintf("%d", driver.AddressClampToZero), 550 "{{.AddressRepeat}}": fmt.Sprintf("%d", driver.AddressRepeat), 551 "{{.AddressUnsafe}}": fmt.Sprintf("%d", driver.AddressUnsafe), 552 } 553 src := source 554 for k, v := range replaces { 555 src = strings.Replace(src, k, v, -1) 556 } 557 558 lib, err := g.view.getMTLDevice().MakeLibrary(src, mtl.CompileOptions{}) 559 if err != nil { 560 return err 561 } 562 vs, err := lib.MakeFunction("VertexShader") 563 if err != nil { 564 return err 565 } 566 fs, err := lib.MakeFunction( 567 fmt.Sprintf("FragmentShader_%d_%d_%d", 0, driver.FilterScreen, driver.AddressUnsafe)) 568 if err != nil { 569 return err 570 } 571 rpld := mtl.RenderPipelineDescriptor{ 572 VertexFunction: vs, 573 FragmentFunction: fs, 574 } 575 rpld.ColorAttachments[0].PixelFormat = g.view.colorPixelFormat() 576 rpld.ColorAttachments[0].BlendingEnabled = true 577 rpld.ColorAttachments[0].DestinationAlphaBlendFactor = mtl.BlendFactorZero 578 rpld.ColorAttachments[0].DestinationRGBBlendFactor = mtl.BlendFactorZero 579 rpld.ColorAttachments[0].SourceAlphaBlendFactor = mtl.BlendFactorOne 580 rpld.ColorAttachments[0].SourceRGBBlendFactor = mtl.BlendFactorOne 581 rps, err := g.view.getMTLDevice().MakeRenderPipelineState(rpld) 582 if err != nil { 583 return err 584 } 585 g.screenRPS = rps 586 587 for _, screen := range []bool{false, true} { 588 for _, cm := range []bool{false, true} { 589 for _, a := range []driver.Address{ 590 driver.AddressClampToZero, 591 driver.AddressRepeat, 592 driver.AddressUnsafe, 593 } { 594 for _, f := range []driver.Filter{ 595 driver.FilterNearest, 596 driver.FilterLinear, 597 } { 598 for c := driver.CompositeModeSourceOver; c <= driver.CompositeModeMax; c++ { 599 cmi := 0 600 if cm { 601 cmi = 1 602 } 603 fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d_%d", cmi, f, a)) 604 if err != nil { 605 return err 606 } 607 rpld := mtl.RenderPipelineDescriptor{ 608 VertexFunction: vs, 609 FragmentFunction: fs, 610 } 611 612 pix := mtl.PixelFormatRGBA8UNorm 613 if screen { 614 pix = g.view.colorPixelFormat() 615 } 616 rpld.ColorAttachments[0].PixelFormat = pix 617 rpld.ColorAttachments[0].BlendingEnabled = true 618 619 src, dst := c.Operations() 620 rpld.ColorAttachments[0].DestinationAlphaBlendFactor = operationToBlendFactor(dst) 621 rpld.ColorAttachments[0].DestinationRGBBlendFactor = operationToBlendFactor(dst) 622 rpld.ColorAttachments[0].SourceAlphaBlendFactor = operationToBlendFactor(src) 623 rpld.ColorAttachments[0].SourceRGBBlendFactor = operationToBlendFactor(src) 624 rps, err := g.view.getMTLDevice().MakeRenderPipelineState(rpld) 625 if err != nil { 626 return err 627 } 628 g.rpss[rpsKey{ 629 screen: screen, 630 useColorM: cm, 631 filter: f, 632 address: a, 633 compositeMode: c, 634 }] = rps 635 } 636 } 637 } 638 } 639 } 640 641 g.cq = g.view.getMTLDevice().MakeCommandQueue() 642 return nil 643 }); err != nil { 644 return err 645 } 646 647 return nil 648 } 649 650 func (g *Graphics) draw(rps mtl.RenderPipelineState, dst *Image, srcs [graphics.ShaderImageNum]*Image, indexLen int, indexOffset int, uniforms []interface{}) error { 651 g.view.update() 652 653 rpd := mtl.RenderPassDescriptor{} 654 // Even though the destination pixels are not used, mtl.LoadActionDontCare might cause glitches 655 // (#1019). Always using mtl.LoadActionLoad is safe. 656 rpd.ColorAttachments[0].LoadAction = mtl.LoadActionLoad 657 rpd.ColorAttachments[0].StoreAction = mtl.StoreActionStore 658 659 var t mtl.Texture 660 if dst.screen { 661 if g.screenDrawable == (ca.MetalDrawable{}) { 662 drawable := g.view.drawable() 663 if drawable == (ca.MetalDrawable{}) { 664 return nil 665 } 666 g.screenDrawable = drawable 667 } 668 t = g.screenDrawable.Texture() 669 } else { 670 t = dst.texture 671 } 672 rpd.ColorAttachments[0].Texture = t 673 rpd.ColorAttachments[0].ClearColor = mtl.ClearColor{} 674 675 if g.cb == (mtl.CommandBuffer{}) { 676 g.cb = g.cq.MakeCommandBuffer() 677 } 678 rce := g.cb.MakeRenderCommandEncoder(rpd) 679 rce.SetRenderPipelineState(rps) 680 681 // In Metal, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't 682 // match. Then, the Y direction must be inverted. 683 w, h := dst.internalSize() 684 rce.SetViewport(mtl.Viewport{ 685 OriginX: 0, 686 OriginY: float64(h), 687 Width: float64(w), 688 Height: -float64(h), 689 ZNear: -1, 690 ZFar: 1, 691 }) 692 rce.SetVertexBuffer(g.vb, 0, 0) 693 694 for i, u := range uniforms { 695 switch u := u.(type) { 696 case float32: 697 rce.SetVertexBytes(unsafe.Pointer(&u), unsafe.Sizeof(u), i+1) 698 rce.SetFragmentBytes(unsafe.Pointer(&u), unsafe.Sizeof(u), i+1) 699 case []float32: 700 rce.SetVertexBytes(unsafe.Pointer(&u[0]), unsafe.Sizeof(u[0])*uintptr(len(u)), i+1) 701 rce.SetFragmentBytes(unsafe.Pointer(&u[0]), unsafe.Sizeof(u[0])*uintptr(len(u)), i+1) 702 default: 703 return fmt.Errorf("metal: unexpected uniform value: %[1]v (type: %[1]T)", u) 704 } 705 } 706 707 for i, src := range srcs { 708 if src != nil { 709 rce.SetFragmentTexture(src.texture, i) 710 } else { 711 rce.SetFragmentTexture(mtl.Texture{}, i) 712 } 713 } 714 rce.DrawIndexedPrimitives(mtl.PrimitiveTypeTriangle, indexLen, mtl.IndexTypeUInt16, g.ib, indexOffset*2) 715 rce.EndEncoding() 716 return nil 717 } 718 719 func (g *Graphics) Draw(dstID, srcID driver.ImageID, indexLen int, indexOffset int, mode driver.CompositeMode, colorM *affine.ColorM, filter driver.Filter, address driver.Address, sourceRegion driver.Region) error { 720 dst := g.images[dstID] 721 srcs := [graphics.ShaderImageNum]*Image{g.images[srcID]} 722 723 var rps mtl.RenderPipelineState 724 if dst.screen && filter == driver.FilterScreen { 725 rps = g.screenRPS 726 } else { 727 rps = g.rpss[rpsKey{ 728 screen: dst.screen, 729 useColorM: colorM != nil, 730 filter: filter, 731 address: address, 732 compositeMode: mode, 733 }] 734 } 735 736 if err := g.t.Call(func() error { 737 w, h := dst.internalSize() 738 sourceSize := []float32{0, 0} 739 if filter != driver.FilterNearest { 740 w, h := srcs[0].internalSize() 741 sourceSize[0] = float32(w) 742 sourceSize[1] = float32(h) 743 } 744 esBody, esTranslate := colorM.UnsafeElements() 745 scale := float32(0) 746 if filter == driver.FilterScreen { 747 scale = float32(dst.width) / float32(srcs[0].width) 748 } 749 uniforms := []interface{}{ 750 []float32{float32(w), float32(h)}, 751 sourceSize, 752 esBody, 753 esTranslate, 754 scale, 755 []float32{ 756 sourceRegion.X, 757 sourceRegion.Y, 758 sourceRegion.X + sourceRegion.Width, 759 sourceRegion.Y + sourceRegion.Height, 760 }, 761 } 762 if err := g.draw(rps, dst, srcs, indexLen, indexOffset, uniforms); err != nil { 763 return err 764 } 765 return nil 766 }); err != nil { 767 return err 768 } 769 return nil 770 } 771 772 func (g *Graphics) SetVsyncEnabled(enabled bool) { 773 g.view.setDisplaySyncEnabled(enabled) 774 } 775 776 func (g *Graphics) FramebufferYDirection() driver.YDirection { 777 return driver.Downward 778 } 779 780 func (g *Graphics) NeedsRestoring() bool { 781 return false 782 } 783 784 func (g *Graphics) IsGL() bool { 785 return false 786 } 787 788 func (g *Graphics) HasHighPrecisionFloat() bool { 789 return true 790 } 791 792 func (g *Graphics) MaxImageSize() int { 793 m := 0 794 g.t.Call(func() error { 795 if g.maxImageSize == 0 { 796 g.maxImageSize = 4096 797 // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf 798 switch { 799 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily5_v1): 800 g.maxImageSize = 16384 801 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily4_v1): 802 g.maxImageSize = 16384 803 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily3_v1): 804 g.maxImageSize = 16384 805 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily2_v2): 806 g.maxImageSize = 8192 807 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily2_v1): 808 g.maxImageSize = 4096 809 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily1_v2): 810 g.maxImageSize = 8192 811 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_iOS_GPUFamily1_v1): 812 g.maxImageSize = 4096 813 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_tvOS_GPUFamily2_v1): 814 g.maxImageSize = 16384 815 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_tvOS_GPUFamily1_v1): 816 g.maxImageSize = 8192 817 case g.view.getMTLDevice().SupportsFeatureSet(mtl.FeatureSet_macOS_GPUFamily1_v1): 818 g.maxImageSize = 16384 819 default: 820 panic("metal: there is no supported feature set") 821 } 822 } 823 m = g.maxImageSize 824 return nil 825 }) 826 return m 827 } 828 829 func (g *Graphics) NewShader(program *shaderir.Program) (driver.Shader, error) { 830 var s *Shader 831 if err := g.t.Call(func() error { 832 var err error 833 s, err = newShader(g.view.getMTLDevice(), g.genNextShaderID(), program) 834 if err != nil { 835 return err 836 } 837 return nil 838 }); err != nil { 839 return nil, err 840 } 841 g.addShader(s) 842 return s, nil 843 } 844 845 func (g *Graphics) addShader(shader *Shader) { 846 if g.shaders == nil { 847 g.shaders = map[driver.ShaderID]*Shader{} 848 } 849 if _, ok := g.shaders[shader.id]; ok { 850 panic(fmt.Sprintf("metal: shader ID %d was already registered", shader.id)) 851 } 852 g.shaders[shader.id] = shader 853 } 854 855 func (g *Graphics) removeShader(shader *Shader) { 856 delete(g.shaders, shader.id) 857 } 858 859 type Image struct { 860 id driver.ImageID 861 graphics *Graphics 862 width int 863 height int 864 screen bool 865 texture mtl.Texture 866 } 867 868 func (i *Image) ID() driver.ImageID { 869 return i.id 870 } 871 872 func (i *Image) internalSize() (int, int) { 873 if i.screen { 874 return i.width, i.height 875 } 876 return graphics.InternalImageSize(i.width), graphics.InternalImageSize(i.height) 877 } 878 879 func (i *Image) Dispose() { 880 i.graphics.t.Call(func() error { 881 if i.texture != (mtl.Texture{}) { 882 i.texture.Release() 883 i.texture = mtl.Texture{} 884 } 885 return nil 886 }) 887 i.graphics.removeImage(i) 888 } 889 890 func (i *Image) IsInvalidated() bool { 891 // TODO: Does Metal cause context lost? 892 // https://developer.apple.com/documentation/metal/mtlresource/1515898-setpurgeablestate 893 // https://developer.apple.com/documentation/metal/mtldevicenotificationhandler 894 return false 895 } 896 897 func (i *Image) syncTexture() { 898 // Calling SynchronizeTexture is ignored on iOS (see mtl.m), but it looks like committing BliCommandEncoder 899 // is necessary (#1337). 900 i.graphics.t.Call(func() error { 901 if i.graphics.cb != (mtl.CommandBuffer{}) { 902 panic("metal: command buffer must be empty at syncTexture: flushIfNeeded is not called yet?") 903 } 904 905 cb := i.graphics.cq.MakeCommandBuffer() 906 bce := cb.MakeBlitCommandEncoder() 907 bce.SynchronizeTexture(i.texture, 0, 0) 908 bce.EndEncoding() 909 cb.Commit() 910 cb.WaitUntilCompleted() 911 return nil 912 }) 913 } 914 915 func (i *Image) Pixels() ([]byte, error) { 916 i.graphics.flushIfNeeded(true, false) 917 i.syncTexture() 918 919 b := make([]byte, 4*i.width*i.height) 920 i.graphics.t.Call(func() error { 921 i.texture.GetBytes(&b[0], uintptr(4*i.width), mtl.Region{ 922 Size: mtl.Size{Width: i.width, Height: i.height, Depth: 1}, 923 }, 0) 924 return nil 925 }) 926 return b, nil 927 } 928 929 func (i *Image) ReplacePixels(args []*driver.ReplacePixelsArgs) { 930 g := i.graphics 931 g.flushIfNeeded(true, false) 932 933 // If the memory is shared (e.g., iOS), texture data doen't have to be synced. Send the data directly. 934 if storageMode == mtl.StorageModeShared { 935 g.t.Call(func() error { 936 for _, a := range args { 937 i.texture.ReplaceRegion(mtl.Region{ 938 Origin: mtl.Origin{X: a.X, Y: a.Y, Z: 0}, 939 Size: mtl.Size{Width: a.Width, Height: a.Height, Depth: 1}, 940 }, 0, unsafe.Pointer(&a.Pixels[0]), 4*a.Width) 941 } 942 return nil 943 }) 944 return 945 } 946 947 // If the memory is managed (e.g., macOS), texture data cannot be sent to the destination directly because 948 // this requires synchronizing data between CPU and GPU. As synchronizing is inefficient, let's send the 949 // data to a temporary texture once, and then copy it in GPU. 950 g.t.Call(func() error { 951 w, h := i.texture.Width(), i.texture.Height() 952 if g.tmpTexture == (mtl.Texture{}) || w > g.tmpTexture.Width() || h > g.tmpTexture.Height() { 953 if g.tmpTexture != (mtl.Texture{}) { 954 g.tmpTexture.Release() 955 } 956 td := mtl.TextureDescriptor{ 957 TextureType: mtl.TextureType2D, 958 PixelFormat: mtl.PixelFormatRGBA8UNorm, 959 Width: w, 960 Height: h, 961 StorageMode: storageMode, 962 Usage: mtl.TextureUsageShaderRead | mtl.TextureUsageRenderTarget, 963 } 964 g.tmpTexture = g.view.getMTLDevice().MakeTexture(td) 965 } 966 967 for _, a := range args { 968 g.tmpTexture.ReplaceRegion(mtl.Region{ 969 Origin: mtl.Origin{X: a.X, Y: a.Y, Z: 0}, 970 Size: mtl.Size{Width: a.Width, Height: a.Height, Depth: 1}, 971 }, 0, unsafe.Pointer(&a.Pixels[0]), 4*a.Width) 972 } 973 974 if g.cb == (mtl.CommandBuffer{}) { 975 g.cb = i.graphics.cq.MakeCommandBuffer() 976 } 977 bce := g.cb.MakeBlitCommandEncoder() 978 for _, a := range args { 979 o := mtl.Origin{X: a.X, Y: a.Y, Z: 0} 980 s := mtl.Size{Width: a.Width, Height: a.Height, Depth: 1} 981 bce.CopyFromTexture(g.tmpTexture, 0, 0, o, s, i.texture, 0, 0, o) 982 } 983 bce.EndEncoding() 984 985 return nil 986 }) 987 } 988 989 func (g *Graphics) DrawShader(dstID driver.ImageID, srcIDs [graphics.ShaderImageNum]driver.ImageID, offsets [graphics.ShaderImageNum - 1][2]float32, shader driver.ShaderID, indexLen int, indexOffset int, sourceRegion driver.Region, mode driver.CompositeMode, uniforms []interface{}) error { 990 dst := g.images[dstID] 991 var srcs [graphics.ShaderImageNum]*Image 992 for i, srcID := range srcIDs { 993 srcs[i] = g.images[srcID] 994 } 995 996 if err := g.t.Call(func() error { 997 rps, err := g.shaders[shader].RenderPipelineState(g.view.getMTLDevice(), mode) 998 if err != nil { 999 return err 1000 } 1001 1002 us := make([]interface{}, graphics.PreservedUniformVariablesNum+len(uniforms)) 1003 1004 // Set the destination texture size. 1005 dw, dh := dst.internalSize() 1006 us[graphics.DestinationTextureSizeUniformVariableIndex] = []float32{float32(dw), float32(dh)} 1007 1008 // Set the source texture sizes. 1009 usizes := make([]float32, 2*len(srcs)) 1010 for i, src := range srcs { 1011 if src != nil { 1012 w, h := src.internalSize() 1013 usizes[2*i] = float32(w) 1014 usizes[2*i+1] = float32(h) 1015 } 1016 } 1017 us[graphics.TextureSizesUniformVariableIndex] = usizes 1018 1019 // Set the source offsets. 1020 uoffsets := make([]float32, 2*len(offsets)) 1021 for i, offset := range offsets { 1022 uoffsets[2*i] = offset[0] 1023 uoffsets[2*i+1] = offset[1] 1024 } 1025 us[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets 1026 1027 // Set the source region's origin of texture0. 1028 uorigin := []float32{float32(sourceRegion.X), float32(sourceRegion.Y)} 1029 us[graphics.TextureSourceRegionOriginUniformVariableIndex] = uorigin 1030 1031 // Set the source region's size of texture0. 1032 ussize := []float32{float32(sourceRegion.Width), float32(sourceRegion.Height)} 1033 us[graphics.TextureSourceRegionSizeUniformVariableIndex] = ussize 1034 1035 // Set the additional uniform variables. 1036 for i, v := range uniforms { 1037 const offset = graphics.PreservedUniformVariablesNum 1038 us[offset+i] = v 1039 } 1040 1041 if err := g.draw(rps, dst, srcs, indexLen, indexOffset, us); err != nil { 1042 return err 1043 } 1044 return nil 1045 }); err != nil { 1046 return err 1047 } 1048 return nil 1049 }