Bug 84977

Summary: r300/compiler: register allocation pass generates invalid swizzle for r300/r400
Product: Mesa Reporter: David Heidelberg (okias) <david>
Component: Drivers/Gallium/r300 Assignee: Default DRI bug account <dri-devel>
Status: RESOLVED MOVED QA Contact:
Severity: normal    
Priority: medium CC: david
Version: unspecified   
Hardware: Other   
OS: All   
Whiteboard:
i915 platform: i915 features:
Attachments: r300_deadsource.txt
r300_regalloc.txt
r300_regalloc-workaround.txt
r300_regalloc-separated_workaround.txt

Description David Heidelberg (okias) 2014-10-14 08:31:56 UTC
Created attachment 107808 [details]
r300_deadsource.txt

Error:
Not a native swizzle: 00000fc3

Can be worked around by forcing:

diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
index 14f93fb..5682b55 100644
--- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -432,6 +432,8 @@ static enum rc_reg_class variable_get_class(
                }
        }
 
+       can_change_writemask = 0;
+
        class_index = find_class(classes, writemask,
                                                can_change_writemask ? 3 : 1);
 done:

It seems that the condition "if (!r300_swizzle_is_native_basic(new_swizzle)) {"
was not applied to this one swizzle (checked with fprintf).

Output from the register allocation pass, compared with the output when can_change_writemask = 0 is forced:

--- r300_regalloc.txt       2014-10-14 01:51:00.813113749 +0200
+++ r300_regalloc-workaround.txt    2014-10-14 01:51:40.207223071 +0200
@@ -10,20 +10,20 @@
      DP3, src0.xyz, src0.xyz
      DP3 temp[3].w, src0._, src0._
   5: src0.xyz = input[3], src0.w = temp[3]
-     DP3 temp[7].z, src0.xyz, src0.xyz
+     DP3 temp[8].x, src0.xyz, src0.xyz
      RSQ temp[4].w, |src0.w|
-  6: src0.xyz = input[6], src1.xyz = temp[7]
-     DP3 temp[7].z, src0.xyz, src0.xyz
-     RSQ temp[5].w, |src1.z|
-  7: src0.xyz = input[5], src0.w = temp[3]
+  6: src0.xyz = input[6], src1.xyz = temp[8]
      DP3 temp[8].x, src0.xyz, src0.xyz
+     RSQ temp[5].w, |src1.x|
+  7: src0.xyz = input[5], src0.w = temp[3]
+     DP3 temp[9].x, src0.xyz, src0.xyz
      MAD temp[3].w, -src0.w, src0.1, src0.1
-  8: src0.w = temp[4], src1.xyz = input[4], src2.xyz = temp[8]
+  8: src0.w = temp[4], src1.xyz = input[4], src2.xyz = temp[9]
      MAD temp[4].xyz, src0.www, src1.xyz, src0.000
      RSQ temp[4].w, |src2.x|
-  9: src0.xyz = temp[7], src1.xyz = temp[7]
+  9: src0.xyz = temp[7], src1.xyz = temp[8]
      MAD temp[7].xy, src0.xy_, src0.11_, src0.HH_
-     RSQ temp[6].w, |src1.z|
+     RSQ temp[6].w, |src1.x|
  10: src0.xyz = input[3], src0.w = temp[5], src2.xyz = temp[4]
      MAD temp[3].xyz, src0.xyz, src0.www, src2.xyz
  11: src0.xyz = input[5], src0.w = temp[4]
@@ -38,10 +38,10 @@
  15: src0.xyz = temp[6], src1.xyz = temp[4]
      DP3_SAT temp[4].x, src0.xyz, src1.xyz
  16: src0.xyz = temp[3]
-     DP3 temp[4].y, src0.xyz, src0.xyz
- 17: src0.xyz = temp[5], src1.xyz = const[7], src2.xyz = temp[4]
+     DP3 temp[7].x, src0.xyz, src0.xyz
+ 17: src0.xyz = temp[5], src1.xyz = const[7], src2.xyz = temp[7]
      MAD temp[5].xy, src0.xy_, src1.xx_, src1.yy_
-     RSQ temp[5].w, |src2.y|
+     RSQ temp[5].w, |src2.x|
  18: src0.xyz = temp[4], src0.w = input[0], src1.xyz = input[0]
      MAD temp[0].xyz, src0.xxx, src1.xyz, src0.000
      MAD temp[0].w, src0.x, src0.w, src0.0
@@ -53,24 +53,24 @@
      DP3_SAT, src0.xyz, src1.xyz
      DP3_SAT temp[5].w, src0._, src0._
  22: src0.xyz = temp[5], src0.w = const[7], src1.xyz = const[7]
-     MAD temp[3].x, src0.y__, src1.z__, src0.w__
+     MAD temp[3].z, src0.__y, src1.__z, src0.__w
      MAD temp[6].w, src0.x, src1.z, src0.w
  23: src0.xyz = temp[5], src0.w = temp[6], src1.xyz = temp[3], src1.w = const[6]
-     MAD temp[3].x, src0.y__, src1.x__, src1.w__
+     MAD temp[3].z, src0.__y, src1.__z, src1.__w
      MAD temp[6].w, src0.x, src0.w, src1.w
  24: src0.xyz = temp[5], src0.w = temp[6], src1.xyz = temp[3], src2.xyz = const[8]
-     MAD temp[3].x, src0.y__, src1.x__, src2.x__
+     MAD temp[3].z, src0.__y, src1.__z, src2.__x
      MAD temp[6].w, src0.x, src0.w, src2.x
  25: src0.w = temp[5]
      LG2 temp[5].w, src0.w
  26: src0.xyz = temp[5], src0.w = temp[6], src1.xyz = temp[3]
-     MAD temp[6].x, src0.y__, src1.x__, -src0.H__
+     MAD temp[6].z, src0.__y, src1.__z, -src0.__H
      MAD temp[6].w, src0.x, src0.w, -src0.H
  27: src0.xyz = temp[5], src0.w = temp[5], src1.xyz = temp[6], src1.w = temp[6], src2.xyz = const[4]
-     MAD temp[3].xy, src0.xy_, src1.wx_, src0.11_
+     MAD temp[3].xy, src0.xy_, src1.wz_, src0.11_
      MAD temp[5].w, src0.w, src2.x, src0.0
  28: src0.xyz = temp[3], src0.w = temp[4]
-     MAD temp[3].x, -src0.x__, src0.1__, src0.y__
+     MAD temp[3].y, -src0._x_, src0._1_, src0._y_
      MAD temp[4].w, -src0.x, src0.1, src0.w
  29: src0.w = temp[5]
      EX2 temp[5].w, src0.w
@@ -78,7 +78,7 @@
      MAD temp[1].xyz, src1.www, src1.xyz, src0.000
      MAD temp[1].w, src1.w, src0.w, src0.0
  31: src0.xyz = temp[3]
-     RCP temp[5].w, src0.x
+     RCP temp[5].w, src0.y
  32: src0.w = temp[4], src1.w = temp[5]
      MAD_SAT temp[4].w, src1.w, src0.w, src0.0
  33: src0.xyz = const[8], src0.w = temp[4]
@@ -110,4 +110,4 @@
      MAD temp[0].w, src2.w, srcp.w, src0.0
  43: src0.xyz = temp[0], src0.w = temp[0]
      MAD color[0].xyz, src0.xyz, src0.111, src0.000
-     MAD color[0].w, src0.w, src0.1, src0.0
+     MAD color[0].w, src0.w, src0.1, src0.0
Comment 1 David Heidelberg (okias) 2014-10-14 08:33:15 UTC
Created attachment 107809 [details]
r300_regalloc.txt

Also, the problematic swizzle can be seen at instruction 27 in the diff: wx -> wz.
Comment 2 David Heidelberg (okias) 2014-10-14 08:34:00 UTC
Created attachment 107810 [details]
r300_regalloc-workaround.txt
Comment 3 Tom Stellard 2014-10-14 17:07:16 UTC
(In reply to David Heidelberger (okias) from comment #1)
> Created attachment 107809 [details]
> r300_regalloc.txt
> 
> Also, problematic swizzle can be seen on instruction 27: in diff wx -> wz.

.wz is a legal swizzle according to the native_swizzles table.  Can you add printfs to lookup_native_swizzle to see why it is not being treated as a legal swizzle?
Comment 4 David Heidelberg (okias) 2014-10-14 22:11:53 UTC
> > Also, problematic swizzle can be seen on instruction 27: in diff wx -> wz.
> 
> .wz is a legal swizzle according to the native_swizzles table.  Can you add
> printfs to lookup_native_swizzle to see why it is not being treated as a
> legal swizzle?

I originally put the printf in lookup_native_swizzle.
I probably mixed this up: it's wz -> wx. (It fails for www after the first w; the same goes for wzy, which also fails after the first w.)
Comment 5 Tom Stellard 2014-10-15 13:55:37 UTC
(In reply to David Heidelberger (okias) from comment #4)
> > > Also, problematic swizzle can be seen on instruction 27: in diff wx -> wz.
> > 
> > .wz is a legal swizzle according to the native_swizzles table.  Can you add
> > printfs to lookup_native_swizzle to see why it is not being treated as a
> > legal swizzle?
> 
> I made printf in lookup_native_swizzle originally.
> I probably messed this. It's wz -> wx. (it fails for www after 1. w, same
> for wzy, also fails after 1. w).

What value of sd is returned when the swizzle .wx is passed to lookup_native_swizzle?
Comment 6 David Heidelberg (okias) 2014-10-15 21:51:09 UTC
In the function r300FPTranslateRGBSwizzle, the returned value of sd is 0 (for sure).
Comment 7 Tom Stellard 2014-10-17 14:07:57 UTC
(In reply to David Heidelberger (okias) from comment #6)
> in function r300FPTranslateRGBSwizzle is return value of sd == 0 (for sure)

So it sounds like this might be a bug somewhere in rc_reg_class_variable_get_class()

At the very bottom of this function there is a call to find_class(); it would be good to put a printf there to check the values of can_change_writemask and class_index.
Comment 8 David Heidelberg (okias) 2014-10-18 10:07:34 UTC
$ wine D3D9HLSLPixelLighting.exe 
fixme:heap:HeapSetInformation (nil) 1 (nil) 0
fixme:win:EnumDisplayDevicesW ((null),0,0x32f6f8,0x00000000), stub!
r300: DRM version: 2.39.0, Name: ATI RS690, ID: 0x791f, GB: 1, Z: 1
r300: GART size: 509 MB, VRAM size: 128 MB
r300: AA compression RAM: YES, Z compression RAM: NO, HiZ RAM: NO
fixme:win:EnumDisplayDevicesW (L"\\\\.\\DISPLAY1",0,0x32f6f8,0x00000000), stub!
fixme:win:EnumDisplayDevicesW (L"\\\\.\\DISPLAY1",1,0x32f6f8,0x00000000), stub!
fixme:win:EnumDisplayDevicesW ((null),1,0x32f6f8,0x00000000), stub!
fixme:d3d9:d3dadapter9_new 
Native Direct3D 9 is active.
For more informations visit https://wiki.ixit.cz/d3d9
(L"\\\\.\\DISPLAY1",0,0x32f718,0x00000000), stub!
r300: DRM version: 2.39.0, Name: ATI RS690, ID: 0x791f, GB: 1, Z: 1
r300: GART size: 509 MB, VRAM size: 128 MB
r300: AA compression RAM: YES, Z compression RAM: NO, HiZ RAM: NO
fixme:d3dadapter:DRI3PresentGroup_GetMultiheadCount (0x1363e8), stub!
fixme:d3dadapter:DRI3PresentGroup_GetMultiheadCount (0x1363e8), stub!
fixme:d3d9:Direct3DShaderValidatorCreate9 stub
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
fixme:msvcp:_Locinfo__Locinfo_ctor_cat_cstr (0x32fb54 1 C) semi-stub
fixme:msvcp:_Locinfo__Locinfo_ctor_cat_cstr (0x32f994 1 C) semi-stub
nine:device9.c:2397: NineDevice9_SetNPatchMode STUB!
nine:device9.c:2397: NineDevice9_SetNPatchMode STUB!
nine:nine_state_copy_common: Fixed function state not handled properly by StateBlocks.
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 10, can_change_writemask: 0
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 10, can_change_writemask: 0
class_index: 10, can_change_writemask: 0
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 10, can_change_writemask: 0
class_index: 10, can_change_writemask: 0
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
class_index: 4, can_change_writemask: 1
class_index: 10, can_change_writemask: 0
class_index: 0, can_change_writemask: 1
class_index: 0, can_change_writemask: 1
Not a native swizzle: 00000fc3
Comment 9 David Heidelberg (okias) 2014-10-28 11:02:10 UTC
class_index: 1, dstindex: 8, writemask: 3, can_change: 0
class_index: 0, dstindex: 35, writemask: 1, can_change: 1
class_index: 0, dstindex: 41, writemask: 1, can_change: 1
class_index: 0, dstindex: 20, writemask: 1, can_change: 1
class_index: 1, dstindex: 9, writemask: 3, can_change: 0
class_index: 1, dstindex: 10, writemask: 3, can_change: 0
class_index: 0, dstindex: 45, writemask: 1, can_change: 1
class_index: 0, dstindex: 38, writemask: 1, can_change: 1
class_index: 1, dstindex: 11, writemask: 3, can_change: 0
class_index: 1, dstindex: 12, writemask: 3, can_change: 0
class_index: 0, dstindex: 13, writemask: 4, can_change: 1
class_index: 0, dstindex: 14, writemask: 4, can_change: 1
class_index: 0, dstindex: 15, writemask: 4, can_change: 1
class_index: 4, dstindex: 16, writemask: 12, can_change: 1
class_index: 1, dstindex: 17, writemask: 3, can_change: 0
class_index: 0, dstindex: 18, writemask: 2, can_change: 1
class_index: 0, dstindex: 30, writemask: 1, can_change: 1
Not a native swizzle: 00000fc3

I'm not sure I fully get it. It only has INST up to 43, so why is there a dstindex of 45?
Comment 10 David Heidelberg (okias) 2014-10-28 11:18:14 UTC
class_index: 1, dstindex: 8, writemask: 3, can_change: 0
class_index: 0, dstindex: 35, writemask: 1, can_change: 1
class_index: 0, dstindex: 41, writemask: 1, can_change: 1
class_index: 0, dstindex: 20, writemask: 1, can_change: 1
class_index: 1, dstindex: 9, writemask: 3, can_change: 0
class_index: 1, dstindex: 10, writemask: 3, can_change: 0
class_index: 0, dstindex: 45, writemask: 1, can_change: 1
class_index: 0, dstindex: 38, writemask: 1, can_change: 1
class_index: 1, dstindex: 11, writemask: 3, can_change: 0
class_index: 1, dstindex: 12, writemask: 3, can_change: 0
class_index: 0, dstindex: 13, writemask: 4, can_change: 1
class_index: 0, dstindex: 14, writemask: 4, can_change: 1
class_index: 0, dstindex: 15, writemask: 4, can_change: 1
class_index: 4, dstindex: 16, writemask: 12, can_change: 0
class_index: 1, dstindex: 17, writemask: 3, can_change: 0
class_index: 0, dstindex: 18, writemask: 2, can_change: 1
class_index: 0, dstindex: 30, writemask: 1, can_change: 1

output with additional code:

if (writemask == 12) can_change_writemask = 0; // which is WZ later converted to WX
Comment 11 David Heidelberg (okias) 2014-10-28 21:56:23 UTC
Created attachment 108592 [details]
r300_regalloc-separated_workaround.txt

I made a separate workaround with the line:

if (writemask == 12) can_change_writemask = 0;

$ diff -Naur r300_regalloc.txt r300_regalloc-separated_workaround.txt 
--- r300_regalloc.txt   2014-10-28 22:54:13.673516366 +0100
+++ r300_regalloc-separated_workaround.txt      2014-10-28 22:54:05.065450759 +0100
@@ -64,10 +64,10 @@
  25: src0.w = temp[5]
      LG2 temp[5].w, src0.w
  26: src0.xyz = temp[5], src0.w = temp[6], src1.xyz = temp[3]
-     MAD temp[6].x, src0.y__, src1.x__, -src0.H__
+     MAD temp[6].z, src0.__y, src1.__x, -src0.__H
      MAD temp[6].w, src0.x, src0.w, -src0.H
  27: src0.xyz = temp[5], src0.w = temp[5], src1.xyz = temp[6], src1.w = temp[6], src2.xyz = const[4]
-     MAD temp[3].xy, src0.xy_, src1.wx_, src0.11_
+     MAD temp[3].xy, src0.xy_, src1.wz_, src0.11_
      MAD temp[5].w, src0.w, src2.x, src0.0
  28: src0.xyz = temp[3], src0.w = temp[4]
      MAD temp[3].x, -src0.x__, src0.1__, src0.y__
Comment 12 GitLab Migration User 2019-09-18 18:52:46 UTC
-- GitLab Migration Automatic Message --

This bug has been migrated to freedesktop.org's GitLab instance and has been closed from further activity.

You can subscribe and participate further through the new bug through this link to our GitLab instance: https://gitlab.freedesktop.org/mesa/mesa/issues/363.

Use of freedesktop.org services, including Bugzilla, is subject to our Code of Conduct. How we collect and use information is described in our Privacy Policy.