
    7i                        d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ dgZ ed	       G d
 de             Zy)    )annotations)	dataclass)ListOptional)_unwrap_if_constexpr)_realize_cta_layoutDistributedLayout)languageAMDMFMALayoutT)frozenc                       e Zd ZU dZded<   ded<   ded<   ded<   ej                  Zd	ed
<   dZded<   dZ	ded<   dZ
ded<   dZded<    fdZd ZddZd Zd Z xZS )r   a  
    Represents a layout for AMD MFMA (matrix core) operations.

    Args:
        version (int): Major and minor identifier for the MFMA instruction.
        instr_shape: (M, N) dimension for the instrinsic shape.
        transposed (bool): indicates the result tensor is transposed so that each thread holds consecutive elements in the same row instead of column, which is good for chained dot and global write.
        warps_per_cta (List[int]): Number of warps per CTA.
        elem_type Optional(ttgl.dtype): Supported types are int32, fp32 and fp64. Default is fp32.
        tiles_per_warp Optional(List[int]): Number of tiles per WARP. For mfma layout, if missing, use the default where we have unit tile size on all dimensions.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    intversionz	List[int]instr_shapebool
transposedwarps_per_ctaz
ttgl.dtype	elem_typeNzOptional[List[int]]tiles_per_warpctas_per_cgacta_split_num	cta_orderc                   t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   d	t        | j                               | j                  .t        j                  | dd
gt        | j                        z         | j                          y )Nr   r   r   r   r   r   r   r   r      )super__setattr__r   r   r   r   r   r   r   r   r   r   objectlenverify)self	__class__s    l/home/rose/Desktop/poly/venv/lib/python3.12/site-packages/triton/experimental/gluon/language/amd/_layouts.py__post_init__zAMDMFMALayout.__post_init__)   s)   I';DLL'IJM+?@P@P+QRL*>t*OPO-A$BTBT-UV,.B4CVCV.WXK)=dnn)MNN,@ARAR,STO-A$BTBT-UVK)=dnn)MN&t%5sSASAS=T7TU    c                
   | j                   j                  |      }|j                  | j                  | j                  | j
                  | j                  || j                  | j                  | j                  | j                  	      S N)r   to_irget_amd_mfma_layoutr   r   r   r   r   r   r   r   )r    buildertypes      r"   _to_irzAMDMFMALayout._to_ir9   sn    ~~##G,**4<<9I9I4??\`\n\npt+/+>+>@Q@QSWSeSegkguguw 	wr$   c                @   d }d| j                    d || j                         d| j                   d || j                         d || j                         d| j
                   d || j                         d || j                         d || j                         dS )Nc                F    | ydj                  t        t        |             S )N _)joinmapstr)xs    r"   	stringifyz'AMDMFMALayout.mangle.<locals>.stringify@   s    y88CQK((r$   MFMA_r/   _MFMA)	r   r   r   r   r   r   r   r   r   )r    r4   s     r"   manglezAMDMFMALayout.mangle>   s8   	)
 t||nAi0@0@&A%B!DOOCTTUV_`d`r`rVsUttuv  AE  AT  AT  wU  vV  VW  X\  Xf  Xf  Wg  gh  ir  sw  sD  sD  iE  hF  FG  HQ  RV  Rd  Rd  He  Gf  fg  hq  rv  r@  r@  hA  gB  BG  H  	Hr$   c                4   | j                   dk\  r| j                   dk  sJ d       ddgddgddgddgg}| j                  |v sJ dt        |      z          | j                  j	                         s;| j                  j                         s!| j                  j                         sJ d       t        | j                        }t        | |       t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )	Nr      z#version must be in the [1, 4] range       @   z-invalid intrinsic shape; accepted shapes are z/element type must be float32, float64, or int32)r   r   r2   r   is_fp32is_fp64is_int32r   r   r   r   r   r   )r    valid_shapesranks      r"   r   zAMDMFMALayout.verifyG   s   ||q T\\Q%6]8]]6R2r(RGaW=</t1`cfgsct1tt/~~%%'4>>+A+A+C^^$$&	[)Z	[ ' 4%%&D$'4$$%---4%%&$...4>>"d***r$   c                   t        | j                  t        | j                        | j                  t        | j
                        | j                  | j                  rt        | j                        nd | j                  rt        | j                        nd | j                  rt        | j                        nd | j                  rt        | j                        f	      S d f	      S r&   )hashr   tupler   r   r   r   r   r   r   r   )r    s    r"   __hash__zAMDMFMALayout.__hash__U   s    LL$""#OO$$$%NN*.*=*=E$%%&4(,(9(9E$##$t)-););E$$$%%)^^E$..!

 
 
	 :>

 
 
	r$   )returnr2   )__name__
__module____qualname____doc____annotations__ttglfloat32r   r   r   r   r   r#   r+   r7   r   rE   __classcell__)r!   s   @r"   r   r      sv     L LLIz(*.N'.(,L%,)-M&-%)I") w
H+r$   N)
__future__r   dataclassesr   typingr   r   triton.language.corer   +triton.experimental.gluon.language._layoutsr   r	   triton.experimental.gluonr
   rL   __all__r    r$   r"   <module>rW      sG    " ! ! 5 ^ 6 
 $P% P Pr$   