|
Up
|
|
|
|
|
__pycache__/
|
— |
|
|
|
aoti_runtime/
|
— |
|
|
|
cuda/
|
— |
|
|
|
cutedsl/
|
— |
|
|
|
mtia/
|
— |
|
|
|
rocm/
|
— |
|
|
|
xpu/
|
— |
|
|
|
__init__.py
|
|
|
|
|
aoti_hipify_utils.py
|
|
|
|
|
block_analysis.py
|
|
|
|
|
common.py
|
|
|
|
|
cpp.py
|
|
|
|
|
cpp_bmm_template.py
|
|
|
|
|
cpp_flex_attention_template.py
|
|
|
|
|
cpp_gemm_template.py
|
|
|
|
|
cpp_grouped_gemm_template.py
|
|
|
|
|
cpp_micro_gemm.py
|
|
|
|
|
cpp_template.py
|
|
|
|
|
cpp_template_kernel.py
|
|
|
|
|
cpp_utils.py
|
|
|
|
|
cpp_wrapper_cpu.py
|
|
|
|
|
cpp_wrapper_cpu_array_ref.py
|
|
|
|
|
cpp_wrapper_gpu.py
|
|
|
|
|
cpp_wrapper_mps.py
|
|
|
|
|
cpu_device_op_overrides.py
|
|
|
|
|
cuda_combined_scheduling.py
|
|
|
|
|
debug_utils.py
|
|
|
|
|
halide.py
|
|
|
|
|
memory_planning.py
|
|
|
|
|
mps.py
|
|
|
|
|
mps_device_op_overrides.py
|
|
|
|
|
multi_kernel.py
|
|
|
|
|
pallas.py
|
|
|
|
|
python_wrapper_mtia.py
|
|
|
|
|
segmented_tree.py
|
|
|
|
|
simd.py
|
|
|
|
|
simd_kernel_features.py
|
|
|
|
|
subgraph.py
|
|
|
|
|
triton.py
|
|
|
|
|
triton_combo_kernel.py
|
|
|
|
|
triton_split_scan.py
|
|
|
|
|
triton_utils.py
|
|
|
|
|
wrapper.py
|
|
|
|
|
wrapper_fxir.py
|
|
|
|