{
  "count": 19,
  "license": "CC-BY-SA-4.0",
  "generated": "2026-04-29T10:37:47.522Z",
  "items": [
    {
      "id": "qwen2.5-coder-32b",
      "name": "Qwen2.5-Coder 32B Instruct",
      "lab": "alibaba",
      "release_date": "2024-11-12",
      "license": "Apache-2.0",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "dense",
        "total_params_b": 32,
        "active_params_b": 32,
        "layers": 64,
        "hidden_size": 5120,
        "ffn_size": 27648,
        "num_attention_heads": 40,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 152064,
        "max_context_length": 131072,
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 60500000000,
          "bytes_per_token": 60500000000
        },
        {
          "operator": "attention",
          "flops_per_token": 3500000000,
          "bytes_per_token": 3500000000
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 6553600,
          "bytes_per_token": 819200
        },
        {
          "operator": "rope",
          "flops_per_token": 327680,
          "bytes_per_token": 40960
        },
        {
          "operator": "silu",
          "flops_per_token": 121634816,
          "bytes_per_token": 60817408
        }
      ],
      "modalities": [
        "text"
      ],
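      "hf_url": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",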
      "weight_format": "bf16"
    },
    {
      "id": "qwen3.5-397b",
      "name": "Qwen3.5 397B Reasoning",
      "lab": "alibaba",
      "release_date": "2026-03-05",
      "license": "Apache-2.0",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 397,
        "active_params_b": 22,
        "layers": 64,
        "hidden_size": 5120,
        "ffn_size": 14336,
        "num_attention_heads": 40,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 152064,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 128,
          "top_k": 8,
          "expert_hidden_size": 1536,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 28185722880,
          "bytes_per_token": 28185722880
        },
        {
          "operator": "attention",
          "flops_per_token": 9395240960,
          "bytes_per_token": 14495514624
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 41943040,
          "bytes_per_token": 8053063680
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 3276800,
          "bytes_per_token": 1310720
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "qwen3.6-plus",
      "name": "Qwen3.6 Plus",
      "lab": "alibaba",
      "release_date": "2026-03-25",
      "license": "Apache-2.0",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 480,
        "active_params_b": 35,
        "layers": 64,
        "hidden_size": 6144,
        "ffn_size": 16384,
        "num_attention_heads": 64,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 152064,
        "max_context_length": 1048576,
        "moe": {
          "num_experts": 128,
          "top_k": 8,
          "expert_hidden_size": 2048,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 38654705664,
          "bytes_per_token": 38654705664
        },
        {
          "operator": "attention",
          "flops_per_token": 12884901888,
          "bytes_per_token": 20132659200
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 50331648,
          "bytes_per_token": 12884901888
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 3932160,
          "bytes_per_token": 1572864
        }
      ],
      "modalities": [
        "text",
        "vision"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "deepseek-r1",
      "name": "DeepSeek R1",
      "lab": "deepseek",
      "release_date": "2025-01-20",
      "license": "deepseek-license",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 671,
        "active_params_b": 37,
        "layers": 61,
        "hidden_size": 7168,
        "ffn_size": 18432,
        "num_attention_heads": 128,
        "num_kv_heads": 128,
        "head_dim": 128,
        "vocab_size": 129280,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 256,
          "top_k": 8,
          "expert_hidden_size": 2048,
          "shared_experts": 0
        },
        "attention_type": "mla"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 48356130816,
          "bytes_per_token": 48356130816
        },
        {
          "operator": "attention",
          "flops_per_token": 16118710272,
          "bytes_per_token": 32237420544
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 111935488,
          "bytes_per_token": 14327742464
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 4372480,
          "bytes_per_token": 1748992
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16",
      "hf_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1"
    },
    {
      "id": "deepseek-v4-flash",
      "name": "DeepSeek V4 Flash",
      "lab": "deepseek",
      "release_date": "2026-04-24",
      "license": "deepseek-license",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 284,
        "active_params_b": 13,
        "layers": 32,
        "hidden_size": 4096,
        "ffn_size": 14336,
        "num_attention_heads": 32,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 132000,
        "max_context_length": 1048576,
        "moe": {
          "num_experts": 64,
          "top_k": 4,
          "expert_hidden_size": 1408,
          "shared_experts": 0
        },
        "attention_type": "csa+hca"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 11274289152,
          "bytes_per_token": 11274289152
        },
        {
          "operator": "attention",
          "flops_per_token": 3221225472,
          "bytes_per_token": 4831838208
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 8388608,
          "bytes_per_token": 1476395008
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 1310720,
          "bytes_per_token": 524288
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16",
      "hf_url": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash"
    },
    {
      "id": "deepseek-v4-pro",
      "name": "DeepSeek V4 Pro",
      "lab": "deepseek",
      "release_date": "2026-04-24",
      "license": "deepseek-license",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 1600,
        "active_params_b": 49,
        "layers": 64,
        "hidden_size": 8192,
        "ffn_size": 24576,
        "num_attention_heads": 64,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 132000,
        "max_context_length": 1048576,
        "moe": {
          "num_experts": 256,
          "top_k": 8,
          "expert_hidden_size": 2048,
          "shared_experts": 0
        },
        "attention_type": "csa+hca",
        "rope_theta": 10000000
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 4800000000,
          "bytes_per_token": 18000000
        },
        {
          "operator": "attention",
          "flops_per_token": 1200000000,
          "bytes_per_token": 4500000
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 10000000,
          "bytes_per_token": 1000000
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 5000000,
          "bytes_per_token": 1000000
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16",
      "hf_url": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro"
    },
    {
      "id": "alphafold-3",
      "name": "AlphaFold 3",
      "lab": "google",
      "release_date": "2024-05-08",
      "license": "creative-commons-by-nc-4.0",
      "domain": "scientific",
      "workload_kind": "forward-only-batch",
      "architecture": {
        "family": "dense",
        "total_params_b": 0.21,
        "active_params_b": 0.21,
        "layers": 48,
        "hidden_size": 384,
        "ffn_size": 1536,
        "num_attention_heads": 16,
        "num_kv_heads": 16,
        "head_dim": 24,
        "vocab_size": 64,
        "max_context_length": 5120,
        "attention_type": "pair-bias-attention"
      },
      "operator_decomposition": [
        {
          "operator": "pair-bias-attention",
          "flops_per_token": 2400000000,
          "bytes_per_token": 800000000,
          "notes": "Pair representation update — quadratic in residue count, dominant cost"
        },
        {
          "operator": "matmul",
          "flops_per_token": 1200000000,
          "bytes_per_token": 400000000,
          "notes": "PairFormer + Diffusion linear projections"
        },
        {
          "operator": "triangle-multiplication",
          "flops_per_token": 600000000,
          "bytes_per_token": 600000000,
          "notes": "AF3-specific triangle update — FP32-sensitive numerics"
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 8000000,
          "bytes_per_token": 4000000
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16",
      "paper_url": "https://www.nature.com/articles/s41586-024-07487-w",
      "github_url": "https://github.com/google-deepmind/alphafold3",
      "notes": "AlphaFold 3 (Abramson et al. 2024, Nature). Predicts joint structure of\nproteins, DNA, RNA, ligands, and ions. Uses PairFormer + Diffusion\nModule instead of AlphaFold 2's Evoformer + IPA.\n\nCritical for scientific accelerators: this is NOT a transformer LLM\nworkload. Pair-bias attention is O(N^2) in residue count (typical\nN=2000), making memory-bandwidth dominant for large complexes.\nFP32 precision needed in triangle multiplication for numerical\nstability; FP16/BF16 work for most other ops.\n\nInference is single-shot (forward-only-batch), not autoregressive.\nLatency target: 1-30 minutes per structure depending on size.\n"
    },
    {
      "id": "gemma-4",
      "name": "Gemma 4 26B",
      "lab": "google",
      "release_date": "2026-02-20",
      "license": "Gemma-License",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 26,
        "active_params_b": 4,
        "layers": 32,
        "hidden_size": 4096,
        "ffn_size": 11264,
        "num_attention_heads": 32,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 256000,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 16,
          "top_k": 2,
          "expert_hidden_size": 5632,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 8858370048,
          "bytes_per_token": 8858370048
        },
        {
          "operator": "attention",
          "flops_per_token": 3221225472,
          "bytes_per_token": 4831838208
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 2097152,
          "bytes_per_token": 2952790016
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 1310720,
          "bytes_per_token": 524288
        }
      ],
      "modalities": [
        "text",
        "vision"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "graphcast",
      "name": "GraphCast",
      "lab": "google",
      "release_date": "2023-11-14",
      "license": "creative-commons-by-nc-sa-4.0",
      "domain": "scientific",
      "workload_kind": "graph-iteration",
      "architecture": {
        "family": "dense",
        "total_params_b": 0.0367,
        "active_params_b": 0.0367,
        "layers": 16,
        "hidden_size": 512,
        "ffn_size": 1024,
        "num_attention_heads": 8,
        "num_kv_heads": 8,
        "head_dim": 64,
        "vocab_size": 256,
        "max_context_length": 1038240,
        "attention_type": "graph-message-passing"
      },
      "operator_decomposition": [
        {
          "operator": "graph-message-passing",
          "flops_per_token": 280000000,
          "bytes_per_token": 280000000,
          "notes": "Mesh GNN message passing — 16 processor steps, edge-bound"
        },
        {
          "operator": "matmul",
          "flops_per_token": 180000000,
          "bytes_per_token": 60000000,
          "notes": "Encoder/decoder MLPs + per-edge MLPs"
        },
        {
          "operator": "scatter-gather",
          "flops_per_token": 40000000,
          "bytes_per_token": 200000000,
          "notes": "Grid ↔ icosahedral mesh interpolation — bandwidth-bound"
        },
        {
          "operator": "layernorm",
          "flops_per_token": 4000000,
          "bytes_per_token": 2000000
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "fp32",
      "paper_url": "https://www.science.org/doi/10.1126/science.adi2336",
      "github_url": "https://github.com/google-deepmind/graphcast",
      "notes": "GraphCast (Lam et al. 2023, Science). 0.25° / 1° global weather\nforecast at 6-hour intervals, up to 10 days. Outperforms HRES\n(operational numerical weather prediction) on 90% of variables.\n\nCritical scientific-computing workload characteristic: graph-based\niterative inference (40 forward passes for a 10-day forecast) over\n~1M-vertex mesh. NOT autoregressive in the LLM sense — each step\nproduces global state.\n\nMemory-bandwidth-bound on accelerators with HBM (1M vertex × 474\nvariables = ~2 GB activations per step). Triangulated icosahedral\nmesh creates irregular memory access patterns that favor cards with\nlarge cache hierarchies (H100 50 MB L2, MI300X 256 MB Infinity Cache)\nover those with raw HBM bandwidth alone.\n"
    },
    {
      "id": "llama-3.3-70b",
      "name": "Llama 3.3 70B Instruct",
      "lab": "meta",
      "release_date": "2024-12-06",
      "license": "Llama-3.3-Community",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "dense",
        "total_params_b": 70,
        "active_params_b": 70,
        "layers": 80,
        "hidden_size": 8192,
        "ffn_size": 28672,
        "num_attention_heads": 64,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 128256,
        "max_context_length": 131072,
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 132000000000,
          "bytes_per_token": 132000000000
        },
        {
          "operator": "attention",
          "flops_per_token": 7500000000,
          "bytes_per_token": 7500000000
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 8400000,
          "bytes_per_token": 1310720
        },
        {
          "operator": "rope",
          "flops_per_token": 524288,
          "bytes_per_token": 65536
        },
        {
          "operator": "silu",
          "flops_per_token": 156905472,
          "bytes_per_token": 78643200
        }
      ],
      "modalities": [
        "text"
      ],
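      "hf_url": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",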
      "weight_format": "bf16"
    },
    {
      "id": "llama-4-maverick",
      "name": "Llama 4 Maverick",
      "lab": "meta",
      "release_date": "2025-04-05",
      "license": "Llama-4-Community",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 400,
        "active_params_b": 17,
        "layers": 48,
        "hidden_size": 5120,
        "ffn_size": 16384,
        "num_attention_heads": 40,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 200000,
        "max_context_length": 1048576,
        "moe": {
          "num_experts": 128,
          "top_k": 1,
          "expert_hidden_size": 8192,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 24159191040,
          "bytes_per_token": 24159191040
        },
        {
          "operator": "attention",
          "flops_per_token": 7046430720,
          "bytes_per_token": 10871635968
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 31457280,
          "bytes_per_token": 4026531840
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 2457600,
          "bytes_per_token": 983040
        }
      ],
      "modalities": [
        "text",
        "vision",
        "video"
      ],
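      "hf_url": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",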
      "weight_format": "bf16"
    },
    {
      "id": "llama-4-scout",
      "name": "Llama 4 Scout",
      "lab": "meta",
      "release_date": "2025-04-05",
      "license": "Llama-4-Community",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 109,
        "active_params_b": 17,
        "layers": 48,
        "hidden_size": 5120,
        "ffn_size": 16384,
        "num_attention_heads": 40,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 200000,
        "max_context_length": 10485760,
        "moe": {
          "num_experts": 16,
          "top_k": 1,
          "expert_hidden_size": 8192,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 24159191040,
          "bytes_per_token": 24159191040
        },
        {
          "operator": "attention",
          "flops_per_token": 7046430720,
          "bytes_per_token": 10871635968
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 3932160,
          "bytes_per_token": 4026531840
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 2457600,
          "bytes_per_token": 983040
        }
      ],
      "modalities": [
        "text",
        "vision",
        "video"
      ],
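      "hf_url": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",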
      "weight_format": "bf16"
    },
    {
      "id": "minimax-m2.7",
      "name": "MiniMax M2.7",
      "lab": "minimax",
      "release_date": "2026-04-10",
      "license": "MIT",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "hybrid",
        "total_params_b": 456,
        "active_params_b": 46,
        "layers": 80,
        "hidden_size": 6144,
        "ffn_size": 16384,
        "num_attention_heads": 64,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 200000,
        "max_context_length": 4194304,
        "moe": {
          "num_experts": 32,
          "top_k": 2,
          "expert_hidden_size": 16384,
          "shared_experts": 0
        },
        "attention_type": "lightning"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 48318382080,
          "bytes_per_token": 48318382080
        },
        {
          "operator": "attention",
          "flops_per_token": 16106127360,
          "bytes_per_token": 25165824000
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 15728640,
          "bytes_per_token": 32212254720
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 4915200,
          "bytes_per_token": 1966080
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "mistral-large-3",
      "name": "Mistral Large 3",
      "lab": "mistral",
      "release_date": "2025-08-14",
      "license": "MRL-2",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "dense",
        "total_params_b": 123,
        "active_params_b": 123,
        "layers": 88,
        "hidden_size": 12288,
        "ffn_size": 28672,
        "num_attention_heads": 96,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 32768,
        "max_context_length": 128000,
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 232000000000,
          "bytes_per_token": 232000000000
        },
        {
          "operator": "attention",
          "flops_per_token": 12500000000,
          "bytes_per_token": 12500000000
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 14745600,
          "bytes_per_token": 1966080
        },
        {
          "operator": "rope",
          "flops_per_token": 786432,
          "bytes_per_token": 98304
        },
        {
          "operator": "silu",
          "flops_per_token": 252706816,
          "bytes_per_token": 126353408
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "mistral-small-4",
      "name": "Mistral Small 4",
      "lab": "mistral",
      "release_date": "2026-03-16",
      "license": "Apache-2.0",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 119,
        "active_params_b": 22,
        "layers": 40,
        "hidden_size": 5120,
        "ffn_size": 14336,
        "num_attention_heads": 32,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 131072,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 8,
          "top_k": 2,
          "expert_hidden_size": 14336,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 17616076800,
          "bytes_per_token": 17616076800
        },
        {
          "operator": "attention",
          "flops_per_token": 5872025600,
          "bytes_per_token": 9227468800
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 1638400,
          "bytes_per_token": 11744051200
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 2048000,
          "bytes_per_token": 819200
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "kimi-k2.6",
      "name": "Kimi K2.6",
      "lab": "moonshot",
      "release_date": "2026-04-15",
      "license": "moonshot-license",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 1000,
        "active_params_b": 32,
        "layers": 60,
        "hidden_size": 7168,
        "ffn_size": 18432,
        "num_attention_heads": 64,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 160000,
        "max_context_length": 262144,
        "moe": {
          "num_experts": 384,
          "top_k": 8,
          "expert_hidden_size": 1536,
          "shared_experts": 0
        },
        "attention_type": "mla"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 47563407360,
          "bytes_per_token": 47563407360
        },
        {
          "operator": "attention",
          "flops_per_token": 15854469120,
          "bytes_per_token": 25543311360
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 165150720,
          "bytes_per_token": 10569646080
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 4300800,
          "bytes_per_token": 1720320
        }
      ],
      "modalities": [
        "text",
        "vision"
      ],
      "weight_format": "bf16",
      "hf_url": "https://huggingface.co/moonshotai/Kimi-K2.6"
    },
    {
      "id": "gpt-oss",
      "name": "GPT-OSS",
      "lab": "openai",
      "release_date": "2025-08-12",
      "license": "Apache-2.0",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 120,
        "active_params_b": 5,
        "layers": 36,
        "hidden_size": 2880,
        "ffn_size": 11520,
        "num_attention_heads": 32,
        "num_kv_heads": 8,
        "head_dim": 90,
        "vocab_size": 200000,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 128,
          "top_k": 4,
          "expert_hidden_size": 2880,
          "shared_experts": 0
        },
        "attention_type": "gqa"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 7166361600,
          "bytes_per_token": 7166361600
        },
        {
          "operator": "attention",
          "flops_per_token": 2043740160,
          "bytes_per_token": 2813460480
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 13271040,
          "bytes_per_token": 2388787200
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 1036800,
          "bytes_per_token": 414720
        }
      ],
      "modalities": [
        "text"
      ],
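      "hf_url": "https://huggingface.co/openai/gpt-oss-120b",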
      "weight_format": "bf16"
    },
    {
      "id": "glm-5-reasoning",
      "name": "GLM-5 Reasoning",
      "lab": "zhipu",
      "release_date": "2026-03-15",
      "license": "MIT",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "dense",
        "total_params_b": 32,
        "active_params_b": 32,
        "layers": 60,
        "hidden_size": 5120,
        "ffn_size": 17920,
        "num_attention_heads": 40,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 151552,
        "max_context_length": 131072,
        "attention_type": "mha"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 33030144000,
          "bytes_per_token": 33030144000
        },
        {
          "operator": "attention",
          "flops_per_token": 8808038400,
          "bytes_per_token": 13589544960
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 3072000,
          "bytes_per_token": 1228800
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16"
    },
    {
      "id": "glm-5.1",
      "name": "GLM-5.1",
      "lab": "zhipu",
      "release_date": "2026-04-07",
      "license": "MIT",
      "domain": "llm",
      "workload_kind": "autoregressive-decode",
      "architecture": {
        "family": "moe",
        "total_params_b": 754,
        "active_params_b": 32,
        "layers": 64,
        "hidden_size": 6144,
        "ffn_size": 16384,
        "num_attention_heads": 48,
        "num_kv_heads": 8,
        "head_dim": 128,
        "vocab_size": 151552,
        "max_context_length": 131072,
        "moe": {
          "num_experts": 192,
          "top_k": 8,
          "expert_hidden_size": 1536,
          "shared_experts": 0
        },
        "attention_type": "mha"
      },
      "operator_decomposition": [
        {
          "operator": "matmul",
          "flops_per_token": 38654705664,
          "bytes_per_token": 38654705664
        },
        {
          "operator": "attention",
          "flops_per_token": 12884901888,
          "bytes_per_token": 20401094656
        },
        {
          "operator": "moe-gate",
          "flops_per_token": 75497472,
          "bytes_per_token": 9663676416
        },
        {
          "operator": "rmsnorm",
          "flops_per_token": 3932160,
          "bytes_per_token": 1572864
        }
      ],
      "modalities": [
        "text"
      ],
      "weight_format": "bf16",
      "hf_url": "https://huggingface.co/THUDM/GLM-5.1"
    }
  ]
}