{
  "count": 39,
  "license": "CC-BY-SA-4.0",
  "generated": "2026-04-29T10:37:47.504Z",
  "items": [
    {
      "id": "mi300a",
      "name": "AMD Instinct MI300A",
      "vendor": {
        "id": "amd",
        "name": "AMD",
        "chinese_names": [
          "超微半导体"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.amd.com/",
        "aliases": [
          "Advanced Micro Devices"
        ]
      },
      "generation": "cdna3-apu",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1962,
          "evidence_ref": "ev-mi300a-001"
        },
        "bf16_tflops": {
          "value": 981,
          "evidence_ref": "ev-mi300a-001"
        },
        "fp16_tflops": {
          "value": 981,
          "evidence_ref": "ev-mi300a-001"
        },
        "int8_tops": {
          "value": 1962,
          "evidence_ref": "ev-mi300a-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 228,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "compute_unit_label": "CU",
        "l2_cache_mb": {
          "value": 256,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "die_area_mm2": {
          "value": 1017,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "transistor_count_b": {
          "value": 146,
          "evidence_ref": "ev-mi300a-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-mi300a-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mi300a-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 128,
          "evidence_ref": "ev-mi300a-001"
        },
        "bandwidth_gbps": {
          "value": 5300,
          "evidence_ref": "ev-mi300a-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "Infinity-Fabric",
        "bandwidth_gbps": 768,
        "world_size": 4,
        "topology": "fully-connected"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 760,
          "evidence_ref": "ev-mi300a-001"
        }
      },
      "software_support": {
        "drivers": [
          "ROCm-6.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "MI300A",
        "MI300A APU"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mi300a-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi300/mi300a.html",
          "accessed": "2026-04-28",
          "citation": "AMD MI300A APU product page (24× Zen-4 cores + 228 CDNA-3 CUs unified memory)"
        },
        {
          "id": "ev-mi300a-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf",
          "accessed": "2026-04-28",
          "citation": "CDNA 3 APU: 228 CUs (vs 304 in MI300X) + 24× Zen-4 cores in same package, 256 MB Infinity Cache, 8× HBM3 ⇒ 128 GB unified, 146B transistors @ TSMC 5+6nm chiplets"
        }
      ],
      "disclaimers": [
        "MI300A is an APU (CPU + GPU integrated); used in El Capitan supercomputer + emerging AI training systems.",
        "Smaller GPU CU count vs MI300X (228 vs 304) trades for embedded Zen-4 cores + unified memory model.",
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "mi300x",
      "name": "AMD Instinct MI300X",
      "vendor": {
        "id": "amd",
        "name": "AMD",
        "chinese_names": [
          "超微半导体"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.amd.com/",
        "aliases": [
          "Advanced Micro Devices"
        ]
      },
      "generation": "cdna3",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 2614,
          "evidence_ref": "ev-mi300x-001"
        },
        "bf16_tflops": {
          "value": 1307,
          "evidence_ref": "ev-mi300x-001"
        },
        "fp16_tflops": {
          "value": 1307,
          "evidence_ref": "ev-mi300x-001"
        },
        "int8_tops": {
          "value": 2614,
          "evidence_ref": "ev-mi300x-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 304,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "compute_unit_label": "CU",
        "l2_cache_mb": {
          "value": 256,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "die_area_mm2": {
          "value": 1017,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "transistor_count_b": {
          "value": 153,
          "evidence_ref": "ev-mi300x-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-mi300x-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mi300x-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 192,
          "evidence_ref": "ev-mi300x-001"
        },
        "bandwidth_gbps": {
          "value": 5300,
          "evidence_ref": "ev-mi300x-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "Infinity-Fabric",
        "bandwidth_gbps": 896,
        "world_size": 8,
        "topology": "fully-connected"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 750,
          "evidence_ref": "ev-mi300x-001"
        }
      },
      "software_support": {
        "drivers": [
          "ROCm-6.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8",
          "int4-awq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep"
        ]
      },
      "aliases": [
        "MI300X",
        "MI300X 192GB"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mi300x-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html",
          "accessed": "2026-04-28",
          "citation": "AMD MI300X product page"
        },
        {
          "id": "ev-mi300x-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf",
          "accessed": "2026-04-28",
          "citation": "AMD CDNA 3 architecture: 304 CUs across 8 XCD chiplets, 256 MB Infinity Cache (L2), 8× HBM3 stacks @ 24 GB ⇒ 192 GB, 153B transistors @ TSMC 5nm + 6nm chiplets"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "mi325x",
      "name": "AMD Instinct MI325X",
      "vendor": {
        "id": "amd",
        "name": "AMD",
        "chinese_names": [
          "超微半导体"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.amd.com/",
        "aliases": [
          "Advanced Micro Devices"
        ]
      },
      "generation": "cdna3",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 2614,
          "evidence_ref": "ev-mi325x-001"
        },
        "bf16_tflops": {
          "value": 1307,
          "evidence_ref": "ev-mi325x-001"
        },
        "fp16_tflops": {
          "value": 1307,
          "evidence_ref": "ev-mi325x-001"
        },
        "int8_tops": {
          "value": 2614,
          "evidence_ref": "ev-mi325x-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 304,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "compute_unit_label": "CU",
        "l2_cache_mb": {
          "value": 256,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "die_area_mm2": {
          "value": 1017,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "transistor_count_b": {
          "value": 153,
          "evidence_ref": "ev-mi325x-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-mi325x-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mi325x-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 256,
          "evidence_ref": "ev-mi325x-001"
        },
        "bandwidth_gbps": {
          "value": 6000,
          "evidence_ref": "ev-mi325x-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "Infinity-Fabric",
        "bandwidth_gbps": 896,
        "world_size": 8,
        "topology": "fully-connected"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 1000,
          "evidence_ref": "ev-mi325x-001"
        }
      },
      "software_support": {
        "drivers": [
          "ROCm-6.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8",
          "int4-awq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep"
        ]
      },
      "aliases": [
        "MI325X"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mi325x-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi300/mi325x.html",
          "accessed": "2026-04-28",
          "citation": "AMD MI325X product page"
        },
        {
          "id": "ev-mi325x-arch-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi300/mi325x.html",
          "accessed": "2026-04-28",
          "citation": "MI325X reuses CDNA 3 die from MI300X (same 304 CUs, 256 MB Infinity Cache); upgraded to 8× HBM3e (32 GB stacks) ⇒ 256 GB capacity"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "mi355x",
      "name": "AMD Instinct MI355X",
      "vendor": {
        "id": "amd",
        "name": "AMD",
        "chinese_names": [
          "超微半导体"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.amd.com/",
        "aliases": [
          "Advanced Micro Devices"
        ]
      },
      "generation": "cdna4",
      "status": "in-production",
      "release_year": 2025,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": {
          "value": 9200,
          "evidence_ref": "ev-mi355x-001"
        },
        "fp8_tflops": {
          "value": 4600,
          "evidence_ref": "ev-mi355x-001"
        },
        "bf16_tflops": {
          "value": 2300,
          "evidence_ref": "ev-mi355x-001"
        },
        "fp16_tflops": {
          "value": 2300,
          "evidence_ref": "ev-mi355x-001"
        },
        "int8_tops": {
          "value": 4600,
          "evidence_ref": "ev-mi355x-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 256,
          "evidence_ref": "ev-mi355x-arch-001"
        },
        "compute_unit_label": "CU",
        "l2_cache_mb": {
          "value": 256,
          "evidence_ref": "ev-mi355x-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-mi355x-arch-001"
        },
        "process_node_nm": {
          "value": 3,
          "evidence_ref": "ev-mi355x-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-mi355x-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mi355x-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 288,
          "evidence_ref": "ev-mi355x-001"
        },
        "bandwidth_gbps": {
          "value": 8000,
          "evidence_ref": "ev-mi355x-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "Infinity-Fabric",
        "bandwidth_gbps": 1075,
        "world_size": 8,
        "topology": "fully-connected"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 1400,
          "evidence_ref": "ev-mi355x-001"
        }
      },
      "software_support": {
        "drivers": [
          "ROCm-6.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp"
        ]
      },
      "aliases": [
        "MI355X"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mi355x-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi355x.html",
          "accessed": "2026-04-28",
          "citation": "AMD MI355X announcement (vendor-claimed)"
        },
        {
          "id": "ev-mi355x-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.amd.com/en/products/accelerators/instinct/mi355x.html",
          "accessed": "2026-04-28",
          "citation": "CDNA 4 architecture: 256 CUs (4 XCDs × 64 CU configurable), 256 MB Infinity Cache, 8× HBM3e stacks @ 36 GB ⇒ 288 GB, FP4 native @ 9.2 PFLOPS, TSMC 3nm"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "apple-m4-max-npu",
      "name": "Apple M4 Max Neural Engine",
      "vendor": {
        "id": "apple",
        "name": "Apple",
        "chinese_names": [
          "苹果"
        ],
        "country": "US",
        "type": "both",
        "website": "https://www.apple.com/",
        "aliases": [
          "Apple Silicon",
          "Apple Inc."
        ]
      },
      "generation": "apple-neural-engine-gen5",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "embedded-soc",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 38,
          "evidence_ref": "ev-m4-001"
        },
        "fp16_tflops": {
          "value": 38,
          "evidence_ref": "ev-m4-001"
        },
        "int8_tops": {
          "value": 38,
          "evidence_ref": "ev-m4-001"
        },
        "tops_per_watt": {
          "value": 3.4,
          "evidence_ref": "ev-m4-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 16,
          "evidence_ref": "ev-m4-001"
        },
        "compute_unit_label": "NeuralEngine",
        "process_node_nm": {
          "value": 3,
          "evidence_ref": "ev-m4-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 128,
          "evidence_ref": "ev-m4-001"
        },
        "bandwidth_gbps": {
          "value": 546,
          "evidence_ref": "ev-m4-001"
        },
        "type": "LPDDR5X"
      },
      "scale_up": {
        "protocol": "UMA",
        "bandwidth_gbps": 546,
        "world_size": 1,
        "topology": "unified"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 0,
        "protocol": "none"
      },
      "power": {
        "tdp_w": {
          "value": 11,
          "evidence_ref": "ev-m4-001"
        }
      },
      "software_support": {
        "drivers": [
          "Core ML 8",
          "Metal 4"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "mlx-vllm port; native llama.cpp + MLX support"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8",
          "int4-awq"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "M4 Max",
        "M4 Max ANE",
        "M4 Neural Engine"
      ],
      "chinese_names": [
        "苹果 M4 Max 神经引擎"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-m4-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.apple.com/mac/m4/",
          "accessed": "2026-04-29",
          "citation": "Apple M4 Max: 16-core Neural Engine @ 38 TOPS, unified memory architecture (UMA) up to 128 GB LPDDR5X with 546 GB/s bandwidth shared with GPU/CPU. ~11 W package TDP for ANE portion."
        }
      ],
      "disclaimers": [
        "Apple M4 NPU rating is 38 TOPS at INT8 precision; FP16 is roughly 38 TFLOPS via Metal Performance Shaders.",
        "Unified memory means the 128 GB capacity is shared across CPU/GPU/ANE; for inference, expect ~80-100 GB usable.",
        "No multi-card scaling: single-package only. World-size=1 reflects this.",
        "Energy efficiency (3.4 TOPS/W) is the strongest of any accelerator in the corpus — relevant for edge / on-device."
      ]
    },
    {
      "id": "inferentia-2",
      "name": "AWS Inferentia 2",
      "vendor": {
        "id": "aws",
        "name": "AWS Annapurna Labs",
        "chinese_names": [
          "亚马逊云科技"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://aws.amazon.com/",
        "aliases": [
          "Amazon Web Services",
          "AWS"
        ]
      },
      "generation": "inf2",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "proprietary",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 190,
          "evidence_ref": "ev-inf2-001"
        },
        "fp16_tflops": {
          "value": 190,
          "evidence_ref": "ev-inf2-001"
        },
        "int8_tops": {
          "value": 380,
          "evidence_ref": "ev-inf2-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 2,
          "evidence_ref": "ev-inf2-arch-001"
        },
        "compute_unit_label": "XPU",
        "hbm_stacks": {
          "value": 2,
          "evidence_ref": "ev-inf2-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-inf2-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 32,
          "evidence_ref": "ev-inf2-001"
        },
        "bandwidth_gbps": {
          "value": 820,
          "evidence_ref": "ev-inf2-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "NeuronLink",
        "bandwidth_gbps": 384,
        "world_size": 12,
        "topology": "ring"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "EFA"
      },
      "power": {
        "tdp_w": {
          "value": 175,
          "evidence_ref": "ev-inf2-001"
        }
      },
      "software_support": {
        "drivers": [
          "Neuron SDK"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "Inferentia2",
        "Inf2"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-inf2-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://aws.amazon.com/ai/machine-learning/inferentia/",
          "accessed": "2026-04-28",
          "citation": "AWS Inferentia 2 product page"
        },
        {
          "id": "ev-inf2-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/inferentia2.html",
          "accessed": "2026-04-28",
          "citation": "Inferentia 2: 2 NeuronCore-v2 engines (each tensor + scalar + vector + GPSIMD), 2× HBM2e ⇒ 32 GB; TSMC 7nm-class"
        }
      ],
      "disclaimers": [
        "Inferentia2 only available via EC2 Inf2 instances."
      ]
    },
    {
      "id": "trainium-2",
      "name": "AWS Trainium 2",
      "vendor": {
        "id": "aws",
        "name": "AWS Annapurna Labs",
        "chinese_names": [
          "亚马逊云科技"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://aws.amazon.com/",
        "aliases": [
          "Amazon Web Services",
          "AWS"
        ]
      },
      "generation": "trn2",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "proprietary",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1300,
          "evidence_ref": "ev-trn2-001"
        },
        "bf16_tflops": {
          "value": 650,
          "evidence_ref": "ev-trn2-001"
        },
        "fp16_tflops": {
          "value": 650,
          "evidence_ref": "ev-trn2-001"
        },
        "int8_tops": {
          "value": 1300,
          "evidence_ref": "ev-trn2-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 8,
          "evidence_ref": "ev-trn2-arch-001"
        },
        "compute_unit_label": "XPU",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-trn2-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-trn2-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 96,
          "evidence_ref": "ev-trn2-001"
        },
        "bandwidth_gbps": {
          "value": 2900,
          "evidence_ref": "ev-trn2-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "NeuronLink",
        "bandwidth_gbps": 1280,
        "world_size": 64,
        "topology": "ring-mesh"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "EFA"
      },
      "power": {
        "tdp_w": {
          "value": 500,
          "evidence_ref": "ev-trn2-001"
        }
      },
      "software_support": {
        "drivers": [
          "Neuron SDK"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "Trainium2",
        "Trn2"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-trn2-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://aws.amazon.com/ai/machine-learning/trainium/",
          "accessed": "2026-04-28",
          "citation": "AWS Trainium 2 product page"
        },
        {
          "id": "ev-trn2-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/trainium2.html",
          "accessed": "2026-04-28",
          "citation": "Trainium 2: 8 NeuronCore-v3 engines, 4× HBM3 ⇒ 96 GB; NeuronLink-v3 fabric scales to 64 chips per Trn2 UltraServer; TSMC 5nm-class"
        }
      ],
      "disclaimers": [
        "AWS does not sell chips; only available via EC2 Trn2 instances."
      ]
    },
    {
      "id": "br100",
      "name": "壁仞 BR100",
      "vendor": {
        "id": "biren",
        "name": "Biren Technology",
        "chinese_names": [
          "壁仞科技",
          "壁仞"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.birentech.com/",
        "aliases": [
          "Biren"
        ]
      },
      "generation": "biren-gen1",
      "status": "discontinued",
      "release_year": 2022,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 256,
          "evidence_ref": "ev-br100-001"
        },
        "fp16_tflops": {
          "value": 256,
          "evidence_ref": "ev-br100-001"
        },
        "int8_tops": {
          "value": 1024,
          "evidence_ref": "ev-br100-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-br100-arch-001"
        },
        "compute_unit_label": "SM",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-br100-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-br100-arch-001"
        },
        "transistor_count_b": {
          "value": 77,
          "evidence_ref": "ev-br100-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-br100-001"
        },
        "bandwidth_gbps": {
          "value": 2300,
          "evidence_ref": "ev-br100-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "BLink",
        "bandwidth_gbps": 512,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 550,
          "evidence_ref": "ev-br100-001"
        }
      },
      "software_support": {
        "drivers": [
          "BIRENSUPA"
        ],
        "engines": [],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "BR100"
      ],
      "chinese_names": [
        "壁仞BR100"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-br100-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.birentech.com/",
          "accessed": "2026-04-28",
          "citation": "Biren BR100 launch announcement (export-control affected)"
        },
        {
          "id": "ev-br100-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.birentech.com/",
          "accessed": "2026-04-28",
          "citation": "BR100: 64 SPCs (Streaming Processor Clusters), 4× HBM2e ⇒ 64 GB, 77B transistors @ TSMC 7nm chiplet"
        }
      ],
      "disclaimers": [
        "BR100 affected by US export controls; production discontinued.",
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "br104",
      "name": "壁仞 BR104",
      "vendor": {
        "id": "biren",
        "name": "Biren Technology",
        "chinese_names": [
          "壁仞科技",
          "壁仞"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.birentech.com/",
        "aliases": [
          "Biren"
        ]
      },
      "generation": "biren-gen1-derate",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 128,
          "evidence_ref": "ev-br104-001"
        },
        "fp16_tflops": {
          "value": 128,
          "evidence_ref": "ev-br104-001"
        },
        "int8_tops": {
          "value": 512,
          "evidence_ref": "ev-br104-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 32,
          "evidence_ref": "ev-br104-arch-001"
        },
        "compute_unit_label": "SM",
        "hbm_stacks": {
          "value": 2,
          "evidence_ref": "ev-br104-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-br104-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 32,
          "evidence_ref": "ev-br104-001"
        },
        "bandwidth_gbps": {
          "value": 1150,
          "evidence_ref": "ev-br104-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "BLink",
        "bandwidth_gbps": 256,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 300,
          "evidence_ref": "ev-br104-001"
        }
      },
      "software_support": {
        "drivers": [
          "BIRENSUPA"
        ],
        "engines": [],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "BR104"
      ],
      "chinese_names": [
        "壁仞BR104"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-br104-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.birentech.com/",
          "accessed": "2026-04-28",
          "citation": "Biren BR104 announcement (export-control compliant variant of BR100)"
        },
        {
          "id": "ev-br104-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.birentech.com/",
          "accessed": "2026-04-28",
          "citation": "BR104: 32 SPCs derated from BR100, 2× HBM2e stacks ⇒ 32 GB; TSMC 7nm-class"
        }
      ],
      "disclaimers": [
        "BR104 is export-control-compliant variant; reduced from BR100.",
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "mlu370-x8",
      "name": "寒武纪 MLU370-X8",
      "vendor": {
        "id": "cambricon",
        "name": "Cambricon",
        "chinese_names": [
          "寒武纪"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.cambricon.com/",
        "aliases": [
          "寒武纪科技"
        ]
      },
      "generation": "mlu370",
      "status": "in-production",
      "release_year": 2022,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 96,
          "evidence_ref": "ev-mlu370-001"
        },
        "fp16_tflops": {
          "value": 96,
          "evidence_ref": "ev-mlu370-001"
        },
        "int8_tops": {
          "value": 256,
          "evidence_ref": "ev-mlu370-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-mlu370-arch-001"
        },
        "compute_unit_label": "IPU",
        "hbm_stacks": {
          "value": 2,
          "evidence_ref": "ev-mlu370-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-mlu370-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-mlu370-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mlu370-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 48,
          "evidence_ref": "ev-mlu370-001"
        },
        "bandwidth_gbps": {
          "value": 614,
          "evidence_ref": "ev-mlu370-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "MLU-Link",
        "bandwidth_gbps": 200,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 250,
          "evidence_ref": "ev-mlu370-001"
        }
      },
      "software_support": {
        "drivers": [
          "Neuware-2.x"
        ],
        "engines": [
          {
            "id": "lmdeploy",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "MLU370-X8"
      ],
      "chinese_names": [
        "思元370-X8"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mlu370-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.cambricon.com/",
          "accessed": "2026-04-28",
          "citation": "Cambricon MLU370 series product overview"
        },
        {
          "id": "ev-mlu370-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.cambricon.com/",
          "accessed": "2026-04-28",
          "citation": "MLU370-X8 (思元370-X8): MLUarch02, dual-die package with 64 IPUs total, 2× HBM2e ⇒ 48 GB; TSMC 7nm chiplets bridged via MLU-Link"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "mlu590",
      "name": "寒武纪 思元 590",
      "vendor": {
        "id": "cambricon",
        "name": "Cambricon",
        "chinese_names": [
          "寒武纪"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.cambricon.com/",
        "aliases": [
          "寒武纪科技"
        ]
      },
      "generation": "mlu590",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 256,
          "evidence_ref": "ev-mlu590-001"
        },
        "fp16_tflops": {
          "value": 256,
          "evidence_ref": "ev-mlu590-001"
        },
        "int8_tops": {
          "value": 512,
          "evidence_ref": "ev-mlu590-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 80,
          "evidence_ref": "ev-mlu590-arch-001"
        },
        "compute_unit_label": "IPU",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-mlu590-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-mlu590-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-mlu590-001"
        },
        "bandwidth_gbps": {
          "value": 1228,
          "evidence_ref": "ev-mlu590-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "MLU-Link-v2",
        "bandwidth_gbps": 400,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 350,
          "evidence_ref": "ev-mlu590-001"
        }
      },
      "software_support": {
        "drivers": [
          "Neuware-3.x"
        ],
        "engines": [
          {
            "id": "lmdeploy",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "MLU590",
        "思元590"
      ],
      "chinese_names": [
        "思元590"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mlu590-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.cambricon.com/",
          "accessed": "2026-04-28",
          "citation": "Cambricon MLU590 product overview (limited public detail)"
        },
        {
          "id": "ev-mlu590-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.cambricon.com/",
          "accessed": "2026-04-28",
          "citation": "MLU590 (思元590) MLUarch03 architecture: 80 IPUs (Intelligence Processing Units), 4× HBM2e stacks ⇒ 64 GB; SMIC N+1 / 7nm-class"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Some specs partial; community contributions welcome to fill gaps."
      ]
    },
    {
      "id": "wse-3",
      "name": "Cerebras WSE-3",
      "vendor": {
        "id": "cerebras",
        "name": "Cerebras Systems",
        "chinese_names": [
          "脑创"
        ],
        "country": "US",
        "type": "both",
        "website": "https://www.cerebras.ai/",
        "aliases": [
          "Cerebras",
          "CS-3"
        ]
      },
      "generation": "cerebras-wse-gen3",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "wafer-scale",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 125000,
          "evidence_ref": "ev-wse3-001"
        },
        "bf16_tflops": {
          "value": 62500,
          "evidence_ref": "ev-wse3-001"
        },
        "fp16_tflops": {
          "value": 62500,
          "evidence_ref": "ev-wse3-001"
        },
        "fp32_tflops": {
          "value": 31250,
          "evidence_ref": "ev-wse3-001"
        },
        "fp64_tflops": {
          "value": 7800,
          "evidence_ref": "ev-wse3-002"
        },
        "int8_tops": {
          "value": 125000,
          "evidence_ref": "ev-wse3-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 900000,
          "evidence_ref": "ev-wse3-001"
        },
        "compute_unit_label": "Tile",
        "on_die_sram_mb": {
          "value": 44000,
          "evidence_ref": "ev-wse3-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-wse3-001"
        },
        "die_area_mm2": {
          "value": 46225,
          "evidence_ref": "ev-wse3-001"
        },
        "transistor_count_b": {
          "value": 4000,
          "evidence_ref": "ev-wse3-001"
        },
        "wafer_scale": true
      },
      "memory": {
        "capacity_gb": {
          "value": 44,
          "evidence_ref": "ev-wse3-001"
        },
        "bandwidth_gbps": {
          "value": 21000000,
          "evidence_ref": "ev-wse3-001"
        },
        "type": "on-die-sram"
      },
      "scale_up": {
        "protocol": "SwarmX",
        "bandwidth_gbps": 16000,
        "world_size": 16,
        "topology": "full-mesh"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 1200,
        "protocol": "SwarmX-IB"
      },
      "power": {
        "tdp_w": {
          "value": 23000,
          "evidence_ref": "ev-wse3-001"
        }
      },
      "software_support": {
        "drivers": [
          "Cerebras-SDK-2.x",
          "CSL"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "Cerebras-vLLM fork supports limited models"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp32"
        ],
        "parallelism": [
          "tp",
          "pp",
          "dp"
        ]
      },
      "aliases": [
        "WSE-3",
        "CS-3 chip"
      ],
      "chinese_names": [
        "脑创晶圆引擎-3",
        "WSE-3"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-wse3-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://www.cerebras.ai/product-chip/",
          "accessed": "2026-04-29",
          "citation": "Cerebras WSE-3 product page: 4 trillion transistors, 900,000 cores, 44 GB on-chip SRAM, 21 PB/s memory bandwidth, 5nm TSMC, 46,225 mm² die"
        },
        {
          "id": "ev-wse3-002",
          "tier": "estimated",
          "source_type": "third-party-review",
          "url": "https://www.cerebras.ai/blog/wse3-architecture",
          "accessed": "2026-04-29",
          "citation": "FP64 estimate from CS-3 SC23 paper showing ~7.8 TFLOPS sustained on HPL-AI workloads (derived from system-level measurements)"
        }
      ],
      "disclaimers": [
        "Wafer-scale architecture: throughput scales nonlinearly with model size due to no host-device transfer.",
        "Memory bandwidth quoted is aggregate on-die SRAM bandwidth, not HBM-equivalent.",
        "World-size for scale-up is at the CS-3 system level (1 system = 1 wafer); SwarmX clusters connect multiple systems."
      ]
    },
    {
      "id": "enflame-t21",
      "name": "燧原 云燧 T21",
      "vendor": {
        "id": "enflame",
        "name": "Enflame",
        "chinese_names": [
          "燧原科技",
          "燧原"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.enflame-tech.com/",
        "aliases": [
          "Enflame Technology"
        ]
      },
      "generation": "enflame-gen3",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 192,
          "evidence_ref": "ev-t21-001"
        },
        "fp16_tflops": {
          "value": 192,
          "evidence_ref": "ev-t21-001"
        },
        "int8_tops": {
          "value": 384,
          "evidence_ref": "ev-t21-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 80,
          "evidence_ref": "ev-t21-arch-001"
        },
        "compute_unit_label": "Cluster",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-t21-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-t21-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-t21-001"
        },
        "bandwidth_gbps": {
          "value": 1600,
          "evidence_ref": "ev-t21-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "Enflame-Link",
        "bandwidth_gbps": 300,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 400,
          "evidence_ref": "ev-t21-001"
        }
      },
      "software_support": {
        "drivers": [
          "TopsRider"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "T21",
        "云燧T21"
      ],
      "chinese_names": [
        "云燧T21"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-t21-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.enflame-tech.com/",
          "accessed": "2026-04-28",
          "citation": "Enflame Yunsui T21 product page (limited public spec)"
        },
        {
          "id": "ev-t21-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.enflame-tech.com/",
          "accessed": "2026-04-28",
          "citation": "T21 (云燧T21): Gen3 GPGPU, ~80 compute clusters, 4× HBM2e ⇒ 64 GB; TSMC 7nm-class"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Public spec sheets limited; some fields are best-available estimates."
      ]
    },
    {
      "id": "sohu",
      "name": "Etched Sohu",
      "vendor": {
        "id": "etched",
        "name": "Etched",
        "chinese_names": [
          "Etched"
        ],
        "country": "US",
        "type": "both",
        "website": "https://www.etched.com/",
        "aliases": [
          "Etched AI",
          "Sohu"
        ]
      },
      "generation": "etched-sohu-gen1",
      "status": "announced",
      "release_year": 2025,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": {
          "value": 4500,
          "evidence_ref": "ev-sohu-001"
        },
        "fp8_tflops": {
          "value": 2250,
          "evidence_ref": "ev-sohu-001"
        },
        "bf16_tflops": {
          "value": 1125,
          "evidence_ref": "ev-sohu-001"
        },
        "fp16_tflops": {
          "value": 1125,
          "evidence_ref": "ev-sohu-001"
        },
        "int8_tops": {
          "value": 2250,
          "evidence_ref": "ev-sohu-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 144,
          "evidence_ref": "ev-sohu-arch"
        },
        "compute_unit_label": "Tile",
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-sohu-arch"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-sohu-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-sohu-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 144,
          "evidence_ref": "ev-sohu-001"
        },
        "bandwidth_gbps": {
          "value": 5760,
          "evidence_ref": "ev-sohu-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "Etched-Mesh",
        "bandwidth_gbps": 800,
        "world_size": 8,
        "topology": "full-mesh"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "Ethernet-RoCE"
      },
      "power": {
        "tdp_w": {
          "value": 700,
          "evidence_ref": "ev-sohu-001"
        }
      },
      "software_support": {
        "drivers": [
          "Etched-SDK-1.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "Etched custom serving engine; vllm-etched-fork in progress"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "Sohu",
        "Etched Transformer ASIC"
      ],
      "chinese_names": [
        "Etched Sohu Transformer 专用芯片"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-sohu-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.etched.com/announcing-etched",
          "accessed": "2026-04-29",
          "citation": "Etched Sohu (announced June 2024): Transformer-only ASIC, claims 100,000+ tokens/sec on Llama 70B (8-card system), 144 GB HBM3e per chip. Status: announced, GA targeted late 2025."
        },
        {
          "id": "ev-sohu-arch",
          "tier": "estimated",
          "source_type": "third-party-review",
          "url": "https://www.semianalysis.com/p/sohu-asic-deep-dive",
          "accessed": "2026-04-29",
          "citation": "Sohu architecture estimate: ~144 specialized Tiles optimized for transformer attention + MLP only. Cannot run non-transformer workloads (no graph ops, no conv, no MoE gate primitive without firmware extension)."
        }
      ],
      "disclaimers": [
        "Sohu is TRANSFORMER-ONLY: cannot run scientific (AlphaFold), graph (GraphCast), or vision (SAM/DINO) workloads. Domain restriction is the entire bet.",
        "Status: announced (June 2024); GA pushed to late 2025 / 2026 — specs subject to revision at GA.",
        "Vendor-claimed throughput numbers (100k+ tok/s on Llama 70B) imply ~10x H100 efficiency — independent verification pending."
      ]
    },
    {
      "id": "tpu-v5p",
      "name": "Google TPU v5p",
      "vendor": {
        "id": "google",
        "name": "Google",
        "chinese_names": [
          "谷歌"
        ],
        "country": "US",
        "type": "both",
        "website": "https://cloud.google.com/tpu",
        "aliases": [
          "Google Cloud",
          "Google DeepMind",
          "Google AI"
        ]
      },
      "generation": "tpu-v5",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "proprietary",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 459,
          "evidence_ref": "ev-tpuv5p-001"
        },
        "fp16_tflops": {
          "value": 459,
          "evidence_ref": "ev-tpuv5p-001"
        },
        "int8_tops": {
          "value": 918,
          "evidence_ref": "ev-tpuv5p-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 4,
          "evidence_ref": "ev-tpuv5p-arch-001"
        },
        "compute_unit_label": "XPU",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-tpuv5p-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-tpuv5p-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 95,
          "evidence_ref": "ev-tpuv5p-001"
        },
        "bandwidth_gbps": {
          "value": 2765,
          "evidence_ref": "ev-tpuv5p-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "ICI",
        "bandwidth_gbps": 4800,
        "world_size": 8960,
        "topology": "3d-torus"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "DCN"
      },
      "power": {
        "tdp_w": {
          "value": 700,
          "evidence_ref": "ev-tpuv5p-001"
        }
      },
      "software_support": {
        "drivers": [
          "JAX",
          "PyTorch/XLA",
          "TensorFlow"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp"
        ]
      },
      "aliases": [
        "TPU v5p"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-tpuv5p-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://cloud.google.com/tpu/docs/v5p",
          "accessed": "2026-04-28",
          "citation": "Google Cloud TPU v5p documentation"
        },
        {
          "id": "ev-tpuv5p-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://cloud.google.com/tpu/docs/system-architecture-tpu-vm",
          "accessed": "2026-04-28",
          "citation": "TPU v5p: 4 systolic-array TensorCores per chip + scalar/vector units, 4× HBM2e ⇒ 95 GB; 3D-torus ICI fabric (up to 8960 chips/pod); TSMC 5nm-class"
        }
      ],
      "disclaimers": [
        "TPU v5p only available via Google Cloud."
      ]
    },
    {
      "id": "trillium",
      "name": "Google TPU Trillium (v6e)",
      "vendor": {
        "id": "google",
        "name": "Google",
        "chinese_names": [
          "谷歌"
        ],
        "country": "US",
        "type": "both",
        "website": "https://cloud.google.com/tpu",
        "aliases": [
          "Google Cloud",
          "Google DeepMind",
          "Google AI"
        ]
      },
      "generation": "tpu-v6",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "proprietary",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 918,
          "evidence_ref": "ev-trillium-001"
        },
        "bf16_tflops": {
          "value": 918,
          "evidence_ref": "ev-trillium-001"
        },
        "fp16_tflops": {
          "value": 918,
          "evidence_ref": "ev-trillium-001"
        },
        "int8_tops": {
          "value": 1836,
          "evidence_ref": "ev-trillium-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 1,
          "evidence_ref": "ev-trillium-arch-001"
        },
        "compute_unit_label": "XPU",
        "hbm_stacks": {
          "value": 2,
          "evidence_ref": "ev-trillium-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-trillium-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 32,
          "evidence_ref": "ev-trillium-001"
        },
        "bandwidth_gbps": {
          "value": 1640,
          "evidence_ref": "ev-trillium-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "ICI",
        "bandwidth_gbps": 3200,
        "world_size": 256,
        "topology": "2d-torus"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "DCN"
      },
      "power": {
        "tdp_w": {
          "value": 250,
          "evidence_ref": "ev-trillium-001"
        }
      },
      "software_support": {
        "drivers": [
          "JAX",
          "PyTorch/XLA"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp"
        ]
      },
      "aliases": [
        "Trillium",
        "TPU v6e"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-trillium-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://cloud.google.com/blog/products/compute/introducing-trillium-6th-gen-tpus",
          "accessed": "2026-04-28",
          "citation": "Google introduces Trillium (TPU v6e) blog post"
        },
        {
          "id": "ev-trillium-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://cloud.google.com/blog/products/compute/introducing-trillium-6th-gen-tpus",
          "accessed": "2026-04-28",
          "citation": "Trillium (TPU v6e): single TensorCore chip, 2× HBM2e ⇒ 32 GB; 2D-torus ICI fabric (256 chips/pod); estimated TSMC 5nm-class"
        }
      ],
      "disclaimers": [
        "TPU Trillium only available via Google Cloud."
      ]
    },
    {
      "id": "groq-lpu",
      "name": "Groq LPU (TSP v1)",
      "vendor": {
        "id": "groq",
        "name": "Groq",
        "chinese_names": [
          "Groq"
        ],
        "country": "US",
        "type": "both",
        "website": "https://groq.com/",
        "aliases": [
          "Groq Inc",
          "GroqCloud"
        ]
      },
      "generation": "groq-tsp-gen1",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 750,
          "evidence_ref": "ev-groq-001"
        },
        "bf16_tflops": {
          "value": 188,
          "evidence_ref": "ev-groq-001"
        },
        "fp16_tflops": {
          "value": 188,
          "evidence_ref": "ev-groq-001"
        },
        "int8_tops": {
          "value": 750,
          "evidence_ref": "ev-groq-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 5120,
          "evidence_ref": "ev-groq-001"
        },
        "compute_unit_label": "PEs",
        "on_die_sram_mb": {
          "value": 230,
          "evidence_ref": "ev-groq-001"
        },
        "process_node_nm": {
          "value": 14,
          "evidence_ref": "ev-groq-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-groq-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-groq-001"
        },
        "deterministic_latency": true
      },
      "memory": {
        "capacity_gb": {
          "value": 0.23,
          "evidence_ref": "ev-groq-001"
        },
        "bandwidth_gbps": {
          "value": 80000,
          "evidence_ref": "ev-groq-001"
        },
        "type": "on-die-sram"
      },
      "scale_up": {
        "protocol": "GroqRack-Mesh",
        "bandwidth_gbps": 160,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 80,
        "protocol": "Ethernet-RoCE"
      },
      "power": {
        "tdp_w": {
          "value": 215,
          "evidence_ref": "ev-groq-001"
        }
      },
      "software_support": {
        "drivers": [
          "GroqWare-SDK-2.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "Groq inference engine handles serving directly"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "LPU",
        "GroqChip",
        "Groq Tensor Streaming Processor"
      ],
      "chinese_names": [
        "Groq 语言处理单元 (LPU)"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-groq-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://groq.com/lpu-inference-engine/",
          "accessed": "2026-04-29",
          "citation": "Groq LPU specifications — 230 MB SRAM, 80 TB/s memory bandwidth (on-die), deterministic single-batch latency. TSP v1 first-generation at 14nm Global Foundries."
        }
      ],
      "disclaimers": [
        "Memory capacity is on-die SRAM only; the LPU has NO DRAM. Models must fit in 230 MB or be sharded across cards.",
        "Roofline analysis is unusual: memory-bandwidth ceiling is dramatically higher than HBM cards but capacity is dramatically lower.",
        "Deterministic latency makes this the only chip in the corpus where p99 ≈ p50 by design — useful for SLA-bound serving."
      ]
    },
    {
      "id": "ascend-910b",
      "name": "昇腾 910B",
      "vendor": {
        "id": "huawei",
        "name": "Huawei Ascend",
        "chinese_names": [
          "华为昇腾",
          "华为"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://e.huawei.com/en/products/computing/ascend",
        "aliases": [
          "HiSilicon",
          "Hisilicon Ascend"
        ]
      },
      "generation": "ascend-910-gen2",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 320,
          "evidence_ref": "ev-asc910b-001"
        },
        "fp16_tflops": {
          "value": 320,
          "evidence_ref": "ev-asc910b-001"
        },
        "int8_tops": {
          "value": 640,
          "evidence_ref": "ev-asc910b-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 32,
          "evidence_ref": "ev-asc910b-arch-001"
        },
        "compute_unit_label": "AI Core",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-asc910b-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-asc910b-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-asc910b-001"
        },
        "bandwidth_gbps": {
          "value": 1600,
          "evidence_ref": "ev-asc910b-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "HCCS",
        "bandwidth_gbps": 392,
        "world_size": 8,
        "topology": "switched",
        "switch": "huawei-hccs-switch"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 400,
          "evidence_ref": "ev-asc910b-001"
        }
      },
      "software_support": {
        "drivers": [
          "CANN-7.0",
          "CANN-8.0"
        ],
        "engines": [
          {
            "id": "mindie",
            "status": "officially-supported",
            "versions": [
              "1.0.RC2",
              "1.0.RC3"
            ]
          },
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "vllm-ascend fork"
          },
          {
            "id": "lmdeploy",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "910B",
        "Ascend910B"
      ],
      "chinese_names": [
        "昇腾910B"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-asc910b-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://e.huawei.com/en/products/computing/ascend",
          "accessed": "2026-04-28",
          "citation": "Huawei Ascend 910B product overview"
        },
        {
          "id": "ev-asc910b-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://e.huawei.com/en/products/computing/ascend",
          "accessed": "2026-04-28",
          "citation": "Ascend 910B: Da Vinci 2.0 architecture, 32 AI Cores; 4× HBM2e ⇒ 64 GB; SMIC N+1 / 7nm-class"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "FP8 and FP4 not supported on this generation."
      ]
    },
    {
      "id": "ascend-910c",
      "name": "昇腾 910C",
      "vendor": {
        "id": "huawei",
        "name": "Huawei Ascend",
        "chinese_names": [
          "华为昇腾",
          "华为"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://e.huawei.com/en/products/computing/ascend",
        "aliases": [
          "HiSilicon",
          "HiSilicon Ascend"
        ]
      },
      "generation": "ascend-910-gen3",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 800,
          "evidence_ref": "ev-asc910c-001"
        },
        "fp16_tflops": {
          "value": 800,
          "evidence_ref": "ev-asc910c-001"
        },
        "int8_tops": {
          "value": 1600,
          "evidence_ref": "ev-asc910c-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-asc910c-arch-001"
        },
        "compute_unit_label": "AI Core",
        "l2_cache_mb": {
          "value": 192,
          "evidence_ref": "ev-asc910c-arch-001"
        },
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-asc910c-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-asc910c-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 128,
          "evidence_ref": "ev-asc910c-001"
        },
        "bandwidth_gbps": {
          "value": 3200,
          "evidence_ref": "ev-asc910c-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "HCCS-v2",
        "bandwidth_gbps": 784,
        "world_size": 8,
        "topology": "switched",
        "switch": "huawei-hccs-v2-switch"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 700,
          "evidence_ref": "ev-asc910c-001"
        }
      },
      "software_support": {
        "drivers": [
          "CANN-8.0",
          "CANN-8.1"
        ],
        "engines": [
          {
            "id": "mindie",
            "status": "officially-supported",
            "versions": [
              "1.0.RC3"
            ]
          },
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "vllm-ascend fork"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep"
        ]
      },
      "aliases": [
        "910C",
        "Ascend910C"
      ],
      "chinese_names": [
        "昇腾910C"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-asc910c-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://e.huawei.com/en/products/computing/ascend",
          "accessed": "2026-04-28",
          "citation": "Huawei Ascend 910C announcement (vendor-claimed; specs partially public)"
        },
        {
          "id": "ev-asc910c-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://e.huawei.com/en/products/computing/ascend",
          "accessed": "2026-04-28",
          "citation": "Ascend 910C uses Da Vinci 3.0 cores (64 AI Cores per package, dual-die — partially derived from 910B disclosures); HBM3 stacks 4× 32 GB; reportedly SMIC N+2 / 7nm-class process"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Some specs derived from CloudMatrix 384 announcement materials."
      ]
    },
    {
      "id": "ascend-950",
      "name": "昇腾 950",
      "vendor": {
        "id": "huawei",
        "name": "Huawei Ascend",
        "chinese_names": [
          "华为昇腾",
          "华为"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://e.huawei.com/en/products/computing/ascend",
        "aliases": [
          "HiSilicon",
          "HiSilicon Ascend"
        ]
      },
      "generation": "ascend-950-gen1",
      "status": "announced",
      "release_year": 2026,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": {
          "value": 4500,
          "evidence_ref": "ev-asc950-001"
        },
        "fp8_tflops": {
          "value": 2250,
          "evidence_ref": "ev-asc950-001"
        },
        "bf16_tflops": {
          "value": 1500,
          "evidence_ref": "ev-asc950-001"
        },
        "fp16_tflops": {
          "value": 1500,
          "evidence_ref": "ev-asc950-001"
        },
        "fp32_tflops": {
          "value": 750,
          "evidence_ref": "ev-asc950-001"
        },
        "int8_tops": {
          "value": 3000,
          "evidence_ref": "ev-asc950-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 96,
          "evidence_ref": "ev-asc950-arch"
        },
        "compute_unit_label": "AI Core",
        "l2_cache_mb": {
          "value": 256,
          "evidence_ref": "ev-asc950-arch"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-asc950-arch"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-asc950-arch"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 256,
          "evidence_ref": "ev-asc950-001"
        },
        "bandwidth_gbps": {
          "value": 6400,
          "evidence_ref": "ev-asc950-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "HCCS-v3",
        "bandwidth_gbps": 1600,
        "world_size": 16,
        "topology": "switched",
        "switch": "huawei-hccs-v3-switch"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 800,
        "protocol": "RoCEv3"
      },
      "power": {
        "tdp_w": {
          "value": 1200,
          "evidence_ref": "ev-asc950-001"
        }
      },
      "software_support": {
        "drivers": [
          "CANN-9.0"
        ],
        "engines": [
          {
            "id": "mindie",
            "status": "officially-supported",
            "versions": [
              "2.0.RC1"
            ]
          },
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "vllm-ascend fork (Ascend 950 support landed 2026 Q3)"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp4",
          "int8",
          "int4-awq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "Ascend 950",
        "昇腾950"
      ],
      "chinese_names": [
        "昇腾 950",
        "华为昇腾950"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-asc950-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://e.huawei.com/cn/products/computing/ascend-950",
          "accessed": "2026-04-29",
          "citation": "华为 2025 Q3 公告昇腾 950 路线图：FP4 原生 4.5 PFLOPS，HBM3e 256 GB，HCCS-v3 1.6 TB/s scale-up，MindIE 2.0 + CANN 9.0 软件栈支持"
        },
        {
          "id": "ev-asc950-arch",
          "tier": "estimated",
          "source_type": "third-party-review",
          "url": "https://e.huawei.com/cn/products/computing/ascend",
          "accessed": "2026-04-29",
          "citation": "昇腾 950 架构估算：96 个 AI Core (Da Vinci 4.0)，8 stack HBM3e，5nm 制程 (SMIC 等代工)。GA 时间 2026 Q4，规格可能调整。"
        }
      ],
      "disclaimers": [
        "Status: announced — 公开路线图 2025 Q3 公布, GA 计划 2026 Q4。规格基于路线图, GA 时可能调整。",
        "部分规格基于路线图推算 (compute_unit_count, process_node) 而非官方白皮书；属于 estimated 等级。",
        "CloudMatrix 768 集群方案与 Ascend 950 同期发布，scale-up 域规模较 384 翻倍。"
      ]
    },
    {
      "id": "dcu-k100",
      "name": "海光 DCU K100",
      "vendor": {
        "id": "hygon",
        "name": "Hygon",
        "chinese_names": [
          "海光信息",
          "海光"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.hygon.cn/",
        "aliases": [
          "Hygon Information Technology"
        ]
      },
      "generation": "hygon-dcu-gen3",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 192,
          "evidence_ref": "ev-dcuk100-001"
        },
        "fp16_tflops": {
          "value": 192,
          "evidence_ref": "ev-dcuk100-001"
        },
        "int8_tops": {
          "value": 384,
          "evidence_ref": "ev-dcuk100-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 96,
          "evidence_ref": "ev-dcuk100-arch-001"
        },
        "compute_unit_label": "CU",
        "hbm_stacks": {
          "value": 6,
          "evidence_ref": "ev-dcuk100-arch-001"
        },
        "process_node_nm": {
          "value": 6,
          "evidence_ref": "ev-dcuk100-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-dcuk100-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-dcuk100-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 96,
          "evidence_ref": "ev-dcuk100-001"
        },
        "bandwidth_gbps": {
          "value": 2400,
          "evidence_ref": "ev-dcuk100-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "Hygon-Link",
        "bandwidth_gbps": 400,
        "world_size": 8,
        "topology": "fully-connected"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 600,
          "evidence_ref": "ev-dcuk100-001"
        }
      },
      "software_support": {
        "drivers": [
          "DTK-24.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "K100"
      ],
      "chinese_names": [
        "深算二号",
        "海光K100"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-dcuk100-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.hygon.cn/",
          "accessed": "2026-04-28",
          "citation": "Hygon K100 announcement (vendor-claimed; details still emerging)"
        },
        {
          "id": "ev-dcuk100-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.hygon.cn/",
          "accessed": "2026-04-28",
          "citation": "K100 (深算二号): GCN-derived gen3, 96 CUs, 6× HBM3 ⇒ 96 GB; SMIC 6nm-class, PCIe Gen5 x16"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Some specs derived from supply-chain reports."
      ]
    },
    {
      "id": "dcu-z100",
      "name": "海光 DCU Z100",
      "vendor": {
        "id": "hygon",
        "name": "Hygon",
        "chinese_names": [
          "海光信息",
          "海光"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.hygon.cn/",
        "aliases": [
          "Hygon Information Technology"
        ]
      },
      "generation": "hygon-dcu-gen2",
      "status": "in-production",
      "release_year": 2022,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 96,
          "evidence_ref": "ev-dcuz100-001"
        },
        "fp16_tflops": {
          "value": 96,
          "evidence_ref": "ev-dcuz100-001"
        },
        "int8_tops": {
          "value": 192,
          "evidence_ref": "ev-dcuz100-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-dcuz100-arch-001"
        },
        "compute_unit_label": "CU",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-dcuz100-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-dcuz100-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-dcuz100-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-dcuz100-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-dcuz100-001"
        },
        "bandwidth_gbps": {
          "value": 1024,
          "evidence_ref": "ev-dcuz100-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "PCIe-Gen4",
        "bandwidth_gbps": 64,
        "world_size": 8,
        "topology": "pcie-fabric"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 350,
          "evidence_ref": "ev-dcuz100-001"
        }
      },
      "software_support": {
        "drivers": [
          "DTK-23.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "vllm-rocm fork (Hygon HIP compatible)"
          }
        ],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "Z100"
      ],
      "chinese_names": [
        "深算一号",
        "海光Z100"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-dcuz100-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.hygon.cn/",
          "accessed": "2026-04-28",
          "citation": "Hygon DCU product overview (limited detail public)"
        },
        {
          "id": "ev-dcuz100-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.hygon.cn/",
          "accessed": "2026-04-28",
          "citation": "Z100 (深算一号) is GCN-derived: 64 CUs, 4× HBM2e stacks ⇒ 64 GB; chiplet design at SMIC 7nm-class. Programmed via DTK / HIP (AMD ROCm fork)."
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Hygon DCU uses HIP, similar programming model to AMD ROCm."
      ]
    },
    {
      "id": "iluvatar-bi",
      "name": "天数智芯 天垓 100",
      "vendor": {
        "id": "iluvatar",
        "name": "Iluvatar CoreX",
        "chinese_names": [
          "天数智芯"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.iluvatar.com/",
        "aliases": [
          "Iluvatar",
          "CoreX"
        ]
      },
      "generation": "iluvatar-gen2",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 95,
          "evidence_ref": "ev-tianhe-001"
        },
        "fp16_tflops": {
          "value": 95,
          "evidence_ref": "ev-tianhe-001"
        },
        "int8_tops": {
          "value": 190,
          "evidence_ref": "ev-tianhe-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 32,
          "evidence_ref": "ev-tianhe-arch-001"
        },
        "compute_unit_label": "SM",
        "hbm_stacks": {
          "value": 2,
          "evidence_ref": "ev-tianhe-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-tianhe-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-tianhe-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-tianhe-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 32,
          "evidence_ref": "ev-tianhe-001"
        },
        "bandwidth_gbps": {
          "value": 1200,
          "evidence_ref": "ev-tianhe-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "PCIe-Gen4",
        "bandwidth_gbps": 64,
        "world_size": 8,
        "topology": "pcie-fabric"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 300,
          "evidence_ref": "ev-tianhe-001"
        }
      },
      "software_support": {
        "drivers": [
          "IxRT",
          "CoreX"
        ],
        "engines": [],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "Tiangai 100",
        "天垓100",
        "BI"
      ],
      "chinese_names": [
        "天垓100"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-tianhe-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.iluvatar.com/",
          "accessed": "2026-04-28",
          "citation": "Iluvatar CoreX 天垓 100 product overview"
        },
        {
          "id": "ev-tianhe-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.iluvatar.com/",
          "accessed": "2026-04-28",
          "citation": "BI (天垓100): CUDA-compatible CoreX architecture, ~32 SMs, 2× HBM2e ⇒ 32 GB; TSMC 7nm-class"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Public spec sheets limited."
      ]
    },
    {
      "id": "gaudi-2",
      "name": "Intel Gaudi 2",
      "vendor": {
        "id": "intel",
        "name": "Intel",
        "chinese_names": [
          "英特尔"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.intel.com/",
        "aliases": [
          "Intel Corporation",
          "Habana Labs"
        ]
      },
      "generation": "gaudi-gen2",
      "status": "in-production",
      "release_year": 2022,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 865,
          "evidence_ref": "ev-gaudi2-001"
        },
        "bf16_tflops": {
          "value": 432,
          "evidence_ref": "ev-gaudi2-001"
        },
        "fp16_tflops": {
          "value": 432,
          "evidence_ref": "ev-gaudi2-001"
        },
        "int8_tops": {
          "value": 865,
          "evidence_ref": "ev-gaudi2-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 24,
          "evidence_ref": "ev-gaudi2-arch-001"
        },
        "compute_unit_label": "Cluster",
        "hbm_stacks": {
          "value": 6,
          "evidence_ref": "ev-gaudi2-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-gaudi2-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-gaudi2-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-gaudi2-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 96,
          "evidence_ref": "ev-gaudi2-001"
        },
        "bandwidth_gbps": {
          "value": 2450,
          "evidence_ref": "ev-gaudi2-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "RoCE-v2-100GbE",
        "bandwidth_gbps": 600,
        "world_size": 8,
        "topology": "all-to-all"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 600,
          "evidence_ref": "ev-gaudi2-001"
        }
      },
      "software_support": {
        "drivers": [
          "Habana SynapseAI"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "Gaudi2",
        "HL-225H"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-gaudi2-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi2.html",
          "accessed": "2026-04-28",
          "citation": "Intel Gaudi 2 product page"
        },
        {
          "id": "ev-gaudi2-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://habana.ai/products/gaudi2/",
          "accessed": "2026-04-28",
          "citation": "Gaudi 2: 24 Tensor Processor Cores (TPCs) + matrix engines, 6× HBM2e ⇒ 96 GB; 24× 100 GbE on-chip RoCE NICs; TSMC 7nm"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "gaudi-3",
      "name": "Intel Gaudi 3",
      "vendor": {
        "id": "intel",
        "name": "Intel",
        "chinese_names": [
          "英特尔"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.intel.com/",
        "aliases": [
          "Intel Corporation",
          "Habana Labs"
        ]
      },
      "generation": "gaudi-gen3",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "oam",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1835,
          "evidence_ref": "ev-gaudi3-001"
        },
        "bf16_tflops": {
          "value": 1835,
          "evidence_ref": "ev-gaudi3-001"
        },
        "fp16_tflops": {
          "value": 1835,
          "evidence_ref": "ev-gaudi3-001"
        },
        "int8_tops": {
          "value": 1835,
          "evidence_ref": "ev-gaudi3-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-gaudi3-arch-001"
        },
        "compute_unit_label": "Cluster",
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-gaudi3-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-gaudi3-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-gaudi3-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-gaudi3-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 128,
          "evidence_ref": "ev-gaudi3-001"
        },
        "bandwidth_gbps": {
          "value": 3700,
          "evidence_ref": "ev-gaudi3-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "RoCE-v2-200GbE",
        "bandwidth_gbps": 1200,
        "world_size": 8,
        "topology": "all-to-all"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 900,
          "evidence_ref": "ev-gaudi3-001"
        }
      },
      "software_support": {
        "drivers": [
          "Habana SynapseAI"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": []
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "Gaudi3",
        "HL-325L"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-gaudi3-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi3.html",
          "accessed": "2026-04-28",
          "citation": "Intel Gaudi 3 product page"
        },
        {
          "id": "ev-gaudi3-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://habana.ai/products/gaudi3/",
          "accessed": "2026-04-28",
          "citation": "Gaudi 3: dual-die package, 64 TPCs total + 8 MMEs, 8× HBM2e ⇒ 128 GB; 24× 200 GbE on-chip RoCE; TSMC 5nm"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "metax-c500",
      "name": "沐曦 曦云 C500",
      "vendor": {
        "id": "metax",
        "name": "MetaX",
        "chinese_names": [
          "沐曦",
          "沐曦集成电路"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.metax-tech.com/",
        "aliases": [
          "MetaX Integrated Circuits"
        ]
      },
      "generation": "metax-gen1",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 128,
          "evidence_ref": "ev-c500-001"
        },
        "fp16_tflops": {
          "value": 128,
          "evidence_ref": "ev-c500-001"
        },
        "int8_tops": {
          "value": 256,
          "evidence_ref": "ev-c500-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 64,
          "evidence_ref": "ev-c500-arch-001"
        },
        "compute_unit_label": "CU",
        "hbm_stacks": {
          "value": 4,
          "evidence_ref": "ev-c500-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-c500-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-c500-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-c500-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 64,
          "evidence_ref": "ev-c500-001"
        },
        "bandwidth_gbps": {
          "value": 1800,
          "evidence_ref": "ev-c500-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "MetaXLink",
        "bandwidth_gbps": 200,
        "world_size": 8,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 350,
          "evidence_ref": "ev-c500-001"
        }
      },
      "software_support": {
        "drivers": [
          "MACA"
        ],
        "engines": [],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "C500",
        "曦云C500"
      ],
      "chinese_names": [
        "曦云C500"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-c500-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.metax-tech.com/",
          "accessed": "2026-04-28",
          "citation": "MetaX 曦云 C500 launch announcement (limited public detail)"
        },
        {
          "id": "ev-c500-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.metax-tech.com/",
          "accessed": "2026-04-28",
          "citation": "C500 (曦云C500): MACA architecture, 64 compute units, 4× HBM2e ⇒ 64 GB; TSMC 7nm-class"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "Some fields are best-available estimates from announcements."
      ]
    },
    {
      "id": "mtt-s4000",
      "name": "摩尔线程 MTT S4000",
      "vendor": {
        "id": "moore-threads",
        "name": "Moore Threads",
        "chinese_names": [
          "摩尔线程"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.mthreads.com/",
        "aliases": [
          "MT",
          "MTT"
        ]
      },
      "generation": "kuae-s4000",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 100,
          "evidence_ref": "ev-mtts4000-001"
        },
        "fp16_tflops": {
          "value": 100,
          "evidence_ref": "ev-mtts4000-001"
        },
        "int8_tops": {
          "value": 200,
          "evidence_ref": "ev-mtts4000-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 48,
          "evidence_ref": "ev-mtts4000-arch-001"
        },
        "compute_unit_label": "Cluster",
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-mtts4000-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-mtts4000-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-mtts4000-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 48,
          "evidence_ref": "ev-mtts4000-001"
        },
        "bandwidth_gbps": {
          "value": 768,
          "evidence_ref": "ev-mtts4000-001"
        },
        "type": "GDDR6"
      },
      "scale_up": {
        "protocol": "MTLink",
        "bandwidth_gbps": 240,
        "world_size": 8,
        "topology": "ring"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 450,
          "evidence_ref": "ev-mtts4000-001"
        }
      },
      "software_support": {
        "drivers": [
          "MUSA-3.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "vllm-musa fork"
          }
        ],
        "quantizations": [
          "fp16",
          "int8"
        ],
        "parallelism": [
          "tp"
        ]
      },
      "aliases": [
        "MTT S4000",
        "KUAE S4000"
      ],
      "chinese_names": [
        "摩尔线程 MTT S4000"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-mtts4000-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.mthreads.com/product/S4000",
          "accessed": "2026-04-28",
          "citation": "Moore Threads MTT S4000 product page"
        },
        {
          "id": "ev-mtts4000-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.mthreads.com/product/S4000",
          "accessed": "2026-04-28",
          "citation": "KUAE S4000 (MUSA architecture): 48 compute clusters; PCIe Gen5 x16; SMIC 7nm-class fabrication"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "MUSA programming model is CUDA-compatible at source level."
      ]
    },
    {
      "id": "a100-sxm4",
      "name": "NVIDIA A100 SXM4 80GB",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "ampere",
      "status": "in-production",
      "release_year": 2020,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": {
          "value": 312,
          "evidence_ref": "ev-a100-001"
        },
        "fp16_tflops": {
          "value": 312,
          "evidence_ref": "ev-a100-001"
        },
        "int8_tops": {
          "value": 624,
          "evidence_ref": "ev-a100-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 108,
          "evidence_ref": "ev-a100-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-a100-arch-001"
        },
        "l2_cache_mb": {
          "value": 40,
          "evidence_ref": "ev-a100-arch-001"
        },
        "hbm_stacks": {
          "value": 5,
          "evidence_ref": "ev-a100-arch-001"
        },
        "process_node_nm": {
          "value": 7,
          "evidence_ref": "ev-a100-arch-001"
        },
        "die_area_mm2": {
          "value": 826,
          "evidence_ref": "ev-a100-arch-001"
        },
        "transistor_count_b": {
          "value": 54,
          "evidence_ref": "ev-a100-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-a100-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-a100-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 80,
          "evidence_ref": "ev-a100-001"
        },
        "bandwidth_gbps": {
          "value": 2039,
          "evidence_ref": "ev-a100-001"
        },
        "type": "HBM2e"
      },
      "scale_up": {
        "protocol": "NVLink-3.0",
        "bandwidth_gbps": 600,
        "world_size": 8,
        "topology": "switched",
        "switch": "nvswitch-gen2"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "InfiniBand-HDR",
        "nic": "ConnectX-6"
      },
      "power": {
        "tdp_w": {
          "value": 400,
          "evidence_ref": "ev-a100-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-11.x",
          "CUDA-12.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          },
          {
            "id": "lmdeploy",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "int8",
          "int4-awq",
          "int4-gptq",
          "w4a16"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "A100 SXM",
        "A100-80GB",
        "A100"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-a100-001",
          "tier": "official",
          "source_type": "vendor-datasheet",
          "url": "https://www.nvidia.com/en-us/data-center/a100/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA A100 Tensor Core GPU Datasheet (80GB SXM variant)"
        },
        {
          "id": "ev-a100-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://images.nvidia.com/aem-dam/en-zz/Solutions/data-center/nvidia-ampere-architecture-whitepaper.pdf",
          "accessed": "2026-04-28",
          "citation": "NVIDIA Ampere Architecture Whitepaper (GA100 die: 108 SMs enabled, 40 MB L2, 54B transistors, 826 mm² @ TSMC 7nm)"
        }
      ],
      "disclaimers": [
        "A100 has no FP8 native (Hopper+); FP8 only via emulated paths.",
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "b200-sxm",
      "name": "NVIDIA B200 SXM 180GB",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "blackwell-gen1",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": {
          "value": 9000,
          "evidence_ref": "ev-b200-001"
        },
        "fp8_tflops": {
          "value": 4500,
          "evidence_ref": "ev-b200-001"
        },
        "bf16_tflops": {
          "value": 2250,
          "evidence_ref": "ev-b200-001"
        },
        "fp16_tflops": {
          "value": 2250,
          "evidence_ref": "ev-b200-001"
        },
        "int8_tops": {
          "value": 4500,
          "evidence_ref": "ev-b200-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 160,
          "evidence_ref": "ev-b200-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-b200-arch-001"
        },
        "l2_cache_mb": {
          "value": 100,
          "evidence_ref": "ev-b200-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-b200-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-b200-arch-001"
        },
        "die_area_mm2": {
          "value": 1600,
          "evidence_ref": "ev-b200-arch-001"
        },
        "transistor_count_b": {
          "value": 208,
          "evidence_ref": "ev-b200-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-b200-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-b200-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 180,
          "evidence_ref": "ev-b200-001"
        },
        "bandwidth_gbps": {
          "value": 8000,
          "evidence_ref": "ev-b200-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "NVLink-5.0",
        "bandwidth_gbps": 1800,
        "world_size": 8,
        "topology": "switched",
        "switch": "nvswitch-gen4"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 800,
        "protocol": "InfiniBand-XDR",
        "nic": "ConnectX-8"
      },
      "power": {
        "tdp_w": {
          "value": 1000,
          "evidence_ref": "ev-b200-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.5+"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp8-e5m2",
          "fp4",
          "int8",
          "int4-awq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "B200"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-b200-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.nvidia.com/en-us/data-center/dgx-b200/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA Blackwell B200 product specifications"
        },
        {
          "id": "ev-b200-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://resources.nvidia.com/en-us-blackwell-architecture",
          "accessed": "2026-04-28",
          "citation": "Blackwell B200 architecture: dual-die package (2× 104 SMs = 208 physical, 160 enabled), 100 MB L2, 8× HBM3e stacks (180 GB), 208B transistors @ TSMC 4NP"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "b300-sxm",
      "name": "NVIDIA B300 SXM 288GB",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "blackwell-gen2",
      "status": "in-production",
      "release_year": 2025,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": {
          "value": 15000,
          "evidence_ref": "ev-b300-001"
        },
        "fp8_tflops": {
          "value": 7500,
          "evidence_ref": "ev-b300-001"
        },
        "bf16_tflops": {
          "value": 3750,
          "evidence_ref": "ev-b300-001"
        },
        "fp16_tflops": {
          "value": 3750,
          "evidence_ref": "ev-b300-001"
        },
        "int8_tops": {
          "value": 7500,
          "evidence_ref": "ev-b300-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 168,
          "evidence_ref": "ev-b300-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-b300-arch-001"
        },
        "l2_cache_mb": {
          "value": 100,
          "evidence_ref": "ev-b300-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-b300-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-b300-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-b300-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-b300-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 288,
          "evidence_ref": "ev-b300-001"
        },
        "bandwidth_gbps": {
          "value": 8000,
          "evidence_ref": "ev-b300-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "NVLink-5.0",
        "bandwidth_gbps": 1800,
        "world_size": 8,
        "topology": "switched",
        "switch": "nvswitch-gen4"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 800,
        "protocol": "InfiniBand-XDR",
        "nic": "ConnectX-8"
      },
      "power": {
        "tdp_w": {
          "value": 1400,
          "evidence_ref": "ev-b300-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.6+"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp8-e4m3",
          "fp8-e5m2",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "B300"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-b300-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.nvidia.com/en-us/data-center/dgx-b300/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA B300 announcement (vendor-claimed)"
        },
        {
          "id": "ev-b300-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.nvidia.com/en-us/data-center/dgx-b300/",
          "accessed": "2026-04-28",
          "citation": "B300 dual-die package, refresh of Blackwell with 168 SMs enabled and 8× HBM3e stacks @ 36 GB ⇒ 288 GB"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "gb200-nvl72",
      "name": "NVIDIA GB200 NVL72",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "blackwell-gen1",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "nvl",
      "compute": {
        "fp4_tflops": {
          "value": 9000,
          "evidence_ref": "ev-gb200-001"
        },
        "fp8_tflops": {
          "value": 4500,
          "evidence_ref": "ev-gb200-001"
        },
        "bf16_tflops": {
          "value": 2250,
          "evidence_ref": "ev-gb200-001"
        },
        "fp16_tflops": {
          "value": 2250,
          "evidence_ref": "ev-gb200-001"
        },
        "int8_tops": {
          "value": 4500,
          "evidence_ref": "ev-gb200-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 160,
          "evidence_ref": "ev-gb200-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-gb200-arch-001"
        },
        "l2_cache_mb": {
          "value": 100,
          "evidence_ref": "ev-gb200-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-gb200-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-gb200-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 192,
          "evidence_ref": "ev-gb200-001"
        },
        "bandwidth_gbps": {
          "value": 8000,
          "evidence_ref": "ev-gb200-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "NVLink-5.0",
        "bandwidth_gbps": 1800,
        "world_size": 72,
        "topology": "switched-rail-optimized",
        "switch": "nvswitch-gen4"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 800,
        "protocol": "InfiniBand-XDR",
        "nic": "ConnectX-8"
      },
      "power": {
        "tdp_w": {
          "value": 1200,
          "evidence_ref": "ev-gb200-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.5+"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          },
          {
            "id": "mori",
            "status": "officially-supported",
            "versions": [
              "0.1"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp8-e4m3",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "GB200 NVL72",
        "GB200"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-gb200-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.nvidia.com/en-us/data-center/gb200-nvl72/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA GB200 NVL72 product page; per-GPU specs (each GB200 contains 2 B200s)"
        },
        {
          "id": "ev-gb200-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://resources.nvidia.com/en-us-blackwell-architecture",
          "accessed": "2026-04-28",
          "citation": "GB200 reuses Blackwell B200 die: 160 SMs, 100 MB L2, 8× HBM3e (per-GPU); 2 GPUs + 1 Grace CPU per Bianca board"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured.",
        "GB200 chip pairs 2 B200 GPUs with 1 Grace CPU; specs above are per-B200."
      ]
    },
    {
      "id": "gb300-nvl72",
      "name": "NVIDIA GB300 NVL72",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "blackwell-gen2",
      "status": "in-production",
      "release_year": 2025,
      "form_factor": "nvl",
      "compute": {
        "fp4_tflops": {
          "value": 15000,
          "evidence_ref": "ev-gb300-001"
        },
        "fp8_tflops": {
          "value": 7500,
          "evidence_ref": "ev-gb300-001"
        },
        "bf16_tflops": {
          "value": 3750,
          "evidence_ref": "ev-gb300-001"
        },
        "fp16_tflops": {
          "value": 3750,
          "evidence_ref": "ev-gb300-001"
        },
        "int8_tops": {
          "value": 7500,
          "evidence_ref": "ev-gb300-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 168,
          "evidence_ref": "ev-gb300-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-gb300-arch-001"
        },
        "l2_cache_mb": {
          "value": 100,
          "evidence_ref": "ev-gb300-arch-001"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-gb300-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-gb300-arch-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 288,
          "evidence_ref": "ev-gb300-001"
        },
        "bandwidth_gbps": {
          "value": 8000,
          "evidence_ref": "ev-gb300-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "NVLink-5.0",
        "bandwidth_gbps": 1800,
        "world_size": 72,
        "topology": "switched-rail-optimized",
        "switch": "nvswitch-gen4"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 800,
        "protocol": "InfiniBand-XDR",
        "nic": "ConnectX-8"
      },
      "power": {
        "tdp_w": {
          "value": 1400,
          "evidence_ref": "ev-gb300-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.6+"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          },
          {
            "id": "mori",
            "status": "officially-supported",
            "versions": [
              "0.1"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp8-e4m3",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "GB300 NVL72",
        "GB300"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-gb300-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.nvidia.com/en-us/data-center/gb300-nvl72/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA GB300 NVL72 announcement; per-B300 specs"
        },
        {
          "id": "ev-gb300-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.nvidia.com/en-us/data-center/gb300-nvl72/",
          "accessed": "2026-04-28",
          "citation": "GB300 reuses B300 die: 168 SMs (more enabled vs B200), 100 MB L2, 8× HBM3e (36 GB stacks ⇒ 288 GB)"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "h100-sxm5",
      "name": "NVIDIA H100 SXM5 80GB",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "hopper-gen1",
      "status": "in-production",
      "release_year": 2022,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1979,
          "evidence_ref": "ev-h100-001"
        },
        "bf16_tflops": {
          "value": 989,
          "evidence_ref": "ev-h100-001"
        },
        "fp16_tflops": {
          "value": 989,
          "evidence_ref": "ev-h100-001"
        },
        "int8_tops": {
          "value": 1979,
          "evidence_ref": "ev-h100-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 132,
          "evidence_ref": "ev-h100-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-h100-arch-001"
        },
        "l2_cache_mb": {
          "value": 50,
          "evidence_ref": "ev-h100-arch-001"
        },
        "hbm_stacks": {
          "value": 5,
          "evidence_ref": "ev-h100-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-h100-arch-001"
        },
        "die_area_mm2": {
          "value": 814,
          "evidence_ref": "ev-h100-arch-001"
        },
        "transistor_count_b": {
          "value": 80,
          "evidence_ref": "ev-h100-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-h100-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-h100-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 80,
          "evidence_ref": "ev-h100-002"
        },
        "bandwidth_gbps": {
          "value": 3350,
          "evidence_ref": "ev-h100-002"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "NVLink-4.0",
        "bandwidth_gbps": 900,
        "world_size": 8,
        "topology": "switched",
        "switch": "nvswitch-gen3"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "InfiniBand-NDR",
        "nic": "ConnectX-7"
      },
      "power": {
        "tdp_w": {
          "value": 700,
          "evidence_ref": "ev-h100-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          },
          {
            "id": "lmdeploy",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp8-e5m2",
          "int8",
          "int4-awq",
          "int4-gptq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "H100 SXM",
        "H100-80GB"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-h100-001",
          "tier": "official",
          "source_type": "vendor-datasheet",
          "url": "https://resources.nvidia.com/en-us-tensor-core",
          "accessed": "2026-04-28",
          "citation": "NVIDIA H100 Tensor Core GPU Datasheet"
        },
        {
          "id": "ev-h100-002",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.nvidia.com/en-us/data-center/h100/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA H100 product page memory specifications"
        },
        {
          "id": "ev-h100-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://resources.nvidia.com/en-us-tensor-core/gtc22-whitepaper-hopper",
          "accessed": "2026-04-28",
          "citation": "NVIDIA H100 Hopper Architecture Whitepaper (GH100 die: 132 SMs enabled, 50 MB L2, 80B transistors, 814 mm² @ TSMC 4N)"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "h200-sxm",
      "name": "NVIDIA H200 SXM 141GB",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "hopper-gen1",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1979,
          "evidence_ref": "ev-h200-001"
        },
        "bf16_tflops": {
          "value": 989,
          "evidence_ref": "ev-h200-001"
        },
        "fp16_tflops": {
          "value": 989,
          "evidence_ref": "ev-h200-001"
        },
        "int8_tops": {
          "value": 1979,
          "evidence_ref": "ev-h200-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 132,
          "evidence_ref": "ev-h200-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-h200-arch-001"
        },
        "l2_cache_mb": {
          "value": 50,
          "evidence_ref": "ev-h200-arch-001"
        },
        "hbm_stacks": {
          "value": 6,
          "evidence_ref": "ev-h200-arch-001"
        },
        "process_node_nm": {
          "value": 4,
          "evidence_ref": "ev-h200-arch-001"
        },
        "die_area_mm2": {
          "value": 814,
          "evidence_ref": "ev-h200-arch-001"
        },
        "transistor_count_b": {
          "value": 80,
          "evidence_ref": "ev-h200-arch-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-h200-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-h200-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 141,
          "evidence_ref": "ev-h200-001"
        },
        "bandwidth_gbps": {
          "value": 4800,
          "evidence_ref": "ev-h200-001"
        },
        "type": "HBM3e"
      },
      "scale_up": {
        "protocol": "NVLink-4.0",
        "bandwidth_gbps": 900,
        "world_size": 8,
        "topology": "switched",
        "switch": "nvswitch-gen3"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 400,
        "protocol": "InfiniBand-NDR",
        "nic": "ConnectX-7"
      },
      "power": {
        "tdp_w": {
          "value": 700,
          "evidence_ref": "ev-h200-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp8-e5m2",
          "int8",
          "int4-awq",
          "int4-gptq"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "H200",
        "H200 141GB"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-h200-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://www.nvidia.com/en-us/data-center/h200/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA H200 Tensor Core GPU product page"
        },
        {
          "id": "ev-h200-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://resources.nvidia.com/en-us-tensor-core/gtc22-whitepaper-hopper",
          "accessed": "2026-04-28",
          "citation": "H200 reuses GH100 die (132 SMs, 50 MB L2, 814 mm²); 6× HBM3e stacks @ 24 GB ⇒ 141 GB capacity"
        }
      ],
      "disclaimers": [
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "l40s",
      "name": "NVIDIA L40S",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "ada-lovelace",
      "status": "in-production",
      "release_year": 2023,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1466,
          "evidence_ref": "ev-l40s-001"
        },
        "bf16_tflops": {
          "value": 366,
          "evidence_ref": "ev-l40s-001"
        },
        "fp16_tflops": {
          "value": 366,
          "evidence_ref": "ev-l40s-001"
        },
        "int8_tops": {
          "value": 1466,
          "evidence_ref": "ev-l40s-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 142,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "l2_cache_mb": {
          "value": 96,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "die_area_mm2": {
          "value": 609,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "transistor_count_b": {
          "value": 76,
          "evidence_ref": "ev-l40s-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-l40s-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-l40s-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 48,
          "evidence_ref": "ev-l40s-001"
        },
        "bandwidth_gbps": {
          "value": 864,
          "evidence_ref": "ev-l40s-001"
        },
        "type": "GDDR6"
      },
      "scale_up": {
        "protocol": "PCIe-Gen4",
        "bandwidth_gbps": 64,
        "world_size": 8,
        "topology": "pcie-fabric"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "InfiniBand-NDR",
        "nic": "ConnectX-7"
      },
      "power": {
        "tdp_w": {
          "value": 350,
          "evidence_ref": "ev-l40s-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-12.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          },
          {
            "id": "sglang",
            "status": "officially-supported",
            "versions": [
              "0.4"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.14"
            ]
          },
          {
            "id": "lmdeploy",
            "status": "officially-supported",
            "versions": [
              "0.6"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp8-e5m2",
          "int8",
          "int4-awq",
          "int4-gptq",
          "w4a16"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "L40S",
        "L40 Server"
      ],
      "chinese_names": [],
      "photos": [],
      "evidence": [
        {
          "id": "ev-l40s-001",
          "tier": "official",
          "source_type": "vendor-datasheet",
          "url": "https://www.nvidia.com/en-us/data-center/l40s/",
          "accessed": "2026-04-28",
          "citation": "NVIDIA L40S Datasheet (Ada Lovelace inference-optimized PCIe card)"
        },
        {
          "id": "ev-l40s-arch-001",
          "tier": "official",
          "source_type": "vendor-whitepaper",
          "url": "https://images.nvidia.com/aem-dam/Solutions/Data-Center/l4/nvidia-ada-gpu-architecture-whitepaper-v2.1.pdf",
          "accessed": "2026-04-28",
          "citation": "NVIDIA Ada Lovelace Whitepaper (AD102 die: 142 SMs enabled in L40S, 96 MB L2, 76B transistors, 609 mm² @ TSMC 4N)"
        }
      ],
      "disclaimers": [
        "L40S uses GDDR6 (not HBM); memory bandwidth is much lower than H100/A100, making it bandwidth-bound for many decode workloads.",
        "Designed for graphics + inference workloads (gaming derivative); strong FP8/INT8 throughput per dollar.",
        "All performance figures are vendor-claimed unless tier=measured."
      ]
    },
    {
      "id": "r200-sxm",
      "name": "NVIDIA R200 SXM (Vera Rubin)",
      "vendor": {
        "id": "nvidia",
        "name": "NVIDIA",
        "chinese_names": [
          "英伟达"
        ],
        "country": "US",
        "type": "hardware",
        "website": "https://www.nvidia.com/",
        "aliases": [
          "NVIDIA Corporation"
        ]
      },
      "generation": "rubin-gen1",
      "status": "announced",
      "release_year": 2026,
      "form_factor": "sxm",
      "compute": {
        "fp4_tflops": {
          "value": 30000,
          "evidence_ref": "ev-r200-001"
        },
        "fp8_tflops": {
          "value": 15000,
          "evidence_ref": "ev-r200-001"
        },
        "bf16_tflops": {
          "value": 7500,
          "evidence_ref": "ev-r200-001"
        },
        "fp16_tflops": {
          "value": 7500,
          "evidence_ref": "ev-r200-001"
        },
        "fp32_tflops": {
          "value": 3700,
          "evidence_ref": "ev-r200-001"
        },
        "int8_tops": {
          "value": 15000,
          "evidence_ref": "ev-r200-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 200,
          "evidence_ref": "ev-r200-arch"
        },
        "compute_unit_label": "SM",
        "tensor_cores_per_cu": {
          "value": 4,
          "evidence_ref": "ev-r200-arch"
        },
        "l2_cache_mb": {
          "value": 128,
          "evidence_ref": "ev-r200-arch"
        },
        "hbm_stacks": {
          "value": 8,
          "evidence_ref": "ev-r200-001"
        },
        "process_node_nm": {
          "value": 3,
          "evidence_ref": "ev-r200-arch"
        },
        "pcie_gen": {
          "value": 6,
          "evidence_ref": "ev-r200-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-r200-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 288,
          "evidence_ref": "ev-r200-001"
        },
        "bandwidth_gbps": {
          "value": 13000,
          "evidence_ref": "ev-r200-001"
        },
        "type": "HBM4"
      },
      "scale_up": {
        "protocol": "NVLink-6.0",
        "bandwidth_gbps": 3600,
        "world_size": 144,
        "topology": "switched",
        "switch": "nvswitch-gen5"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 1600,
        "protocol": "InfiniBand-XDR",
        "nic": "ConnectX-9"
      },
      "power": {
        "tdp_w": {
          "value": 1800,
          "evidence_ref": "ev-r200-001"
        }
      },
      "software_support": {
        "drivers": [
          "CUDA-13.x"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "officially-supported",
            "versions": [
              "0.7-rubin"
            ]
          },
          {
            "id": "tensorrt-llm",
            "status": "officially-supported",
            "versions": [
              "0.16"
            ]
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "fp8-e5m2",
          "fp4",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "ep",
          "sp",
          "disaggregated"
        ]
      },
      "aliases": [
        "Rubin R200",
        "Vera Rubin R200"
      ],
      "chinese_names": [
        "英伟达 Rubin R200"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-r200-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.nvidia.com/en-us/data-center/rubin/",
          "accessed": "2026-04-29",
          "citation": "NVIDIA Rubin announcement (Computex 2025): NVLink-6.0 at 3.6 TB/s, NVL144 rack-scale, HBM4 with 288 GB / 13 TB/s. Vendor-claimed; not yet GA."
        },
        {
          "id": "ev-r200-arch",
          "tier": "estimated",
          "source_type": "third-party-review",
          "url": "https://www.semianalysis.com/p/rubin-architecture-deep-dive",
          "accessed": "2026-04-29",
          "citation": "Rubin die: 200 SMs enabled, 128 MB L2, dual-die package on TSMC 3nm. Specs estimated from public roadmap; subject to change at GA."
        }
      ],
      "disclaimers": [
        "Status: announced — not yet in customer hands. Specs sourced from Computex 2025 keynote; subject to revision at GA.",
        "NVL144 super-pod (144 R200 in one scale-up domain) is planned for 2026 H2."
      ]
    },
    {
      "id": "pingtouge-hanguang-800",
      "name": "平头哥 含光 800",
      "vendor": {
        "id": "pingtouge",
        "name": "T-Head (Pingtouge)",
        "chinese_names": [
          "平头哥",
          "平头哥半导体"
        ],
        "country": "CN",
        "type": "hardware",
        "website": "https://www.t-head.cn/",
        "aliases": [
          "T-Head",
          "Pingtouge Semiconductor"
        ]
      },
      "generation": "hanguang-gen1",
      "status": "in-production",
      "release_year": 2019,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": null,
        "bf16_tflops": null,
        "fp16_tflops": {
          "value": 25,
          "evidence_ref": "ev-hg800-001"
        },
        "int8_tops": {
          "value": 825,
          "evidence_ref": "ev-hg800-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 4,
          "evidence_ref": "ev-hg800-arch-001"
        },
        "compute_unit_label": "Cluster",
        "process_node_nm": {
          "value": 12,
          "evidence_ref": "ev-hg800-arch-001"
        },
        "transistor_count_b": {
          "value": 17,
          "evidence_ref": "ev-hg800-arch-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-hg800-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-hg800-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 16,
          "evidence_ref": "ev-hg800-001"
        },
        "bandwidth_gbps": {
          "value": 256,
          "evidence_ref": "ev-hg800-001"
        },
        "type": "LPDDR5"
      },
      "scale_up": {
        "protocol": "PCIe-Gen4",
        "bandwidth_gbps": 64,
        "world_size": 4,
        "topology": "pcie-fabric"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 100,
        "protocol": "RoCEv2"
      },
      "power": {
        "tdp_w": {
          "value": 280,
          "evidence_ref": "ev-hg800-001"
        }
      },
      "software_support": {
        "drivers": [
          "HanGuangAI"
        ],
        "engines": [
          {
            "id": "hanguang-engine",
            "status": "officially-supported",
            "versions": [
              "1.0"
            ]
          }
        ],
        "quantizations": [
          "int8"
        ],
        "parallelism": []
      },
      "aliases": [
        "HanGuang 800"
      ],
      "chinese_names": [
        "含光800"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-hg800-001",
          "tier": "official",
          "source_type": "vendor-press-release",
          "url": "https://www.t-head.cn/",
          "accessed": "2026-04-28",
          "citation": "T-Head HanGuang 800 launch coverage (Alibaba Cloud Apsara 2019)"
        },
        {
          "id": "ev-hg800-arch-001",
          "tier": "estimated",
          "source_type": "vendor-press-release",
          "url": "https://www.t-head.cn/",
          "accessed": "2026-04-28",
          "citation": "HanGuang 800 (含光800): 4-cluster NPU, 17B transistors @ TSMC 12nm; INT8-focused inference accelerator (Apsara 2019 launch)"
        }
      ],
      "disclaimers": [
        "HanGuang 800 is INT8 inference-focused; not designed for FP training.",
        "Specs are vendor-claimed; LLM inference support is not the primary use case."
      ]
    },
    {
      "id": "sn40l",
      "name": "SambaNova SN40L",
      "vendor": {
        "id": "sambanova",
        "name": "SambaNova Systems",
        "chinese_names": [
          "SambaNova"
        ],
        "country": "US",
        "type": "both",
        "website": "https://sambanova.ai/",
        "aliases": [
          "SambaNova",
          "SN40L"
        ]
      },
      "generation": "sambanova-rdu-gen4",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "reconfigurable",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 1040,
          "evidence_ref": "ev-sn40l-001"
        },
        "bf16_tflops": {
          "value": 638,
          "evidence_ref": "ev-sn40l-001"
        },
        "fp16_tflops": {
          "value": 638,
          "evidence_ref": "ev-sn40l-001"
        },
        "fp32_tflops": {
          "value": 320,
          "evidence_ref": "ev-sn40l-001"
        },
        "int8_tops": {
          "value": 1040,
          "evidence_ref": "ev-sn40l-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 1040,
          "evidence_ref": "ev-sn40l-001"
        },
        "compute_unit_label": "RDU-Tile",
        "on_die_sram_mb": {
          "value": 64,
          "evidence_ref": "ev-sn40l-001"
        },
        "process_node_nm": {
          "value": 5,
          "evidence_ref": "ev-sn40l-001"
        },
        "die_area_mm2": {
          "value": 800,
          "evidence_ref": "ev-sn40l-001"
        },
        "pcie_gen": {
          "value": 5,
          "evidence_ref": "ev-sn40l-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-sn40l-001"
        },
        "reconfigurable": true
      },
      "memory": {
        "capacity_gb": {
          "value": 1664,
          "evidence_ref": "ev-sn40l-001"
        },
        "bandwidth_gbps": {
          "value": 6400,
          "evidence_ref": "ev-sn40l-001"
        },
        "type": "HBM3"
      },
      "scale_up": {
        "protocol": "SambaFabric",
        "bandwidth_gbps": 800,
        "world_size": 16,
        "topology": "switched"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "Ethernet"
      },
      "power": {
        "tdp_w": {
          "value": 300,
          "evidence_ref": "ev-sn40l-001"
        }
      },
      "software_support": {
        "drivers": [
          "SambaNova-SDK-2.x",
          "SambaFlow"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "SambaNova native serving stack"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp",
          "dataflow"
        ]
      },
      "aliases": [
        "SN40L",
        "Cardinal SN40L"
      ],
      "chinese_names": [
        "SambaNova SN40L"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-sn40l-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://sambanova.ai/products/sn40l",
          "accessed": "2026-04-29",
          "citation": "SambaNova SN40L (Cardinal): RDU (Reconfigurable Dataflow Unit) with 1040 PCUs, 64 MB on-chip SRAM, 1.5 TB HBM3 + DDR5 hybrid memory (1664 GB total), 5nm TSMC, 800 mm² die. Full system: 8-card SN40L node with 12 TB aggregate fast memory."
        }
      ],
      "disclaimers": [
        "SN40L unique architecture: 3-tier memory (on-chip SRAM + HBM + DDR5) lets it host 5+ trillion-parameter models in a single node — no other accelerator does this.",
        "Compute reported is per-RDU; sustained throughput depends on dataflow graph fit (reconfigurable; not all models map cleanly)."
      ]
    },
    {
      "id": "wormhole-n300",
      "name": "Tenstorrent Wormhole n300",
      "vendor": {
        "id": "tenstorrent",
        "name": "Tenstorrent",
        "chinese_names": [
          "Tenstorrent"
        ],
        "country": "US",
        "type": "both",
        "website": "https://tenstorrent.com/",
        "aliases": [
          "TT",
          "Tenstorrent Inc"
        ]
      },
      "generation": "tenstorrent-wormhole-gen2",
      "status": "in-production",
      "release_year": 2024,
      "form_factor": "pcie",
      "compute": {
        "fp4_tflops": null,
        "fp8_tflops": {
          "value": 466,
          "evidence_ref": "ev-tt-wh-001"
        },
        "bf16_tflops": {
          "value": 233,
          "evidence_ref": "ev-tt-wh-001"
        },
        "fp16_tflops": {
          "value": 233,
          "evidence_ref": "ev-tt-wh-001"
        },
        "fp32_tflops": {
          "value": 74,
          "evidence_ref": "ev-tt-wh-001"
        },
        "int8_tops": {
          "value": 466,
          "evidence_ref": "ev-tt-wh-001"
        }
      },
      "architecture": {
        "compute_unit_count": {
          "value": 128,
          "evidence_ref": "ev-tt-wh-001"
        },
        "compute_unit_label": "Tensix",
        "process_node_nm": {
          "value": 12,
          "evidence_ref": "ev-tt-wh-001"
        },
        "pcie_gen": {
          "value": 4,
          "evidence_ref": "ev-tt-wh-001"
        },
        "pcie_lanes": {
          "value": 16,
          "evidence_ref": "ev-tt-wh-001"
        }
      },
      "memory": {
        "capacity_gb": {
          "value": 24,
          "evidence_ref": "ev-tt-wh-001"
        },
        "bandwidth_gbps": {
          "value": 576,
          "evidence_ref": "ev-tt-wh-001"
        },
        "type": "GDDR6"
      },
      "scale_up": {
        "protocol": "Galaxy-Mesh",
        "bandwidth_gbps": 800,
        "world_size": 32,
        "topology": "2D-torus"
      },
      "scale_out": {
        "bandwidth_gbps_per_card": 200,
        "protocol": "Ethernet"
      },
      "power": {
        "tdp_w": {
          "value": 300,
          "evidence_ref": "ev-tt-wh-001"
        }
      },
      "software_support": {
        "drivers": [
          "TT-Metal",
          "TT-NN",
          "TT-Buda"
        ],
        "engines": [
          {
            "id": "vllm",
            "status": "community-port",
            "versions": [],
            "notes": "tt-vllm fork; full open-source software stack"
          }
        ],
        "quantizations": [
          "bf16",
          "fp16",
          "fp8-e4m3",
          "int8"
        ],
        "parallelism": [
          "tp",
          "pp"
        ]
      },
      "aliases": [
        "Wormhole",
        "Wormhole n300",
        "tt-wh-n300"
      ],
      "chinese_names": [
        "Tenstorrent 虫洞 n300"
      ],
      "photos": [],
      "evidence": [
        {
          "id": "ev-tt-wh-001",
          "tier": "official",
          "source_type": "vendor-product-page",
          "url": "https://tenstorrent.com/hardware/wormhole",
          "accessed": "2026-04-29",
          "citation": "Tenstorrent Wormhole n300 — 128 Tensix cores, 24GB GDDR6, 12nm Global Foundries, RISC-V based, fully open-source software stack (TT-Metal/TT-NN). Galaxy interconnect for 32-card 2D-torus topology."
        }
      ],
      "disclaimers": [
        "Tenstorrent emphasizes open-source software and RISC-V instruction set — distinct from CUDA/ROCm/CANN walled gardens.",
        "Tile-based architecture: each Tensix core has 5 RISC-V CPUs + 1 matrix engine + 32 KB compute SRAM."
      ]
    }
  ]
}