diff --git a/README.md b/README.md index 3500595..42537d4 100644 --- a/README.md +++ b/README.md @@ -30,115 +30,44 @@ GLM-4.7 demonstrates competitive performance against the newest generation of fl *Note: Best scores per category are highlighted in $\color{green}{\text{green}}$. Data sourced from [Z.ai Official Blog](https://z.ai/blog/glm-4.7).* ```mermaid -flowchart TD - subgraph GLM [🚀 GLM-4.7 Dominance] - direction TB - MATH[🧮 MATH
AIME 25] - CODING[💻 CODING
LiveCodeBench v6] - SCIENCE[🔬 SCIENCE
GPQA-Diamond] - LOGIC[🧠 LOGIC
HLE w/Tools] - ENGINEERING[⚙️ ENGINEERING
SWE-bench] - AGENTIC[🤖 AGENTIC
τ²-Bench] - - GLM_MATH[GLM-4.7
95.7%
] - GLM_CODING[GLM-4.7
84.9%
] - GLM_SCIENCE[GLM-4.7
85.7%
] - GLM_LOGIC[GLM-4.7
42.8%
] - GLM_ENG[GLM-4.7
73.8%
] - GLM_AGENT[GLM-4.7
87.4%
] - - MATH --> GLM_MATH - CODING --> GLM_CODING - SCIENCE --> GLM_SCIENCE - LOGIC --> GLM_LOGIC - ENGINEERING --> GLM_ENG - AGENTIC --> GLM_AGENT - end - - subgraph COMPETITORS [📊 Top Competitors] - direction LR - - subgraph GEM [💎 Gemini 3.0 Pro] - G_MATH[95.0%] - G_CODING[90.7%] - G_SCIENCE[91.9%] - G_LOGIC[45.8%] - G_ENG[76.2%] - G_AGENT[90.7%] - end - - subgraph GPT [🔵 GPT-5.1 High] - P_MATH[94.0%] - P_CODING[87.0%] - P_SCIENCE[88.1%] - P_LOGIC[42.7%] - P_ENG[76.3%] - P_AGENT[82.7%] - end - - subgraph DS [🟢 DeepSeek-V3.2] - D_MATH[93.1%] - D_CODING[83.3%] - D_SCIENCE[82.4%] - D_LOGIC[40.8%] - D_ENG[73.1%] - D_AGENT[85.3%] - end - - subgraph CLAUDE [🟣 Claude Sonnet 4.5] - C_MATH[87.0%] - C_CODING[64.0%] - C_SCIENCE[83.4%] - C_LOGIC[32.0%] - C_ENG[77.2%] - C_AGENT[87.2%] - end - end - - GLM_MATH -.-> G_MATH - GLM_MATH -.-> P_MATH - GLM_MATH -.-> D_MATH - GLM_MATH -.-> C_MATH - - GLM_CODING -.-> G_CODING - GLM_CODING -.-> P_CODING - GLM_CODING -.-> D_CODING - GLM_CODING -.-> C_CODING - - GLM_SCIENCE -.-> G_SCIENCE - GLM_SCIENCE -.-> P_SCIENCE - GLM_SCIENCE -.-> D_SCIENCE - GLM_SCIENCE -.-> C_SCIENCE - - GLM_LOGIC -.-> G_LOGIC - GLM_LOGIC -.-> P_LOGIC - GLM_LOGIC -.-> D_LOGIC - GLM_LOGIC -.-> C_LOGIC - - GLM_ENG -.-> G_ENG - GLM_ENG -.-> P_ENG - GLM_ENG -.-> D_ENG - GLM_ENG -.-> C_ENG - - GLM_AGENT -.-> G_AGENT - GLM_AGENT -.-> P_AGENT - GLM_AGENT -.-> D_AGENT - GLM_AGENT -.-> C_AGENT - - classDef glmNode fill:#00C853,stroke:#004D40,stroke-width:3px,color:#FFFFFF,font-weight:bold,font-size:13px,radius:8px - classDef geminiNode fill:#FFB74D,stroke:#E65100,stroke-width:2px,color:#FFFFFF,font-weight:bold,font-size:12px,radius:6px - classDef gptNode fill:#64B5F6,stroke:#1565C0,stroke-width:2px,color:#FFFFFF,font-weight:bold,font-size:12px,radius:6px - classDef deepseekNode fill:#4DB6AC,stroke:#00695C,stroke-width:2px,color:#FFFFFF,font-weight:bold,font-size:12px,radius:6px - classDef claudeNode 
fill:#AB47BC,stroke:#6A1B9A,stroke-width:2px,color:#FFFFFF,font-weight:bold,font-size:12px,radius:6px - classDef categoryNode fill:#37474F,stroke:#263238,stroke-width:2px,color:#ECEFF1,font-weight:bold,font-size:11px,radius:4px - classDef subgraphNode fill:#FAFAFA,stroke:#B0BEC5,stroke-width:2px,stroke-dasharray: 5 5 - - class GLM_MATH,GLM_CODING,GLM_SCIENCE,GLM_LOGIC,GLM_ENG,GLM_AGENT glmNode - class G_MATH,G_CODING,G_SCIENCE,G_LOGIC,G_ENG,G_AGENT geminiNode - class P_MATH,P_CODING,P_SCIENCE,P_LOGIC,P_ENG,P_AGENT gptNode - class D_MATH,D_CODING,D_SCIENCE,D_LOGIC,D_ENG,D_AGENT deepseekNode - class C_MATH,C_CODING,C_SCIENCE,C_LOGIC,C_ENG,C_AGENT claudeNode - class MATH,CODING,SCIENCE,LOGIC,ENGINEERING,AGENTIC categoryNode +mindmap + root((GLM-4.7
๐Ÿ† SOTA 2025)) + Math๐Ÿงฎ + AIME 25
95.7%
โ”โ”โ”โ”โ”โ”โ”โ”โ” + GPT: 94.0%
โ”โ”โ”โ”โ”โ”โ”โ”โ–‘ + Gemini: 95.0%
โ”โ”โ”โ”โ”โ”โ”โ”โ” + DeepSeek: 93.1%
โ”โ”โ”โ”โ”โ”โ”โ”โ–‘ + Claude: 87.0%
โ”โ”โ”โ”โ”โ”โ–‘โ–‘โ–‘ + Coding๐Ÿ’ป + LiveCode
84.9%
━━━━━━━━━ + GPT: 87.0%
━━━━━━━━━ + Gemini: 90.7%
━━━━━━━━━ + DeepSeek: 83.3%
━━━━━━━━░ + Claude: 64.0%
━━━━░░░░ + Science🔬 + GPQA
85.7%
━━━━━━━━━ + GPT: 88.1%
━━━━━━━━━ + Gemini: 91.9%
━━━━━━━━━ + DeepSeek: 82.4%
━━━━━━░░░ + Claude: 83.4%
━━━━━━░░░ + Logic🧠 + HLE
42.8%
━━━━━━░░░ + GPT: 42.7%
━━━━━━░░░ + Gemini: 45.8%
━━━━━━▓░░ + DeepSeek: 40.8%
━━━━━━░░░ + Claude: 32.0%
━━━━░░░░ + Engineering⚙️ + SWE-bench
73.8%
━━━━━━━━━ + GPT: 76.3%
━━━━━━━━━ + Gemini: 76.2%
━━━━━━━━░ + DeepSeek: 73.1%
━━━━━━░░░ + Claude: 77.2%
━━━━━━━━━ + Agentic🤖 + τ²-Bench
87.4%
━━━━━━━━━ + GPT: 82.7%
━━━━━━░░░ + Gemini: 90.7%
━━━━━━━━━ + DeepSeek: 85.3%
━━━━━━━━░ + Claude: 87.2%
━━━━━━━━░ ``` | Category | Benchmark | **GLM-4.7** | Claude Sonnet 4.5 | GPT-5.1 High | DeepSeek-V3.2 | Gemini 3.0 Pro | Source |