From 009683e36b6c057acd560688bbeb635ed5417297 Mon Sep 17 00:00:00 2001 From: York Cao <52438394+Baymine@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:38:01 +0800 Subject: [PATCH] [opt](cache) enhance cache key computation by removing comments and trimming SQL input (#46099) - Currently, the SQL cache system in Doris may miss cache hits because semantically identical queries are treated as different when they differ only in: - Extra whitespace characters in the SQL query - SQL comments that don't affect the query execution - For example, these queries are semantically identical but would generate different cache keys: ```sql SELECT * FROM table; -- Same query with comments and extra spaces /* Comment */ SELECT * FROM table ; ``` - This PR improves the SQL cache hit rate by: - Trimming whitespace from SQL queries - Removing SQL comments before calculating the cache key MD5 - This ensures that queries that are semantically identical but differ only in whitespace or comments will now hit the same cache entry, improving cache efficiency and reducing unnecessary query executions --- .../main/java/org/apache/doris/nereids/SqlCacheContext.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java index 45a4c1d50d5794..364c8bcab65162 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java @@ -26,6 +26,7 @@ import org.apache.doris.mysql.FieldInfo; import org.apache.doris.mysql.privilege.DataMaskPolicy; import org.apache.doris.mysql.privilege.RowFilterPolicy; +import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Variable; import org.apache.doris.nereids.util.Utils; @@ -349,7 +350,7 @@ public PUniqueId 
getOrComputeCacheKeyMd5() { /** doComputeCacheKeyMd5 */ public synchronized PUniqueId doComputeCacheKeyMd5(Set usedVariables) { - StringBuilder cacheKey = new StringBuilder(originSql); + StringBuilder cacheKey = new StringBuilder(NereidsParser.removeCommentAndTrimBlank(originSql.trim())); for (Entry entry : usedViews.entrySet()) { cacheKey.append("|") .append(entry.getKey())