Commit f9b10ebe authored by Vũ Hoàng Anh

refactor: unified SKU search (exact match, no subsequence) + store search...

refactor: unified SKU search (exact match, no subsequence) + store search upgrade (token AND + reverse LIKE fallback)
parent 40f5a569
...@@ -130,6 +130,8 @@ def format_product_results(products: list[dict]) -> list[dict]: ...@@ -130,6 +130,8 @@ def format_product_results(products: list[dict]) -> list[dict]:
continue continue
seen_skus.add(dedup_key) seen_skus.add(dedup_key)
description_value = p.get("description_text_full") or p.get("description_text") or ""
product_entry = { product_entry = {
"sku": sku, "sku": sku,
"sku_color": sku_color, "sku_color": sku_color,
...@@ -139,7 +141,7 @@ def format_product_results(products: list[dict]) -> list[dict]: ...@@ -139,7 +141,7 @@ def format_product_results(products: list[dict]) -> list[dict]:
"sale_price": int(sale_price) if sale_price else int(original_price), "sale_price": int(sale_price) if sale_price else int(original_price),
"url": web_url, "url": web_url,
"thumbnail_image_url": thumb_url, "thumbnail_image_url": thumb_url,
"description": _neutralize_generic_print(p.get("description_text") or ""), "description": _neutralize_generic_print(description_value),
} }
size_scale = p.get("size_scale") size_scale = p.get("size_scale")
if size_scale: if size_scale:
......
"""
Product Search Helpers — Unified search logic cho CANIFA.
3 search modes:
CASE 1: CODE SEARCH — Exact match on magento_ref_code / product_color_code, plus all "<base_code>-%" color variants (no CTE, no subsequence matching)
CASE 2: DISCOVERY — Hàng mới / Bán chạy (Direct SQL, no embedding)
CASE 3: SEMANTIC — Vector cosine similarity (HNSW index)
"""
import logging import logging
from common.embedding_service import create_embedding_async from common.embedding_service import create_embedding_async
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
PRODUCT_SEARCH_TABLE = "shared_source.magento_product_dimension_with_text_embedding"
# ══════════════════════════════════════════════════════════════
# 1. CODE PARSING HELPERS
# ══════════════════════════════════════════════════════════════
def _parse_code_search_input(raw_code: str) -> tuple[str, str | None]: def _parse_code_search_input(raw_code: str) -> tuple[str, str | None]:
"""Chuẩn hóa mã user gửi và tách phần internal_ref_code / suffix nếu có.""" """
Chuẩn hóa mã user gửi và tách internal_ref_code / suffix.
VD: "6OT24S001-SG055" → ("6OT24S001", "SG055")
"6OT24S001" → ("6OT24S001", None)
"""
normalized = str(raw_code or "").strip().upper().replace(" ", "") normalized = str(raw_code or "").strip().upper().replace(" ", "")
if "-" not in normalized: if "-" not in normalized:
return normalized, None return normalized, None
...@@ -14,18 +33,13 @@ def _parse_code_search_input(raw_code: str) -> tuple[str, str | None]: ...@@ -14,18 +33,13 @@ def _parse_code_search_input(raw_code: str) -> tuple[str, str | None]:
return internal_ref_code, suffix_code or None return internal_ref_code, suffix_code or None
def _build_code_subsequence_pattern(raw_code: str) -> str:
"""
Build LIKE pattern giữ nguyên thứ tự ký tự.
Dùng cho trường hợp user nhập thiếu 1 ký tự ở giữa mã nhưng vẫn muốn match ra full code.
VD: 6ST25W05 -> 6%S%T%2%5%W%0%5%
"""
normalized = str(raw_code or "").strip().upper().replace(" ", "").replace("-", "")
if not normalized:
return "%"
return "%" + "%".join(normalized) + "%"
# ══════════════════════════════════════════════════════════════
# 2. SQL FILTER BUILDERS
# ══════════════════════════════════════════════════════════════
def _get_price_clauses(params, sql_params: list) -> list[str]: def _get_price_clauses(params, sql_params: list) -> list[str]:
"""Lọc theo giá (Parameterized).""" """Lọc theo giá (Parameterized)."""
clauses = [] clauses = []
...@@ -37,7 +51,6 @@ def _get_price_clauses(params, sql_params: list) -> list[str]: ...@@ -37,7 +51,6 @@ def _get_price_clauses(params, sql_params: list) -> list[str]:
if p_max is not None: if p_max is not None:
clauses.append("sale_price <= %s") clauses.append("sale_price <= %s")
sql_params.append(p_max) sql_params.append(p_max)
return clauses return clauses
...@@ -48,66 +61,61 @@ def _get_discount_params(params) -> tuple[int | None, int | None]: ...@@ -48,66 +61,61 @@ def _get_discount_params(params) -> tuple[int | None, int | None]:
if discount_min is not None or discount_max is not None: if discount_min is not None or discount_max is not None:
if discount_min and discount_max: if discount_min and discount_max:
logger.info(f"🏷️ [DISCOUNT FILTER] Filtering products with discount {discount_min}% - {discount_max}%") logger.info("🏷️ [DISCOUNT] %d%% - %d%%", discount_min, discount_max)
elif discount_min: elif discount_min:
logger.info(f"🏷️ [DISCOUNT FILTER] Filtering products with discount >= {discount_min}%") logger.info("🏷️ [DISCOUNT] >= %d%%", discount_min)
else: else:
logger.info(f"🏷️ [DISCOUNT FILTER] Filtering products with discount <= {discount_max}%") logger.info("🏷️ [DISCOUNT] <= %d%%", discount_max)
return discount_min, discount_max return discount_min, discount_max
def _get_metadata_clauses(params, sql_params: list) -> list[str]: def _get_metadata_clauses(params, sql_params: list) -> list[str]:
""" """
HARD FILTER: Gender + Age — lọc trực tiếp ở SQL level. HARD FILTER ở SQL level: Gender, Age, Color, Product name.
Gender tự động include 'unisex' fallback cho men/women. Gender tự động include 'unisex' fallback cho men/women.
Color + Product type → semantic search tự handle qua description.
""" """
clauses = [] clauses = []
# Gender filter # ── Gender ──
gender_val = getattr(params, "gender_by_product", None) gender_val = getattr(params, "gender_by_product", None)
if gender_val: if gender_val:
gender_lower = gender_val.lower().strip() gender_lower = gender_val.lower().strip()
# Include 'unisex' fallback for men/women
if gender_lower in ("men", "women"): if gender_lower in ("men", "women"):
clauses.append("gender_by_product IN (%s, %s)") clauses.append("gender_by_product IN (%s, %s)")
sql_params.extend([gender_lower, "unisex"]) sql_params.extend([gender_lower, "unisex"])
else: else:
clauses.append("gender_by_product = %s") clauses.append("gender_by_product = %s")
sql_params.append(gender_lower) sql_params.append(gender_lower)
logger.info(f"👫 [SQL FILTER] Gender: {gender_val}") logger.info("👫 [SQL FILTER] Gender: %s", gender_val)
# Age filter # ── Age ──
age_val = getattr(params, "age_by_product", None) age_val = getattr(params, "age_by_product", None)
if age_val: if age_val:
age_lower = age_val.lower().strip()
clauses.append("age_by_product = %s") clauses.append("age_by_product = %s")
sql_params.append(age_lower) sql_params.append(age_val.lower().strip())
logger.info(f"🎂 [SQL FILTER] Age: {age_val}") logger.info("🎂 [SQL FILTER] Age: %s", age_val)
# Color filter (LIKE match on master_color OR product_color_name) # ── Color ──
color_val = getattr(params, "master_color", None) color_val = getattr(params, "master_color", None)
if color_val: if color_val:
color_lower = color_val.lower().strip() color_lower = color_val.lower().strip()
clauses.append("(LOWER(master_color) LIKE %s OR LOWER(product_color_name) LIKE %s)") clauses.append("(LOWER(master_color) LIKE %s OR LOWER(product_color_name) LIKE %s)")
sql_params.extend([f"%{color_lower}%", f"%{color_lower}%"]) sql_params.extend([f"%{color_lower}%", f"%{color_lower}%"])
logger.info(f"🎨 [SQL FILTER] Color: {color_val}") logger.info("🎨 [SQL FILTER] Color: %s", color_val)
# ── Product name (synonym resolve + related lines) ──
from agent.tools.product_mapping import PRODUCT_LINE_MAP from agent.tools.product_mapping import PRODUCT_LINE_MAP
GENERIC_WORDS = {key.split()[0].lower() for key in PRODUCT_LINE_MAP.keys()} GENERIC_WORDS = {key.split()[0].lower() for key in PRODUCT_LINE_MAP.keys()}
name_val = getattr(params, "product_name", None) name_val = getattr(params, "product_name", None)
if name_val: if name_val:
from agent.tools.product_mapping import get_related_lines, resolve_product_name from agent.tools.product_mapping import get_related_lines, resolve_product_name
# Support '/' separator: "Áo lót/Áo bra active" → ["Áo lót", "Áo bra active"]
name_parts = [p.strip() for p in name_val.split("/") if p.strip()] name_parts = [p.strip() for p in name_val.split("/") if p.strip()]
all_phrases = set() all_phrases = set()
for part in name_parts: for part in name_parts:
resolved = resolve_product_name(part) resolved = resolve_product_name(part)
# Also expand related lines
for rname in get_related_lines(resolved): for rname in get_related_lines(resolved):
words = rname.strip().split() words = rname.strip().split()
phrase = " ".join(w for w in words if w.lower() not in GENERIC_WORDS) phrase = " ".join(w for w in words if w.lower() not in GENERIC_WORDS)
...@@ -117,132 +125,81 @@ def _get_metadata_clauses(params, sql_params: list) -> list[str]: ...@@ -117,132 +125,81 @@ def _get_metadata_clauses(params, sql_params: list) -> list[str]:
if all_phrases: if all_phrases:
like_parts = [] like_parts = []
for phrase in all_phrases: for phrase in all_phrases:
# Search cả product_name VÀ product_line_vn
# VD: product_name = "quần sịp đùi nam" nhưng product_line_vn = "Quần lót đùi"
like_parts.append("(LOWER(product_name) LIKE %s OR LOWER(product_line_vn) LIKE %s)") like_parts.append("(LOWER(product_name) LIKE %s OR LOWER(product_line_vn) LIKE %s)")
sql_params.extend([f"%{phrase}%", f"%{phrase}%"]) sql_params.extend([f"%{phrase}%", f"%{phrase}%"])
clauses.append(f"({' OR '.join(like_parts)})") clauses.append(f"({' OR '.join(like_parts)})")
logger.info(f"🏷️ [SQL FILTER] Product name: '{name_val}' → phrases: {all_phrases}") logger.info("🏷️ [SQL FILTER] Product name: '%s' → %s", name_val, all_phrases)
return clauses return clauses
# ══════════════════════════════════════════════════════════════
# 3. MAIN QUERY BUILDER
# ══════════════════════════════════════════════════════════════
async def build_starrocks_query(params, query_vector: list[float] | None = None) -> tuple[str, list]: async def build_starrocks_query(params, query_vector: list[float] | None = None) -> tuple[str, list]:
""" """
Build SQL query với Parameterized Query để tránh SQL Injection. Build SQL query với Parameterized Query.
Returns: (sql_string, params_list) Returns: (sql_string, params_list)
""" """
# ========================================================================================================================= # ──────────────────────────────────────────────────────────
# CASE 1: CODE SEARCH # CASE 1: CODE SEARCH — magento_ref_code + product_color_code
# ========================================================================================================================== # Logic Y HỆT sku_search_tool.py:
# base_code = phần trước dấu '-'
# normalized = full code user gửi
# ──────────────────────────────────────────────────────────
magento_code = getattr(params, "magento_ref_code", None) magento_code = getattr(params, "magento_ref_code", None)
if magento_code: if magento_code:
# Chuẩn hóa code user gửi và quy input về internal_ref_code trước khi lấy variants. normalized = str(magento_code).strip().upper().replace(" ", "")
normalized_magento_code = str(magento_code).strip().upper().replace(" ", "") base_code, suffix_code = _parse_code_search_input(normalized)
internal_ref_hint, suffix_code = _parse_code_search_input(normalized_magento_code)
internal_ref_loose = _build_code_subsequence_pattern(internal_ref_hint)
magento_code_loose = _build_code_subsequence_pattern(normalized_magento_code)
extra_filters = []
sql_params = [ sql_params = [
internal_ref_hint, base_code, # UPPER(magento_ref_code) = %s (base code)
normalized_magento_code, normalized, # UPPER(magento_ref_code) = %s (full code)
normalized_magento_code, normalized, # UPPER(product_color_code) = %s (exact match)
f"{internal_ref_hint}-%", f"{base_code}-%", # UPPER(product_color_code) LIKE %s (all variants)
internal_ref_loose,
internal_ref_loose,
magento_code_loose,
magento_code_loose,
] ]
# Ưu tiên màu user nói trong message; đây là filter mạnh hơn suffix trong mã. logger.info("🏷️ [CODE SEARCH] input=%s, base=%s, suffix=%s", normalized, base_code, suffix_code)
color_val = getattr(params, "master_color", None)
if color_val:
extra_filters.append("(LOWER(master_color) LIKE %s OR LOWER(product_color_name) LIKE %s)")
color_like = f"%{color_val.lower()}%"
sql_params.extend([color_like, color_like])
logger.info(
"🎨 [CODE SEARCH] Code=%s, internal_ref=%s, explicit_color=%s",
normalized_magento_code,
internal_ref_hint,
color_val,
)
# Nếu user không nói màu nhưng có suffix, dùng suffix để ưu tiên đúng variant đã copy.
elif suffix_code:
extra_filters.append("UPPER(product_color_code) LIKE %s")
sql_params.append(f"%{suffix_code}")
logger.info(
"🏷️ [CODE SEARCH] Code=%s, internal_ref=%s, suffix_fallback=%s",
normalized_magento_code,
internal_ref_hint,
suffix_code,
)
else:
logger.info("🏷️ [CODE SEARCH] Code=%s, internal_ref=%s", normalized_magento_code, internal_ref_hint)
extra_where = ""
if extra_filters:
extra_where = " AND " + " AND ".join(extra_filters)
sql = f""" sql = f"""
WITH resolved_family AS (
SELECT DISTINCT internal_ref_code
FROM shared_source.magento_product_dimension_with_text_embedding
WHERE UPPER(internal_ref_code) = %s
OR UPPER(magento_ref_code) = %s
OR UPPER(product_color_code) = %s
OR UPPER(product_color_code) LIKE %s
OR REPLACE(UPPER(internal_ref_code), '-', '') LIKE %s
OR REPLACE(UPPER(product_color_code), '-', '') LIKE %s
OR REPLACE(UPPER(magento_ref_code), '-', '') LIKE %s
OR REPLACE(UPPER(product_color_code), '-', '') LIKE %s
)
SELECT SELECT
magento_ref_code, magento_ref_code, product_color_code, product_name,
product_color_code, master_color, product_color_name,
product_name, product_image_url_thumbnail, product_web_url,
master_color, description_text_full, sale_price, original_price, discount_amount,
product_image_url_thumbnail,
product_web_url,
description_text,
sale_price,
original_price,
discount_amount,
ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent, ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent,
age_by_product, age_by_product, gender_by_product, product_line_vn,
gender_by_product, quantity_sold, size_scale,
product_line_vn,
quantity_sold,
size_scale,
1.0 as similarity_score 1.0 as similarity_score
FROM shared_source.magento_product_dimension_with_text_embedding FROM {PRODUCT_SEARCH_TABLE}
WHERE internal_ref_code IN (SELECT internal_ref_code FROM resolved_family){extra_where} WHERE UPPER(magento_ref_code) = %s
OR UPPER(magento_ref_code) = %s
OR UPPER(product_color_code) = %s
OR UPPER(product_color_code) LIKE %s
""" """
return sql, sql_params return sql, sql_params
# ================================================================================================================== # ──────────────────────────────────────────────────────────
# CASE 2: DISCOVERY — Hàng mới / Bán chạy (Direct SQL, no embedding) # CASE 2: DISCOVERY — Hàng mới / Bán chạy (Direct SQL)
# =============================================================================================================== # ──────────────────────────────────────────────────────────
discovery_mode = getattr(params, "discovery_mode", None) discovery_mode = getattr(params, "discovery_mode", None)
if discovery_mode: if discovery_mode:
discovery_mode = discovery_mode.lower().strip() discovery_mode = discovery_mode.lower().strip()
sql_params: list = [] sql_params: list = []
# Metadata filters (gender + age)
where_clauses = _get_metadata_clauses(params, sql_params) where_clauses = _get_metadata_clauses(params, sql_params)
# Price filters
where_clauses.extend(_get_price_clauses(params, sql_params)) where_clauses.extend(_get_price_clauses(params, sql_params))
# Discovery-specific WHERE + ORDER
if discovery_mode == "new": if discovery_mode == "new":
where_clauses.append("is_new_product = 1") where_clauses.append("is_new_product = 1")
order_by = "quantity_sold DESC, magento_ref_code" order_by = "quantity_sold DESC, magento_ref_code"
logger.info("🆕 [DISCOVERY] New products, filters=%s", where_clauses) logger.info("🆕 [DISCOVERY] New products")
elif discovery_mode == "best_seller": elif discovery_mode == "best_seller":
where_clauses.append("quantity_sold > 0") where_clauses.append("quantity_sold > 0")
order_by = "quantity_sold DESC, magento_ref_code" order_by = "quantity_sold DESC, magento_ref_code"
logger.info("🔥 [DISCOVERY] Best sellers, filters=%s", where_clauses) logger.info("🔥 [DISCOVERY] Best sellers")
else: else:
discovery_mode = None discovery_mode = None
...@@ -250,24 +207,13 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -250,24 +207,13 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
where_str = " AND ".join(where_clauses) if where_clauses else "1=1" where_str = " AND ".join(where_clauses) if where_clauses else "1=1"
sql = f""" sql = f"""
SELECT SELECT
internal_ref_code, internal_ref_code, magento_ref_code, product_color_code,
magento_ref_code, product_name, master_color, product_image_url_thumbnail,
product_color_code, product_web_url, sale_price, original_price, discount_amount,
product_name,
master_color,
product_image_url_thumbnail,
product_web_url,
sale_price,
original_price,
discount_amount,
ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent, ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent,
age_by_product, age_by_product, gender_by_product, product_line_vn,
gender_by_product, quantity_sold, description_text_full, size_scale
product_line_vn, FROM {PRODUCT_SEARCH_TABLE}
quantity_sold,
description_text,
size_scale
FROM shared_source.magento_product_dimension_with_text_embedding
WHERE {where_str} WHERE {where_str}
ORDER BY {order_by} ORDER BY {order_by}
LIMIT 20 LIMIT 20
...@@ -275,9 +221,9 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -275,9 +221,9 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
logger.info("⚡ [DISCOVERY] Direct SQL — no embedding") logger.info("⚡ [DISCOVERY] Direct SQL — no embedding")
return sql, sql_params return sql, sql_params
# ============================================================ # ──────────────────────────────────────────────────────────
# CASE 3: SEMANTIC VECTOR SEARCH # CASE 3: SEMANTIC VECTOR SEARCH
# ============================================================ # ──────────────────────────────────────────────────────────
query_text = getattr(params, "description", None) query_text = getattr(params, "description", None)
if query_text and query_vector is None: if query_text and query_vector is None:
query_vector = await create_embedding_async(query_text) query_vector = await create_embedding_async(query_text)
...@@ -285,55 +231,36 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -285,55 +231,36 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
if not query_vector: if not query_vector:
return "", [] return "", []
# Vector params
v_str = "[" + ",".join(str(v) for v in query_vector) + "]" v_str = "[" + ",".join(str(v) for v in query_vector) + "]"
# Collect All Filters
sql_params: list = [] sql_params: list = []
# 1. Price # Collect all filters
price_clauses = _get_price_clauses(params, sql_params) all_clauses = _get_price_clauses(params, sql_params) + _get_metadata_clauses(params, sql_params)
# 2. Metadata: Gender + Age + Color (HARD FILTER — all at SQL level)
metadata_clauses = _get_metadata_clauses(params, sql_params)
all_clauses = price_clauses + metadata_clauses
# Discovery mode filters
discovery_mode = getattr(params, "discovery_mode", None) discovery_mode = getattr(params, "discovery_mode", None)
if discovery_mode: if discovery_mode:
discovery_mode = discovery_mode.lower().strip() discovery_mode = discovery_mode.lower().strip()
if discovery_mode == "new": if discovery_mode == "new":
all_clauses.append("is_new_product = 1") all_clauses.append("is_new_product = 1")
logger.info("🆕 [SQL FILTER] Discovery: new products only")
elif discovery_mode == "best_seller": elif discovery_mode == "best_seller":
all_clauses.append("quantity_sold > 0") all_clauses.append("quantity_sold > 0")
logger.info("🔥 [SQL FILTER] Discovery: best sellers")
# Get discount params # Discount filters
discount_min, discount_max = _get_discount_params(params) discount_min, discount_max = _get_discount_params(params)
post_filter = list(all_clauses)
post_filter_conditions = []
# Price + Gender + Age filters
if all_clauses:
post_filter_conditions.extend(all_clauses)
# Discount filters
if discount_min is not None or discount_max is not None: if discount_min is not None or discount_max is not None:
post_filter_conditions.append("sale_price < original_price") # Ensure has discount post_filter.append("sale_price < original_price")
if discount_min is not None: if discount_min is not None:
post_filter_conditions.append("discount_percent >= %s") post_filter.append("discount_percent >= %s")
sql_params.append(discount_min) sql_params.append(discount_min)
if discount_max is not None: if discount_max is not None:
post_filter_conditions.append("discount_percent <= %s") post_filter.append("discount_percent <= %s")
sql_params.append(discount_max) sql_params.append(discount_max)
post_filter_where = "" post_filter_where = (" WHERE " + " AND ".join(post_filter)) if post_filter else ""
if post_filter_conditions:
post_filter_where = " WHERE " + " AND ".join(post_filter_conditions)
# Determine sort order: best_seller uses quantity_sold, otherwise similarity_score # Sort order
if discovery_mode == "best_seller": if discovery_mode == "best_seller":
final_order = "ORDER BY max_sold DESC, max_score DESC" final_order = "ORDER BY max_sold DESC, max_score DESC"
extra_agg = ",\n MAX(quantity_sold) as max_sold" extra_agg = ",\n MAX(quantity_sold) as max_sold"
...@@ -344,28 +271,17 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -344,28 +271,17 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
sql = f""" sql = f"""
WITH vector_matches AS ( WITH vector_matches AS (
SELECT /*+ SET_VAR(ann_params='{{"ef_search":256}}') */ SELECT /*+ SET_VAR(ann_params='{{"ef_search":256}}') */
internal_ref_code, internal_ref_code, magento_ref_code, product_color_code,
magento_ref_code, product_name, master_color, product_color_name,
product_color_code, product_image_url_thumbnail, product_web_url,
product_name, sale_price, original_price, discount_amount,
master_color,
product_color_name,
product_image_url_thumbnail,
product_web_url,
sale_price,
original_price,
discount_amount,
ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent, ROUND(((original_price - sale_price) / original_price * 100), 0) as discount_percent,
age_by_product, age_by_product, gender_by_product,
gender_by_product, product_line_vn, product_line_en,
product_line_vn, description_text_full, size_scale,
product_line_en, quantity_sold, is_new_product,
description_text,
size_scale,
quantity_sold,
is_new_product,
approx_cosine_similarity(vector, {v_str}) as similarity_score approx_cosine_similarity(vector, {v_str}) as similarity_score
FROM shared_source.magento_product_dimension_with_text_embedding FROM {PRODUCT_SEARCH_TABLE}
ORDER BY similarity_score DESC ORDER BY similarity_score DESC
LIMIT 200 LIMIT 200
), ),
...@@ -377,22 +293,22 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -377,22 +293,22 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
) )
SELECT SELECT
internal_ref_code, internal_ref_code,
MAX_BY(magento_ref_code, similarity_score) as magento_ref_code, MAX_BY(magento_ref_code, similarity_score) as magento_ref_code,
product_color_code, product_color_code,
MAX_BY(product_name, similarity_score) as product_name, MAX_BY(product_name, similarity_score) as product_name,
MAX_BY(master_color, similarity_score) as master_color, MAX_BY(master_color, similarity_score) as master_color,
MAX_BY(product_image_url_thumbnail, similarity_score) as product_image_url_thumbnail, MAX_BY(product_image_url_thumbnail, similarity_score) as product_image_url_thumbnail,
MAX_BY(product_web_url, similarity_score) as product_web_url, MAX_BY(product_web_url, similarity_score) as product_web_url,
MAX_BY(sale_price, similarity_score) as sale_price, MAX_BY(sale_price, similarity_score) as sale_price,
MAX_BY(original_price, similarity_score) as original_price, MAX_BY(original_price, similarity_score) as original_price,
MAX_BY(discount_amount, similarity_score) as discount_amount, MAX_BY(discount_amount, similarity_score) as discount_amount,
MAX_BY(discount_percent, similarity_score) as discount_percent, MAX_BY(discount_percent, similarity_score) as discount_percent,
MAX_BY(description_text, similarity_score) as description_text, MAX_BY(description_text_full, similarity_score) as description_text_full,
MAX_BY(gender_by_product, similarity_score) as gender_by_product, MAX_BY(gender_by_product, similarity_score) as gender_by_product,
MAX_BY(age_by_product, similarity_score) as age_by_product, MAX_BY(age_by_product, similarity_score) as age_by_product,
MAX_BY(product_line_vn, similarity_score) as product_line_vn, MAX_BY(product_line_vn, similarity_score) as product_line_vn,
MAX_BY(quantity_sold, similarity_score) as quantity_sold, MAX_BY(quantity_sold, similarity_score) as quantity_sold,
MAX_BY(size_scale, similarity_score) as size_scale, MAX_BY(size_scale, similarity_score) as size_scale,
MAX(similarity_score) as max_score{extra_agg} MAX(similarity_score) as max_score{extra_agg}
FROM filtered_matches FROM filtered_matches
GROUP BY product_color_code, internal_ref_code GROUP BY product_color_code, internal_ref_code
...@@ -401,21 +317,20 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None) ...@@ -401,21 +317,20 @@ async def build_starrocks_query(params, query_vector: list[float] | None = None)
""" """
# try: # try:
# import os
# query_log_path = os.path.join(os.path.dirname(__file__), "note/query.txt") # query_log_path = os.path.join(os.path.dirname(__file__), "note/query.txt")
# # Build executable query by substituting %s with actual values
# executable_sql = sql # executable_sql = sql
# for param in sql_params: # for param in sql_params:
# if isinstance(param, str): # if isinstance(param, str):
# # Escape single quotes and wrap in quotes
# escaped = param.replace("'", "''") # escaped = param.replace("'", "''")
# executable_sql = executable_sql.replace("%s", f"'{escaped}'", 1) # executable_sql = executable_sql.replace("%s", f"'{escaped}'", 1)
# else: # else:
# executable_sql = executable_sql.replace("%s", str(param), 1) # executable_sql = executable_sql.replace("%s", str(param), 1)
# with open(query_log_path, "w", encoding="utf-8") as f: # with open(query_log_path, "w", encoding="utf-8") as f:
# f.write(f"-- [HYDE SEARCH] Full Executable Query\n-- Original Params: {sql_params}\n{executable_sql}") # f.write(f"-- [SEARCH] Full Executable Query\n-- Original Params: {sql_params}\n{executable_sql}")
# except Exception as e: # except Exception as e:
# logger.error(f"Error writing to query.txt: {e}") # logger.error("Error writing to query.txt: %s", e)
return sql, sql_params return sql, sql_params
...@@ -3,7 +3,6 @@ import logging ...@@ -3,7 +3,6 @@ import logging
from langchain_core.tools import tool from langchain_core.tools import tool
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from agent.prompt_utils import read_tool_prompt
from common.starrocks_connection import get_db_connection from common.starrocks_connection import get_db_connection
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -33,23 +32,55 @@ async def canifa_store_search(location: str) -> str: ...@@ -33,23 +32,55 @@ async def canifa_store_search(location: str) -> str:
clean = clean.replace(prefix, "") clean = clean.replace(prefix, "")
clean = clean.strip() clean = clean.strip()
if not clean: # Tách thành tokens, deduplicate (giữ thứ tự)
# VD: "hà đông, hà nội" → ["hà", "đông", "nội"]
tokens = list(dict.fromkeys(
t for t in clean.replace(',', ' ').split() if t.strip()
))
if not tokens:
return "Vui lòng cho em biết khu vực bạn muốn tìm cửa hàng CANIFA (ví dụ: Hoàng Mai, Cầu Giấy, Đà Nẵng...)." return "Vui lòng cho em biết khu vực bạn muốn tìm cửa hàng CANIFA (ví dụ: Hoàng Mai, Cầu Giấy, Đà Nẵng...)."
# Search trên các cột structured: city, state, address, store_name # Search trên concat tất cả cột địa chỉ
sql = f""" text_col = "LOWER(concat_ws(' ', store_name, address, city, state))"
SELECT store_name, address, city, state, phone_number,
schedule_name, time_open_today, time_close_today def _build_sql(where_clause: str) -> str:
FROM {STORE_TABLE} return f"""
WHERE LOWER(city) LIKE '%{clean}%' SELECT store_name, address, city, state, phone_number,
OR LOWER(state) LIKE '%{clean}%' schedule_name, time_open_today, time_close_today
OR LOWER(address) LIKE '%{clean}%' FROM {STORE_TABLE}
OR LOWER(store_name) LIKE '%{clean}%' WHERE {where_clause}
ORDER BY state, city, store_name ORDER BY state, city, store_name
LIMIT 20 LIMIT 20
""" """
results = await sr.execute_query_async(sql) # ═══════════════════════════════════════════════
# Step 1: AND tất cả tokens (strict match)
# "hà đông hà nội" → tokens ["hà","đông","nội"] → AND → 5 stores ✓
# ═══════════════════════════════════════════════
and_conds = [f"{text_col} LIKE '%{tk}%'" for tk in tokens]
results = await sr.execute_query_async(_build_sql(' AND '.join(and_conds)))
# ═══════════════════════════════════════════════
# Step 2: Fallback — Reverse LIKE
# Dùng chính DB làm từ điển địa danh:
# Kiểm tra tên quận/huyện/tỉnh nào trong DB XUẤT HIỆN trong input user
# "hà đông cầu giấy" chứa "hà đông" (city) + "cầu giấy" (city) → lấy CẢ 2
# ═══════════════════════════════════════════════
if not results and len(tokens) >= 2:
# Strip prefix khỏi city: "Quận Hà Đông" → "hà đông"
city_stripped = """LOWER(TRIM(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
city, 'Quận ', ''), 'Huyện ', ''), 'Thành phố ', ''), 'Thị xã ', ''), 'TP. ', '')))"""
state_lower = "LOWER(TRIM(state))"
fallback_where = f"""
(LOCATE({city_stripped}, '{clean}') > 0 AND LENGTH({city_stripped}) > 1)
OR
(LOCATE({state_lower}, '{clean}') > 0 AND LENGTH({state_lower}) > 1)
"""
results = await sr.execute_query_async(_build_sql(fallback_where))
logger.info(f"📊 Store search: reverse-LIKE fallback for '{clean}'")
logger.info(f"📊 Store search: {len(results)} stores found for '{location}'") logger.info(f"📊 Store search: {len(results)} stores found for '{location}'")
if not results: if not results:
...@@ -86,4 +117,3 @@ async def canifa_store_search(location: str) -> str: ...@@ -86,4 +117,3 @@ async def canifa_store_search(location: str) -> str:
return "Tôi đang gặp khó khăn khi tìm kiếm cửa hàng. Bạn có thể liên hệ hotline 1800 6061 để được hỗ trợ." return "Tôi đang gặp khó khăn khi tìm kiếm cửa hàng. Bạn có thể liên hệ hotline 1800 6061 để được hỗ trợ."
canifa_store_search.__doc__ = read_tool_prompt("store_search_tool") or canifa_store_search.__doc__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment