# -*- coding: utf-8 -*-
# 言問名店マスターの lat 未設定店を国土地理院AddressSearch（→Nominatimフォールバック）でジオコーディングし、
# パッチJSON（geo-patch.json）を出力する。マスターは書き換えない。
import json, time, re, urllib.request, urllib.parse, sys
# Emit UTF-8 on stdout; the report printed by this script contains Japanese text.
sys.stdout.reconfigure(encoding="utf-8")

# MASTER is the input store master JSON (only read, never rewritten);
# OUT is the patch JSON that main() writes.
MASTER = r"C:\Users\daito\company\projects\kototoi-sanpo-editorial\meiten-business\data\meiten-master.json"
OUT    = r"C:\Users\daito\company\projects\kototoi-sanpo-editorial\meiten-business\data\geo-patch.json"

# User-Agent header sent with every geocoding HTTP request.
UA = "kotosan-meiten-geocode/1.0 (kototoisanpo.com)"

# Plausible bounding box centered on Taito ward; hits outside it are rejected as false matches.
LAT_MIN, LAT_MAX = 35.60, 35.75
LNG_MIN, LNG_MAX = 139.74, 139.82

# Street-number pattern (chome-ban-go / ban-go written as hyphen-separated digits);
# addresses matching it are eligible for the "confirmed" status.
BANCHI_RE = re.compile(r"\d+[-－]\d+")
# Placeholder wording that makes an address unusable for geocoding
# (location unknown / needs confirmation / sales-only / base, etc.).
PLACEHOLDER_RE = re.compile(r"(要確認|拠点|営業|シェア|多店舗|本社所在|区内|登記|広域|地番)")

def in_bounds(lat, lng):
    """Return True when (lat, lng) lies inside the accepted bounding box.

    Both ranges are inclusive and are taken from the module-level
    LAT_MIN/LAT_MAX and LNG_MIN/LNG_MAX constants.
    """
    if not (LAT_MIN <= lat <= LAT_MAX):
        return False
    return LNG_MIN <= lng <= LNG_MAX

def build_query(addr, ward):
    """Normalize a raw address into a geocoder query string.

    Parenthesised annotations (half- or full-width brackets) are removed.
    If the remaining text does not contain "区", ``ward`` (when truthy) is
    prepended; if the result does not contain "東京都", that prefix is
    prepended as well.

    Returns:
        The normalized query, or ``None`` when nothing is left after the
        annotations and surrounding whitespace are stripped.
    """
    # Drop bracketed notes, e.g. "上野5-2-1（…）" -> "上野5-2-1".
    stripped = re.sub(r"[（(].*?[)）]", "", addr.strip()).strip()
    if stripped == "":
        return None

    needs_ward = bool(ward) and "区" not in stripped
    query = ward + stripped if needs_ward else stripped
    return query if "東京都" in query else "東京都" + query

def gsi_geocode(q, has_banchi):
    """Geocode ``q`` with the GSI AddressSearch endpoint (primary geocoder).

    Args:
        q: Address query string; it is URL-quoted before being sent.
        has_banchi: Whether the address contains a street-number pattern.
            Selects the status: "confirmed" when true, otherwise "estimated".

    Returns:
        ``(lat, lng, status, "gsi")`` built from the first hit, with the
        coordinates rounded to six decimals, or ``None`` when there is no
        hit, the hit falls outside the box checked by ``in_bounds``, or the
        request/parsing fails.
    """
    url = "https://msearch.gsi.go.jp/address-search/AddressSearch?q=" + urllib.parse.quote(q)
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=10) as r:
            data = json.loads(r.read())
        if not data:
            return None
        # The first hit's coordinates are unpacked as (longitude, latitude).
        lng, lat = data[0]["geometry"]["coordinates"][:2]
        lat, lng = round(float(lat), 6), round(float(lng), 6)
        if not in_bounds(lat, lng):
            return None  # false match outside the accepted area
        status = "confirmed" if has_banchi else "estimated"
        return lat, lng, status, "gsi"
    except Exception as exc:
        # Best-effort lookup: any failure still means "no result" for the
        # caller, but it is reported on stderr so that an outage or a changed
        # response shape is not mistaken for a plain zero-hit answer.
        print(f"[gsi] geocode failed for {q!r}: {exc!r}", file=sys.stderr)
        return None

def nominatim_geocode(q, has_banchi):
    """Geocode ``q`` with the Nominatim search API (fallback geocoder).

    Args:
        q: Address query string; it is URL-quoted before being sent.
        has_banchi: Whether the address contains a street-number pattern.
            Kept for signature parity with ``gsi_geocode``; it does not
            influence the status, because Nominatim hits are always reported
            as "estimated".

    Returns:
        ``(lat, lng, "estimated", "nominatim")`` built from the first hit,
        with the coordinates rounded to six decimals, or ``None`` when there
        is no hit, the hit falls outside the box checked by ``in_bounds``, or
        the request/parsing fails.
    """
    url = ("https://nominatim.openstreetmap.org/search?format=json&limit=1&countrycodes=jp&q="
           + urllib.parse.quote(q))
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=10) as r:
            data = json.loads(r.read())
        if not data:
            return None
        lat, lng = round(float(data[0]["lat"]), 6), round(float(data[0]["lon"]), 6)
        if not in_bounds(lat, lng):
            return None  # false match outside the accepted area
        # Nominatim results are coarse, so stay conservative: never promote a
        # hit to "confirmed", even when the address has a street number.
        # (Previously this mirrored the GSI rule and contradicted that intent.)
        return lat, lng, "estimated", "nominatim"
    except Exception as exc:
        # Best-effort lookup: any failure still means "no result" for the
        # caller, but it is reported on stderr so that an outage or a changed
        # response shape is not mistaken for a plain zero-hit answer.
        print(f"[nominatim] geocode failed for {q!r}: {exc!r}", file=sys.stderr)
        return None

def main():
    """Geocode master stores that lack coordinates and write a patch JSON.

    Reads MASTER, selects the stores whose ``lat`` is missing and whose
    address is non-empty and not "不明", geocodes each one with
    ``gsi_geocode`` (falling back to ``nominatim_geocode``), writes the
    collected coordinates to OUT keyed by ``storeId``, and prints a summary
    of confirmed / estimated / pending stores. MASTER is only read.
    """
    # Context managers guarantee both files are closed (and OUT is flushed)
    # instead of relying on garbage collection of anonymous handles.
    with open(MASTER, encoding="utf-8") as master_file:
        d = json.load(master_file)
    stores = d["stores"]
    targets = [x for x in stores
               if x.get("lat") is None
               and x.get("address")
               and str(x.get("address")).strip() not in ("", "不明")]

    patch = {}
    confirmed, estimated, pending = [], [], []

    for x in targets:
        sid = x["storeId"]
        name = x.get("name", "")
        addr = str(x["address"]).strip()
        ward = (x.get("area") or {}).get("ward", "")

        # Placeholder addresses go straight to pending so that no
        # coordinates are fabricated for them.
        cleaned = re.sub(r"[（(].*?[)）]", "", addr).strip()
        if PLACEHOLDER_RE.search(addr) or not cleaned:
            pending.append((sid, name, "住所がプレースホルダ"))
            continue

        q = build_query(addr, ward)
        if not q or len(q) < 5:
            pending.append((sid, name, "住所が短すぎ"))
            continue

        has_banchi = bool(BANCHI_RE.search(cleaned))

        res = gsi_geocode(q, has_banchi)
        if res is None:
            time.sleep(1.0)  # Nominatim courtesy: at most 1 request/second
            res = nominatim_geocode(q, has_banchi)

        if res is None:
            pending.append((sid, name, "ジオコード0件/誤ヒット"))
            time.sleep(0.4)
            continue

        lat, lng, status, src = res
        patch[sid] = {"lat": lat, "lng": lng, "geocodeStatus": status, "geocodeSource": src}
        (confirmed if status == "confirmed" else estimated).append((sid, name))
        time.sleep(0.4)  # courtesy pause between GSI requests

    with open(OUT, "w", encoding="utf-8") as out_file:
        json.dump(patch, out_file, ensure_ascii=False, indent=2)

    print("=== RESULT ===")
    print(f"対象 {len(targets)}件 / confirmed {len(confirmed)} / estimated {len(estimated)} / pending {len(pending)}")
    print("\n--- confirmed ---")
    for sid, nm in confirmed:
        print(f"  {sid} {nm}")
    print("\n--- estimated ---")
    for sid, nm in estimated:
        print(f"  {sid} {nm}")
    print("\n--- pending ---")
    for sid, nm, why in pending:
        print(f"  {sid} {nm} ({why})")

# Run the geocoding job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
