# -*- coding: utf-8 -*-
"""
Daily scraper: Hacico (filter: price <= 10 EUR; length 70–150 mm).
Stores results in SQLite (data/products.sqlite, table 'products').
Intended to be run via cron on cPanel.
"""
import os
import re
import sqlite3
import time
from datetime import datetime
from urllib.parse import urlencode, urljoin

import requests
from bs4 import BeautifulSoup

# Listing URL that main() requests; search filters are appended as a query string.
BASE_URL = "https://www.hacico.de/zigarren"

# The SQLite file is kept in a "data" directory next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(_SCRIPT_DIR, "data")
DB_PATH = os.path.join(DATA_DIR, "products.sqlite")

# Browser-like User-Agent sent with every HTTP request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0 Safari/537.36",
}

def parse_price(text):
    """Convert a displayed price such as ``"9,90 €"`` into a float.

    Everything except digits, commas and dots is discarded before parsing.
    Separator handling:

    * exactly one comma, English layout (``"1,234.56"``): the comma groups
      thousands and the dot is the decimal mark;
    * exactly one comma otherwise (``"1.234,56"``, ``"9,90"``): German layout,
      dots group thousands and the comma is the decimal mark;
    * zero or several commas: commas are treated as thousands separators.

    Args:
        text: Raw price text; may be ``None`` or empty.

    Returns:
        The parsed value as ``float``, or ``None`` when *text* is empty or
        what remains after cleaning is not a valid number.
    """
    if not text:
        return None
    # Keep only digits and the two possible separators.
    cleaned = re.sub(r"[^\d,\.]", "", text)
    if cleaned.count(",") == 1:
        if re.fullmatch(r"\d{1,3},\d{3}\.\d+", cleaned):
            # Strict English thousands pattern: the comma is not the decimal
            # mark, so dropping the dots here would shift the value.
            cleaned = cleaned.replace(",", "")
        else:
            cleaned = cleaned.replace(".", "").replace(",", ".")
    else:
        cleaned = cleaned.replace(",", "")
    try:
        return float(cleaned)
    except ValueError:
        # Nothing numeric left (e.g. "" or "1.2.3"); report as unparseable.
        return None

def build_params(offset=0, price_max=10, len_min=70, len_max=150):
    """Assemble the query parameters for one listing request.

    Args:
        offset: Value sent as ``von``; the key is only included when the
            offset is greater than zero.
        price_max: Value sent as ``Preisbis``.
        len_min: Value sent as ``Laengevon``.
        len_max: Value sent as ``Laengebis``.

    Returns:
        A dict of string keys to string values, in the insertion order the
        request expects; all remaining filter fields are sent empty.
    """
    pairs = [
        ("Preisvon", ""),
        ("Preisbis", str(price_max)),
        ("Laengevon", str(len_min)),
        ("Laengebis", str(len_max)),
        ("DMvon", ""),
        ("DMbis", ""),
        ("SM2von", ""),
        ("SM2bis", ""),
        ("SM4von", ""),
        ("SM4bis", ""),
        # NOTE(review): fixed category selector; meaning of "2" is not visible here.
        ("mySMKategorie", "2"),
    ]
    if offset > 0:
        pairs.append(("von", str(offset)))
    return dict(pairs)

def parse_products(html):
    """Extract product rows from one listing page.

    Each element matching one of the product-container selectors is scanned
    for a title element and a price element. Containers lacking a non-empty
    name or price text are skipped.

    Args:
        html: Page markup as a string.

    Returns:
        A list of tuples
        ``(shop, name, price_eur, price_text, url, inserted_at)`` where
        ``price_eur`` is the result of ``parse_price`` (possibly ``None``),
        ``url`` is ``None`` when the title element has no ``href``, and
        ``inserted_at`` is a naive UTC ISO-8601 timestamp.
    """
    soup = BeautifulSoup(html, "html.parser")
    items = []
    for card in soup.select(".product-wrapper, .product, .product-grid-item, li.product"):
        title_el = card.select_one(".product-title, .title a, h2 a, a.product-title")
        price_el = card.select_one(".product-price, .price, .price-new, .product-price__current")
        if title_el is None or price_el is None:
            continue

        name = title_el.get_text(strip=True)
        price_txt = price_el.get_text(" ", strip=True)
        if not name or not price_txt:
            continue

        link = title_el.get("href")
        if link and not link.startswith(("http://", "https://")):
            # Resolve root-relative, relative and protocol-relative hrefs
            # against the listing URL; absolute links are left untouched.
            link = urljoin(BASE_URL, link)

        # NOTE: utcnow() yields a naive timestamp; kept so stored values keep
        # the same format as rows already in the database.
        stamp = datetime.utcnow().isoformat()
        items.append(("Hacico", name, parse_price(price_txt), price_txt, link, stamp))
    return items

def ensure_db():
    """Create the data directory, the ``products`` table and its indexes.

    Every statement uses ``IF NOT EXISTS`` (and ``exist_ok=True`` for the
    directory), so calling this on an already-initialised database is a
    no-op. The connection is always closed, even when a statement fails.

    Raises:
        OSError: If the data directory cannot be created.
        sqlite3.Error: If the database cannot be opened or modified.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        # The connection context manager commits on success and rolls back
        # on error; it does not close the connection, hence the finally.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS products (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    shop TEXT,
                    name TEXT,
                    price_eur REAL,
                    price_text TEXT,
                    url TEXT,
                    inserted_at TEXT
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_products_name ON products(name)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_products_price ON products(price_eur)")
    finally:
        conn.close()

def save_products(rows):
    """Insert scraped rows into the ``products`` table in one transaction.

    Args:
        rows: Iterable of 6-tuples
            ``(shop, name, price_eur, price_text, url, inserted_at)``.

    Raises:
        sqlite3.Error: If the insert fails; nothing is committed in that
            case and the connection is still closed.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        # Commit on success, roll back on error; closing is done in finally
        # because the context manager leaves the connection open.
        with conn:
            conn.executemany("""
                INSERT INTO products (shop, name, price_eur, price_text, url, inserted_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, rows)
    finally:
        conn.close()

def main():
    """Scrape the filtered listing page by page and store the results.

    Pages are requested with an increasing ``von`` offset until one of the
    stop conditions is hit: a request error, a non-200 response, a page
    without products, a page yielding fewer than five unseen products, or
    the 80-page safety limit. Rows collected up to that point are written
    to the database, so a failure part-way through does not discard
    earlier pages.
    """
    ensure_db()
    all_rows = []
    seen = set()
    with requests.Session() as session:
        for page_idx in range(0, 80):  # safety limit
            # NOTE(review): step of 150 is presumably the shop's page size — confirm.
            offset = page_idx * 150
            url = f"{BASE_URL}?{urlencode(build_params(offset))}"
            try:
                resp = session.get(url, headers=HEADERS, timeout=25)
            except requests.RequestException as exc:
                # Keep what was already collected instead of crashing.
                print(f"[WARN] Pedido falhou (offset={offset}): {exc}")
                break
            if resp.status_code != 200:
                print(f"[WARN] HTTP {resp.status_code} (offset={offset}).")
                break
            items = parse_products(resp.text)
            if not items:
                break
            new = 0
            for it in items:
                key = (it[0], it[1], it[4])  # shop, name, url
                if key not in seen:
                    seen.add(key)
                    all_rows.append(it)
                    new += 1
            # A page that is almost entirely repeats means the listing has
            # wrapped around or ended.
            if new < 5:
                break
            time.sleep(1.5)  # be polite between requests

    if all_rows:
        save_products(all_rows)
        print(f"[OK] Inseridos {len(all_rows)} registos.")
    else:
        print("[WARN] Sem novos registos.")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
