import pandas as pd
import openai
from openai import OpenAI
import chromadb
import time
from config import logger, openai_api_key
from chromadb.config import Settings

# ① Connect to the ChromaDB server over HTTP.
# Host and port are hard-coded here; the port must match the running server.
_chroma_settings = Settings(chroma_api_impl="rest")
client = chromadb.HttpClient(host="localhost", port=8001, settings=_chroma_settings)

# ② Name of the target collection; it is created if it does not exist yet.
collection_name = "sanwa_profile"
# Use get_or_create_collection rather than get_collection with a catch-all
# fallback: only a genuinely missing collection should trigger creation, while
# connection errors and Ctrl-C must surface unchanged.
collection = client.get_or_create_collection(collection_name)

# ③ Load the Excel sheet.
df = pd.read_excel('sanwa_profile.xlsx')


def _cell_text(value: object) -> str:
    """Return a cell value as stripped text, mapping missing cells to ''."""
    # pandas represents empty cells as NaN, and str(NaN) is the literal "nan",
    # which must not end up as document text or metadata.
    if pd.isna(value):
        return ""
    return str(value).strip()


# ④ Pre-process the rows into three parallel lists (same length, same order):
#    document texts, their metadata dicts, and their ids.
texts = []
metadatas = []
ids = []

for idx, row in df.iterrows():
    content = _cell_text(row.get('content', ''))
    if not content:
        # Nothing to embed for this row; keeping it would only produce an
        # empty (or "nan") document.
        continue

    texts.append(content)
    metadatas.append({
        "page_type": _cell_text(row.get('page_type', '')),
        "url": _cell_text(row.get('url', '')),
    })
    # The id is derived from the DataFrame index label, so skipping a row
    # does not shift the ids of the remaining rows.
    ids.append(f"page_{idx}")

# ⑤ Set up OpenAI access.
# The API key is given to an explicit client instance and is also assigned
# on the `openai` module itself.
client_openai = OpenAI(api_key=openai_api_key)
openai.api_key = openai_api_key

# ⑥ Embed each text with OpenAI and register it in the ChromaDB collection.
# Items are processed one at a time on a best-effort basis: a failure is
# reported and the loop moves on to the next item.
# NOTE(review): `collection.add` is used as-is; how ids that already exist in
# the collection are handled on a re-run depends on the Chroma version —
# confirm, and consider `upsert` if re-runs should overwrite.
total = len(texts)
for position, (doc_id, text, metadata) in enumerate(zip(ids, texts, metadatas), start=1):
    if not text:
        # An empty text would only be reported as an API error, so skip it
        # without spending a request.
        print(f"[{position}/{total}] スキップ: 空のテキスト")
        continue

    try:
        response = client_openai.embeddings.create(
            model="text-embedding-3-small",
            input=text,
        )
        collection.add(
            ids=[doc_id],
            documents=[text],
            embeddings=[response.data[0].embedding],
            metadatas=[metadata],
        )
    except Exception as e:
        # Same 1-based "position/total" prefix as the success message, so the
        # failing item can be matched against the progress output.
        print(f"[{position}/{total}] エラー発生: {e}")
    else:
        print(f"[{position}/{total}] 登録完了")
    finally:
        # Throttle after every request, including failed ones: a rate-limit
        # error is exactly when the pause is needed.
        time.sleep(0.5)
