import pandas as pd
import openai
from openai import OpenAI
import chromadb
import time
from config import logger, openai_api_key
from chromadb.config import Settings

# ① Connect to the ChromaDB HTTP server
client = chromadb.HttpClient(
    host="localhost",
    port=8001,
    settings=Settings(chroma_api_impl="rest"),
)

# ② Open the target collection, creating it if it does not exist yet.
# A single get-or-create call is used instead of get-then-create under a
# broad `except`, so that unrelated failures (e.g. the server being
# unreachable) surface as themselves rather than being mistaken for a
# missing collection.
collection_name = "segue_saved_qna13"
collection = client.get_or_create_collection(collection_name)

# ③ Load the Q&A spreadsheet
df = pd.read_excel("segue_saved_qna.xlsx")


def _cell_text(value: object) -> str:
    """Return a spreadsheet cell as stripped text; missing cells become ""."""
    # Blank cells arrive as NaN/None; str() would turn them into the literal
    # text "nan"/"None", which would then pass the emptiness check below.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value).strip()


# ④ Preprocess: collect every row that has a non-empty question.
# The four lists are parallel: position i in each describes the same row.
questions = []
answers = []
metadatas = []
ids = []

for idx, row in df.iterrows():
    q = _cell_text(row.get("question", ""))
    if not q:
        # Skip rows without a question; there is nothing to embed.
        continue
    a = _cell_text(row.get("answer", ""))

    questions.append(q)
    answers.append(a)
    # The answer travels with the question as metadata.
    metadatas.append({"answer": a})
    # The id is derived from the DataFrame index label of the row.
    ids.append(f"qna_{idx}")

# ⑤ Prepare the OpenAI client
# NOTE(review): the module-level key is set in addition to the explicit
# client below; kept as-is in case other code relies on the global setting.
openai.api_key = openai_api_key
client_openai = OpenAI(api_key=openai_api_key)

# ⑥ Embed each question and store it in the collection.
# Progress is reported as a 1-based "[position/total]" for both success and
# failure so that the two kinds of log line refer to rows the same way.
total = len(questions)
for position, (doc_id, q, metadata) in enumerate(
    zip(ids, questions, metadatas), start=1
):
    try:
        response = client_openai.embeddings.create(
            model="text-embedding-3-small",
            input=q,
        )
        embedding = response.data[0].embedding

        collection.add(
            ids=[doc_id],
            documents=[q],
            embeddings=[embedding],
            metadatas=[metadata],
        )

        print(f"[{position}/{total}] 登録完了")
        # Pause between successful requests (presumably to throttle API calls).
        time.sleep(0.5)

    except Exception as e:
        # Best-effort import: report the failed row and continue with the next.
        print(f"[{position}/{total}] エラー発生: {e}")
