import pandas as pd
import openai
from openai import OpenAI
import chromadb
import time
from config import logger, openai_api_key
from chromadb.config import Settings

# ① Connect to the ChromaDB server over HTTP
chroma_settings = Settings(chroma_api_impl="rest")
client = chromadb.HttpClient(host="localhost", port=8001, settings=chroma_settings)

# ② Collection setup: reuse the collection when it exists, create it otherwise.
collection_name = "segue_qna_by_question4"
# get_or_create_collection performs the "get, else create" step in one call.
# The former bare `except:` fallback treated every failure (connection errors,
# KeyboardInterrupt, SystemExit, ...) as "collection missing" and tried to
# create it, which masked the real error.
collection = client.get_or_create_collection(collection_name)

# ③ Load the Q&A spreadsheet into a DataFrame
excel_path = 'segue_qna.xlsx'
df = pd.read_excel(excel_path)

# ④ Preprocessing: question → document (search target), answer → metadata
documents = []
metadatas = []
ids = []


def _cell_text(value):
    """Return a spreadsheet cell as stripped text, or "" when it is missing.

    Empty cells arrive as NaN/None; calling ``str()`` on them yields the
    truthy strings "nan"/"None", which would slip past the blank-row check
    below, so missing values are detected before converting.
    """
    if pd.isna(value):
        return ""
    return str(value).strip()


for idx, row in df.iterrows():
    question = _cell_text(row.get('question', ''))
    answer = _cell_text(row.get('answer', ''))

    # Skip rows where either the question or the answer is missing/blank.
    if not question or not answer:
        continue

    documents.append(question)  # the question is the text that gets searched
    metadatas.append({"answer": answer})  # the answer is returned with a hit
    ids.append(f"qna_{idx}")  # id derives from the DataFrame index label

# ⑤ Set up the OpenAI client used for embeddings, and the module-level API key
client_openai = OpenAI(api_key=openai_api_key)
openai.api_key = openai_api_key

# ⑥ Embed each question with OpenAI and store it in ChromaDB
total = len(documents)
for position, (doc_id, question, metadata) in enumerate(
    zip(ids, documents, metadatas), start=1
):
    try:
        response = client_openai.embeddings.create(
            model="text-embedding-3-large",
            input=question
        )
        embedding = response.data[0].embedding

        collection.add(
            ids=[doc_id],
            documents=[question],
            embeddings=[embedding],
            metadatas=[metadata]
        )
    except Exception as e:
        # Best effort: report the failed record (same 1-based counter as the
        # success message, plus its id) and carry on with the remaining ones.
        print(f"[{position}/{total}] 오류 발생 ({doc_id}): {e}")
    else:
        print(f"[{position}/{total}] 저장 완료")
    finally:
        # Throttle after every attempt, including failures, so an error such
        # as a rate limit is not followed immediately by the next request.
        time.sleep(0.5)
