import pandas as pd
import openai
from openai import OpenAI
import chromadb
import time
from config import logger, openai_api_key
from chromadb.config import Settings

# (1) Connect to a ChromaDB server over HTTP. With Chroma 1.x the server has
# to be started separately before this script runs.
# NOTE(review): HttpClient is believed to select its own API implementation,
# so chroma_api_impl="rest" may be redundant -- confirm before removing.
client = chromadb.HttpClient(
    host="localhost",
    port=8000,
    settings=Settings(chroma_api_impl="rest")
)

# Fetch the collection, creating it on the first run.
# get_or_create_collection does this in one call, so there is no gap between
# "check whether it exists" and "create it", and the script no longer depends
# on the element type returned by list_collections(), which has varied across
# Chroma releases.
# NOTE(review): Chroma validates collection names; confirm that the non-ASCII
# characters in this name are accepted by the server version in use.
collection_name = "excel_本文"
collection = client.get_or_create_collection(collection_name)

# (2) Load the '本文' sheet of the workbook, then keep every non-missing value
# of its '本文' column as a plain Python list.
df = pd.read_excel(io='山地真人著書.xlsx', sheet_name='本文')
body_column = df['本文']
texts = body_column[body_column.notna()].tolist()

# (3) Prepare the OpenAI API client.
# The key is set on the module and also passed to the explicit client below.
# NOTE(review): the module-level assignment looks redundant for the explicit
# client -- confirm nothing else relies on it before dropping it.
openai.api_key = openai_api_key
client_openai = OpenAI(api_key=openai_api_key)

# Embedding model used for every document.
EMBEDDING_MODEL = "text-embedding-3-small"
# Pause between consecutive requests, to stay clear of the API rate limit.
REQUEST_DELAY_SECONDS = 0.5

# (4) Insert the data: embed each text and store it in the collection under
# its 0-based position in `texts` (as a string id).
total = len(texts)
for idx, text in enumerate(texts):
    try:
        response = client_openai.embeddings.create(
            model=EMBEDDING_MODEL,
            input=text,
        )
        embedding = response.data[0].embedding

        collection.add(
            ids=[str(idx)],
            documents=[text],
            embeddings=[embedding],
        )

        # Progress is reported 1-based against the total number of texts.
        print(f"[{idx+1}/{total}] 등록 완료")

    except Exception as e:
        # Best effort: report the failing item (by its 0-based index, which
        # is also its document id) and carry on with the remaining texts.
        print(f"[{idx}] 에러 발생: {e}")

    # Throttle after every attempt, failed ones included. Keeping the pause
    # outside the try block means an error (for example a rate-limit
    # response) no longer causes the next request to be sent immediately.
    time.sleep(REQUEST_DELAY_SECONDS)