from fastapi import APIRouter, HTTPException, Request, Depends
from fastapi.responses import RedirectResponse
import openai
from openai import OpenAI
import re
import boto3
import os
import time
from schemas import ChatMessage, SpeechText, QuizMessage, LineUser, MedicineText, UserQuestion
from config import logger, openai_api_key
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import logging
import requests
from bs4 import BeautifulSoup
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from urllib.parse import urljoin, urlparse
import ast
from datetime import datetime
import chromadb

# Connect to the Chroma server (a Chroma 1.x server must be running on this host/port).
# HttpClient already uses the REST-based client implementation, so no extra Settings are needed.
chroma_client = chromadb.HttpClient(
    host="localhost",
    port=8001,
)

# The collection name must match the one the data was previously stored under
collection_profile = chroma_client.get_collection("segue_profile_split3")
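# The "segue_profile_split3" collection is assumed to have been populated elsewhere with
# embeddings from the same model used for querying below (text-embedding-3-small).
# A minimal sketch of that ingestion step (illustrative only; chunk/chunk_id are
# hypothetical names, and `client` is the OpenAI client created below):
#
#   emb = client.embeddings.create(model="text-embedding-3-small", input=chunk).data[0].embedding
#   collection_profile.add(ids=[chunk_id], documents=[chunk], embeddings=[emb])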
# collection_excel = chroma_client.get_collection("excel_honbun")

router = APIRouter()

openai.api_key = openai_api_key
client = OpenAI(api_key=openai_api_key)

s3_client = boto3.client('s3')
bucket_name = 'shanri-ai-chatbot-for-text-to-speech'


async def ask_openai(messages):
    # Drop messages with empty content before sending them to the Chat Completions API.
    valid_messages = [msg for msg in messages if msg.get('content')]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=valid_messages
    )
    answer = response.choices[0].message.content.strip()
    return answer
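
# A minimal usage sketch for ask_openai (illustrative only; the message contents are
# hypothetical, but the dict shape follows the standard OpenAI chat format):
#
#   answer = await ask_openai([
#       {"role": "system", "content": "You are a helpful assistant."},
#       {"role": "user", "content": "自己紹介をお願いします。"},
#   ])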


async def synthesize_speech(text, user_id):
    # Synthesize speech with OpenAI TTS, upload the MP3 to S3, and return its public URL.
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text,
    )
    # Make sure the local temp directory exists and reuse one timestamp for the file name and the S3 key.
    os.makedirs("tmp", exist_ok=True)
    timestamp = time.time()
    audio_file = f"tmp/audio-{user_id}-{timestamp}.mp3"
    with open(audio_file, 'wb') as f:
        for chunk in response.iter_bytes():
            f.write(chunk)
    s3_key = f"{user_id}-{timestamp}.mp3"
    s3_client.upload_file(audio_file, bucket_name, s3_key)
    os.remove(audio_file)
    return f"https://{bucket_name}.s3.amazonaws.com/{s3_key}"



async def search_profile_context(question):
    """Embed the question and retrieve the most similar company-profile chunk from Chroma."""
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=question
    )
    query_embedding = response.data[0].embedding

    # Nearest-neighbour search against the profile collection (top 1 result).
    results = collection_profile.query(
        query_embeddings=[query_embedding],
        n_results=1
    )

    # results['documents'] is a list of document lists, one list per query embedding.
    retrieved_docs = results['documents'][0]
    profile_context = "\n\n".join(retrieved_docs)
    return profile_context


# async def search_excel_context(question):
#     response = client.embeddings.create(
#         model="text-embedding-3-small",
#         input=question
#     )
#     query_embedding = response.data[0].embedding

#     results = collection_excel.query(
#         query_embeddings=[query_embedding],
#         n_results=1
#     )

#     retrieved_docs = results['documents'][0]
#     excel_context = "\n\n".join(retrieved_docs)
#     return excel_context

# ✅ Ask GPT and generate an answer, grounded in vector-DB context (company website data, plus Excel data when enabled)
async def generate_gpt_answer(question):
    """Generate an answer with GPT-4o using the website data (and, when enabled, the company-provided Excel data) stored in the vector DB."""

    # Prepend "御社の" ("your company's ...") if the question does not already start with it
    if not question.startswith("御社の"):
        question = "御社の" + question

    print(datetime.now(), "profile search start")
    # Retrieve company-profile context from the vector DB
    profile_data = await search_profile_context(question)
    print("profile_data : " + profile_data)
    print(datetime.now(), "profile search end")

    # print(datetime.now(),"Excel 검색 시작")
    # excel_data = await search_excel_context(question)
    # print("excel_data : " + excel_data)
    # print(datetime.now(),"Excel 검색 끝")

    messages = [
        {"role": "system", "content": "あなたは「セグエグループ」の会社紹介および就職希望者向けのチャットボットです。"},
        {"role": "system", "content": "以下のルールに従って、ユーザーと会話してください。"},
        {"role": "system", "content": "1.会話スタイル：SNS上のやり取りのように、親しみやすく、1回の返信は300文字以内に収めてください。"},
        {"role": "system", "content": "2.トーン：丁寧で共感的な言葉遣いを心がけ、ユーザーとの会話を楽しむように、適度に質問を返してください。"},
        {"role": "system", "content": "3.情報の制限：返答に使用できるのは、提供されたURLとExcelファイルに含まれる情報のみとします。その他の情報は使用しないでください。"},
        {"role": "system", "content": "4.数値データ形式：数値がある場合は、マークダウン形式の表で表示してください。"},
        {"role": "system", "content": "この制約のもと、ユーザーに寄り添いながら会話を進めてください。"},
    ]

    # Generate the answer using both the website data and the Excel data (the Excel lookup is currently disabled)
    if profile_data:
        messages.append({"role": "system", "content": f"企業サイト情報\n{profile_data}"})

    # if excel_data:
    #     messages.append({"role": "system", "content": f"Excelファイルに記載されている情報\n{excel_data}"})


    messages.append({"role": "user", "content": question})

    print(datetime.now(), "answer generation start")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )
    print(datetime.now(), "answer generation end")

    answer = response.choices[0].message.content
    print("대답 : " + answer)
    return answer

@router.get("/health")
async def health_check():
    return {"status": "healthy"}


@router.post("/apige/speech")
async def speech(speech_text: SpeechText):
    text = speech_text.text
    chat_token = speech_text.chat_token
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")
    audio_file = await synthesize_speech(text, chat_token)
    return {"audio_file": audio_file}


# Endpoint that receives a user question and returns a GPT-generated answer
@router.post('/apige/ask_question')
async def gemini_question(user_question: UserQuestion):
    question_text = user_question.question.strip()
    if not question_text:
        raise HTTPException(status_code=400, detail="Question is required")

    generated_answer = await generate_gpt_answer(question_text)
    return {"answer": generated_answer}


