# Standard library
import ast
import asyncio
import json
import logging
import os
import re
import time
from urllib.parse import urljoin, urlparse

# Third-party
import boto3
import numpy as np
import openai
import requests
from bs4 import BeautifulSoup
from fastapi import APIRouter, HTTPException, Request, Depends
from fastapi.responses import RedirectResponse
from openai import OpenAI
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from sklearn.feature_extraction.text import TfidfVectorizer

# Local
from config import logger, openai_api_key
from schemas import ChatMessage, SpeechText, QuizMessage, LineUser, MedicineText, UserQuestion

router = APIRouter()

# Configure both the legacy module-level API (openai.chat...) and the
# client-based API (client.chat...); helpers below use a mix of the two.
openai.api_key = openai_api_key
client = OpenAI(api_key=openai_api_key)

# S3 client and bucket that host the synthesized speech MP3 files
# (see synthesize_speech below).
s3_client = boto3.client('s3')
bucket_name = 'shanri-ai-chatbot-for-text-to-speech'


async def ask_openai(messages):
    """Send *messages* to GPT-4o and return the answer text, stripped.

    Entries with a missing or empty 'content' field are dropped first,
    since the API rejects empty-content messages.
    """
    filtered = []
    for message in messages:
        if message.get('content'):
            filtered.append(message)
    completion = openai.chat.completions.create(
        model="gpt-4o",
        messages=filtered,
    )
    return completion.choices[0].message.content.strip()


async def synthesize_speech(text, user_id):
    """Synthesize *text* to MP3 with OpenAI TTS, upload it to S3, and return
    the object's public URL.

    The audio is first written to a local file under tmp/, uploaded under a
    key unique to (user_id, timestamp), and the local copy is deleted even
    when the upload fails.
    """
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text,
    )
    # Take a single timestamp so the local filename and S3 key stay consistent
    # (the original sampled time.time() twice, producing two different values).
    timestamp = time.time()
    os.makedirs("tmp", exist_ok=True)  # tmp/ may not exist on a fresh host
    audio_file = f"tmp/audio-{user_id}-{timestamp}.mp3"
    s3_key = f"{user_id}-{timestamp}.mp3"
    try:
        with open(audio_file, 'wb') as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
        s3_client.upload_file(audio_file, bucket_name, s3_key)
    finally:
        # Always remove the temp file, even if the write/upload raised,
        # so failed requests don't leak files into tmp/.
        if os.path.exists(audio_file):
            os.remove(audio_file)
    return f"https://{bucket_name}.s3.amazonaws.com/{s3_key}"


# # ✅ 특정 웹사이트(ug-inc.net) 크롤링 함수
# async def fetch_website_data(url):
#     """특정 URL에서 정보를 가져옴"""
#     try:
#         headers = {"User-Agent": "Mozilla/5.0"}
#         response = requests.get(url, headers=headers, timeout=10)
#         response.raise_for_status()

#         soup = BeautifulSoup(response.text, "html.parser")
#         paragraphs = soup.find_all("p")
#         text = "\n".join([p.get_text() for p in paragraphs])
#         return text
#     except requests.RequestException as e:
#         print(f"웹사이트 정보 가져오기 실패: {e}")
#         return ""

# Crawler helpers for the segue-g.jp corporate website.
def init_driver():
    """Create a headless Chrome WebDriver suitable for server-side crawling."""
    opts = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        opts.add_argument(flag)
    return webdriver.Chrome(options=opts)

def is_same_domain(base_url, test_url):
    """Return True when both URLs share the same network location (host:port)."""
    base_host = urlparse(base_url).netloc
    test_host = urlparse(test_url).netloc
    return base_host == test_host

async def fetch_all_pages_with_selenium(url_list):
    """Crawl each URL in *url_list* and return the concatenated HTML of its
    "sections_group" elements (internal links are NOT followed).

    Per-page failures are logged and skipped so one bad URL does not abort
    the whole crawl; the WebDriver is always quit.
    """
    driver = init_driver()
    chunks = []  # collect per-page fragments; join once (avoids O(n^2) +=)

    try:
        for url in url_list:
            try:
                print(f"\n🟢 크롤링 중: {url}")
                driver.get(url)
                # Give the page's JS time to render. asyncio.sleep instead of
                # time.sleep: this is a coroutine and a blocking sleep would
                # stall the whole event loop for 2s per page.
                await asyncio.sleep(2)
                soup = BeautifulSoup(driver.page_source, "html.parser")

                # Only elements carrying the sections_group class are wanted.
                sections = soup.find_all(class_="sections_group")
                page_html = "".join(section.prettify() for section in sections)

                chunks.append(f"<!-- {url} -->\n{page_html}\n\n")

            except Exception as e:
                print(f"[에러] {url}: {e}")

        return "".join(chunks)
    finally:
        driver.quit()


# # ✅ 질문과 관련 있는 텍스트 추출 함수
# async def find_relevant_text(question, text_data, max_sentences=20, max_length=2000):
#     """질문과 관련 있는 텍스트를 상위 20개 문장으로 제한"""
#     sentences = text_data.split("\n")
#     vectorizer = TfidfVectorizer().fit_transform([question] + sentences)
#     similarities = (vectorizer * vectorizer.T).toarray()[0][1:]

#     # 유사도가 높은 문장 정렬 후 상위 max_sentences 개만 선택
#     top_indices = np.argsort(similarities)[-max_sentences:]
#     top_sentences = [sentences[i] for i in top_indices]
#     return "\n".join(top_sentences)[:max_length]


# ✅ GPT-4o를 사용해 생성된 답변을 더 자연스럽고 정돈되게 수정 + 마크다운형식
def refine_answer_with_gpt(raw_answer):
    messages = [
        {"role": "system", "content": "以下の内容でこれらの部分を重点に修正してください。"},
        {"role": "system", "content": "企業名の代わりに弊社という表現を使った方がいいと思う。"},
        {"role": "system", "content": "本社のスタッフがお客様に言う感じにしてください。"},
        {"role": "system", "content": "ホームページに記載されている のような言葉は取り除いてください。"},
        {"role": "system", "content": "数値に関する内容が出たら、マークダウン形式の表で作成して答えてくれ。数値情報がなければあえて表にする必要はない。"},
        {"role": "user", "content": raw_answer}
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )

    return response.choices[0].message.content

async def choose_hompage_url(question):
    """Ask GPT-4o which company-site URLs are relevant to *question*.

    Returns the model's raw reply text, which is expected to look like a
    Python list literal of URL strings.
    """
    prompt_lines = [
        "質問を分析し、回答を作成する際に必要な情報を持つURLを返す必要があります。質問を見てURLを選択してください。",
        "最も情報がある可能性が高いURLを配列で返す必要があります。",
        "関数の戻り値のように、配列のみを返す必要があります。 例) ['https://segue-g.jp/company/boardmember/index.html','https://segue-g.jp/ir/results/settle.html']",
        # Catalog mapping topics to pages on segue-g.jp.
        "企業理念 : https://segue-g.jp/company/sdgs/index.html",
        "IR 情報 : https://segue-g.jp/ir/index.html",
        "財務、業績報告、売上 : https://segue-g.jp/ir/results/settle.html",
        "株式情報 : https://segue-g.jp/ir/stock/index.html",
        "当社の強み : https://segue-g.jp/ir/investor/strong_point/index.html",
        "成長戦略 : https://segue-g.jp/ir/investor/strategy/index.html",
        "会社概要 : https://segue-g.jp/company/basic/index.html",
        "企業理念 : https://segue-g.jp/company/brand/index.html",
        "沿革 : https://segue-g.jp/company/history/index.html",
        "役員一覧  : https://segue-g.jp/company/boardmember/index.html",
        "事業紹介 : https://segue-g.jp/business/index.html",
    ]
    messages = [{"role": "system", "content": line} for line in prompt_lines]
    messages.append({"role": "user", "content": "質問 : " + question})

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )
    return completion.choices[0].message.content


# Ask GPT and build the answer (web crawling + OpenAI web-search tool).
def _parse_url_list(raw):
    """Best-effort parse of the URL-chooser model reply into a list of strings.

    The model sometimes wraps the array in markdown fences or surrounds it
    with prose, so extract the outermost [...] span before evaluating it.
    Returns [] when no well-formed list can be recovered (the original
    ast.literal_eval on the raw reply crashed in those cases).
    """
    start = raw.find("[")
    end = raw.rfind("]")
    if start == -1 or end < start:
        return []
    try:
        parsed = ast.literal_eval(raw[start:end + 1])
    except (ValueError, SyntaxError):
        return []
    if not isinstance(parsed, (list, tuple)):
        return []
    return [item for item in parsed if isinstance(item, str)]


async def generate_gpt_answer(question):
    """Generate an answer via GPT-4o from crawled segue-g.jp pages plus
    OpenAI web-search results, then run it through the style refiner."""

    # Prefix "御社の" so the question is phrased as being about the company.
    if not question.startswith("御社の"):
        question = "御社の" + question

    url_list_str = await choose_hompage_url(question)
    url_list = _parse_url_list(url_list_str)

    # Skip launching a browser entirely when no URL could be parsed.
    website_data = await fetch_all_pages_with_selenium(url_list) if url_list else ""

    print("크롤링 데이터 " + website_data + "여기까지가 데이터입니다.")
    print("url_list : " + str(url_list))

    # Run the OpenAI web-search tool for supplementary public information.
    web_search_response = client.responses.create(
            model="gpt-4o",
            tools=[{"type": "web_search_preview"}],
            input="株式会社 セグエグループ " + question
    )
    web_search_text = web_search_response.output_text if web_search_response.output_text else ""

    print("웹 검색결과: " + web_search_text)

    messages = [
        {"role": "system", "content": "与えられた質問を見て、関連情報を取得して回答を作成してください。"},
        {"role": "system", "content": "回答を生成するときに、企業公式サイトの情報とウェブ検索の情報が同じ情報を持っている場合は、それを参照してください。"},
        {"role": "system", "content": "情報の内容が異なる場合は、企業公式サイトの情報を優先してください。"},
        {"role": "system", "content": "回答に何を参照したかは言わないでください。"}
    ]

    # Feed both sources when available; official-site data takes priority
    # per the system instructions above.
    if website_data:
        messages.append({"role": "system", "content": f"以下の企業公式サイトの情報を参考にしてください。\n{website_data}"})

    if web_search_text:
        messages.append({"role": "system", "content": f"以下のウェブ検索の情報を参考にしてください。\n{web_search_text}"})

    messages.append({"role": "user", "content": question})

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )

    initial_answer = response.choices[0].message.content

    return refine_answer_with_gpt(initial_answer)


@router.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    return dict(status="healthy")


@router.post("/apiV2_2/speech")
async def speech(speech_text: SpeechText):
    """Synthesize the submitted text to speech and return the audio URL.

    Raises 400 when the text field is empty.
    """
    if not speech_text.text:
        raise HTTPException(status_code=400, detail="Text is required")
    audio_url = await synthesize_speech(speech_text.text, speech_text.chat_token)
    return {"audio_file": audio_url}


# Endpoint that receives a user question and returns the generated answer.
@router.post('/apiV2_2/ask_question')
async def ask_question(user_question: UserQuestion):
    """Validate the incoming question and delegate to the GPT answer pipeline.

    Raises 400 when the question is blank after stripping whitespace.
    """
    question = user_question.question.strip()
    if not question:
        raise HTTPException(status_code=400, detail="Question is required")
    answer = await generate_gpt_answer(question)
    return {"answer": answer}

