o
    g                     @   s  d dl mZmZmZmZ d dlmZ d dlZd dlmZ d dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZmZmZmZmZ d dlmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dl Z d dl!m"Z" d dl#Z$d dlmZ d dl%Z&e Z'ee_(eed	Z)e
)d
Z*dZ+dd Z,dd Z-dd Z.dd Z/d%ddZ0dd Z1e'2ddd Z3e'4ddefdd Z5e'4d!d"efd#d$Z6dS )&    )	APIRouterHTTPExceptionRequestDepends)RedirectResponseN)OpenAI)ChatMessage
SpeechTextQuizMessageLineUserMedicineTextUserQuestion)loggeropenai_api_key)TfidfVectorizer)BeautifulSoup)api_keys3z$shanri-ai-chatbot-for-text-to-speechc                    s8   dd | D }t jjjd|d}|jd jj }|S )Nc                 S   s   g | ]	}| d r|qS )content)get).0msg r    /home/air/api/chatbot_router2.py
<listcomp>#   s    zask_openai.<locals>.<listcomp>gpt-4omodelmessagesr   )openaichatcompletionscreatechoicesmessager   strip)r   valid_messagesresponseanswerr   r   r   
ask_openai"   s   r)   c                    s   t jjjdd| d}d| dt  d}t|d}| D ]}|| q W d    n1 s2w   Y  | dt  d}t	|t
| t| dt
 d	| S )
Nztts-1nova)r   voiceinputz
tmp/audio--z.mp3wbzhttps://z.s3.amazonaws.com/)clientaudiospeechr"   timeopen
iter_byteswrite	s3_clientupload_filebucket_nameosremove)textuser_idr'   
audio_filefchunks3_keyr   r   r   synthesize_speech,   s    
rA   c              
      s   z(ddi}t j| |dd}|  t|jd}|d}ddd	 |D }|W S  t jyD } ztd
|  W Y d}~dS d}~ww )u$   특정 URL에서 정보를 가져옴
User-AgentMozilla/5.0
   headerstimeouthtml.parserp
c                 S   s   g | ]}|  qS r   )get_text)r   rI   r   r   r   r   F       z&fetch_website_data.<locals>.<listcomp>u)   웹사이트 정보 가져오기 실패: N )	requestsr   raise_for_statusr   r;   find_alljoinRequestExceptionprint)urlrF   r'   soup
paragraphsr;   er   r   r   fetch_website_data=   s   
rX   c              
      s   zQddi}t j| |dd}|  t|jd}| h}|jdddD ]}|d	 }tj| |}|	| r9|
| q"d
}|D ]}	td|	  |t|	d 7 }q>|W S  t jym }
 ztd|
  W Y d}
~
d
S d}
~
ww )uE   홈페이지에서 내부 링크를 찾아 전체 페이지 크롤링rB   rC   rD   rE   rH   aT)hrefrZ   rM   u   크롤링 중: z

u   웹사이트 크롤링 실패: N)rN   r   rO   r   r;   rP   urllibparseurljoin
startswithaddrS   rX   rR   )base_urlrF   r'   rU   linksa_tagrZ   full_urlall_textrT   rW   r   r   r   fetch_all_pagesN   s.   

re        c                    sp   | d t | g  }||j  d dd }t|| d } fdd|D }d|d| S )uE   질문과 관련 있는 텍스트를 상위 20개 문장으로 제한rJ   r      Nc                    s   g | ]} | qS r   r   )r   i	sentencesr   r   r   p   rL   z&find_relevant_text.<locals>.<listcomp>)splitr   fit_transformTtoarraynpargsortrQ   )question	text_datamax_sentences
max_length
vectorizersimilaritiestop_indicestop_sentencesr   rj   r   find_relevant_texth   s   
rz   c                 C   s   t d}|rt| |nd}tjjdddigd|  d}t|j |jr&|jnd}dd	d
ddd
g}|r?|dd| d
 |rL|dd| d
 |d| d
 tjj	jd|d}|j
d jjS )ua   GPT-4o를 사용해 ug-inc.net 크롤링 데이터 + 웹 검색 정보를 포함한 답변 생성zhttps://www.ug-inc.net/rM   r   typeweb_search_previewzUnite & Grow )r   toolsr,   systemu`   与えられた質問を見て、関連情報を取得して回答を作成してください。)roler   u?   回答に何を参照したかは言わないでください。uI   以下の企業公式サイトの情報を参考にしてください。
uC   以下のウェブ検索の情報を参考にしてください。
userr   r   )re   rz   r/   	responsesr"   rS   output_textappendr    r!   r#   r$   r   )rr   website_datarelevant_textweb_search_responseweb_search_textr   r'   r   r   r   generate_gpt_answeru   s4   
r   z/healthc                      s
   ddiS )Nstatushealthyr   r   r   r   r   health_check   s   r   z/api/speechspeech_textc                    s6   | j }| j}|stdddt||I d H }d|iS )N  zText is requiredstatus_codedetailr=   )r;   
chat_tokenr   rA   )r   r;   r   r=   r   r   r   r1      s   r1   z/api/ask_questionuser_questionc                    s,   | j  }|stdddt|}d|iS )Nr   zQuestion is requiredr   r(   )rr   r%   r   r   )r   question_textgenerated_answerr   r   r   ask_question   s   
r   )rf   rg   )7fastapir   r   r   r   fastapi.responsesr   r   r   reboto3r9   r2   	db_moduleschemasr   r	   r
   r   r   r   configr   r   httpxjsonpandaspdPyPDF2sklearn.feature_extraction.textr   loggingrN   bs4r   urllib.parser[   numpyrp   routerr   r/   r6   r8   r)   rA   rX   re   rz   r   r   r   postr1   r   r   r   r   r   <module>   sN     



'

