o
    "`^h2                     @   sx  d dl Z d dlZd dlZd dlZd dlZejd  dkr4d dlmZ d dlm	Z	m
Z
 d dlmZmZmZ nd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ z
d dlmZ d	ZW n eyk   d dlmZ d
ZY nw g dZdZdZdZdZdZdZe dZese dZesdZee j edZ!ze!"  W n	 e#y   Y nw dZ$zhe j%e j&e'd  Z(z*e j e(dZ)d dl*Z*e*+e)dZ,zdd e,- D Z.W e,/  [,ne,/  [,w W n/ e#y   e j e(dZ)e+e)Z,dd e,- D Z.W d   n	1 sw   Y  Y nw W n e#y   e$gZ.Y nw dd Z0dd  Z1d0d!d"Z2d#d$ Z3	 			d1d,d-Z4d.d/ Z5dS )2    N   )LWPCookieJar)Requesturlopen)
quote_plusurlparseparse_qs)r   )r   r   )BeautifulSoupTF)searchluckyget_random_user_agentget_tbszhttps://www.google.%(tld)s/zuhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)szrhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)szhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)sz~https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)s)hlqnumbtnGstarttbssafecrHOMEUSERHOME.z.google-cookiez2Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)zuser_agents.txt.gzrbc                 C      g | ]}|  qS  strip.0_r   r   S/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/googlesearch/__init__.py
<listcomp>n       r"   zuser_agents.txtc                 C   r   r   r   r   r   r   r!   r"   u   r#   c                   C   s
   t tS )za
    Get a random user agent string.

    :rtype: str
    :return: Random user agent string.
    )randomchoiceuser_agents_listr   r   r   r!   r   {   s   
r   c                 C   s   |  d} | d}dt  S )z
    Helper function to format the tbs parameter.

    :param datetime.date from_date: Python date object.
    :param datetime.date to_date: Python date object.

    :rtype: str
    :return: Dates encoded in tbs format.
    z%m/%d/%Yz-cdr:1,cd_min:%(from_date)s,cd_max:%(to_date)s)strftimevars)	from_dateto_dater   r   r!   r      s   



r   c                 C   s   |du rt }t| }|d| t| |rt|}n
t }t||d}t|| |	 }|
  zt  W |S  tyE   Y |S w )aL  
    Request the given URL and return the response page, using the cookie jar.

    :param str url: URL to retrieve.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :rtype: str
    :return: Web page retrieved for the given URL.

    :raises IOError: An exception is raised on error.
    :raises urllib2.URLError: An exception is raised on error.
    :raises urllib2.HTTPError: An exception is raised on error.
    Nz
User-Agent)context)
USER_AGENTr   
add_header
cookie_jaradd_cookie_headerr   ssl_create_unverified_contextextract_cookiesreadclosesave	Exception)url
user_agent
verify_sslrequestresponser+   htmlr   r   r!   get_page   s&   


r=   c                 C   sh   z)|  drt| d}t|jd d } t| d}|jr$d|jvr'| W S W d S W d S  ty3   Y d S w )Nz/url?httpr   r   google)
startswithr   r   querynetlocr6   )linkor   r   r!   filter_result   s   


rE   comen0off
          @ c              	   c   s   t  }d}t| } |
si }
tD ]}||
 v rtd|qttt  || |r9|dkr3tt  }nt	t  }n|dkrCt
t  }ntt  }|rO||k r|}|
 D ]\}}t|}t|}|d||f  }qUt| t|||}tr}t|d}nt|}z|jddd}W n ty   |jd	d}|r|  |d}Y nw |D ]7}z|d
 }W n	 ty   Y qw t|}|sqt|}||v rq|| |V  |d7 }|r||kr dS q||krdS ||7 }|dkrtt  }nt	t  }|rO||k sOdS dS )a  
    Search the given query string using Google.

    :param str query: Query string. Must NOT be url-encoded.
    :param str tld: Top level domain.
    :param str lang: Language.
    :param str tbs: Time limits (i.e "qdr:h" => last hour,
        "qdr:d" => last 24 hours, "qdr:m" => last month).
    :param str safe: Safe search.
    :param int num: Number of results per page.
    :param int start: First result to retrieve.
    :param int stop: Last result to retrieve.
        Use None to keep searching forever.
    :param float pause: Lapse to wait between HTTP requests.
        A lapse too long will make the search slow, but a lapse too short may
        cause Google to block your IP. Your mileage may vary!
    :param str country: Country or region to focus the search on. Similar to
        changing the TLD, but does not yield exactly the same results.
        Only Google knows why...
    :param dict extra_params: A dictionary of extra HTTP GET
        parameters, which must be URL encoded. For example if you don't want
        Google to filter similar results you can set the extra_params to
        {'filter': '0'} which will append '&filter=0' to every query.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :rtype: generator of str
    :return: Generator (iterator) that yields found URLs.
        If the stop parameter is None the iterator will loop forever.
    r   zQGET parameter "%s" is overlapping with                 the built-in GET parameterrJ   z&%s=%szhtml.parserr
   )idagbarhref   N)setr   url_parameterskeys
ValueErrorr=   url_homer(   url_next_pageurl_next_page_num
url_searchurl_search_numitemstimesleepis_bs4r	   findfindAllAttributeErrorclearKeyErrorrE   hashadd)rA   tldlangr   r   r   r   stoppausecountryextra_paramsr8   r9   hashescountbuiltin_paramr7   
last_countkvr<   soupanchorsrO   rN   rC   hr   r   r!   r
      s   %



r
   c                  O   s   t t| i |S )z
    Shortcut to single-item search.

    Same arguments as the main search function, but the return value changes.

    :rtype: str
    :return: URL found by Google.
    )nextr
   )argskwargsr   r   r!   r   o  s   	r   )NT)rF   rG   rH   rI   rJ   r   NrK   rL   NNT)6osr$   sysr\   r0   version_infohttp.cookiejarr   urllib.requestr   r   urllib.parser   r   r   	cookieliburlliburllib2bs4r	   r^   ImportError__all__rV   rY   rW   rZ   rX   rS   getenvhome_folderpathjoinr.   loadr6   r,   abspathsplit__file__install_folderuser_agents_filegzipopenfp	readlinesr&   r4   r   r   r=   rE   r
   r   r   r   r   r!   <module>   s   




'
 