123456789101112131415161718192021222324252627282930313233343536373839404142 |
- """
- Taken and modified from https://github.com/thibauts/duckduckgo
- """
- import requests
- from lxml import html
- import time
-
def search(keywords, max_results=None, timeout=None):
    """Yield DuckDuckGo HTML search results for *keywords*, page by page.

    Each yielded item is a dict with three keys:

    * ``'link'``  -- the ``href`` attribute of the first anchor in the result
    * ``'title'`` -- the text content of that anchor
    * ``'desc'``  -- the text content of the result's ``.result__snippet``
      element, or ``''`` when the result has no snippet

    Args:
        keywords: Query string, sent as the ``q`` form field.
        max_results: Stop after this many results have been yielded.
            ``None`` (or any other falsy value, including ``0``) means
            "no limit": keep going until no "more results" form is found.
        timeout: Passed straight through to ``requests.post``. The default
            ``None`` keeps the previous behaviour of waiting indefinitely.

    Yields:
        dict: One search result, in page order.
    """
    url = 'https://duckduckgo.com/html/'
    params = {
        'q': keywords,
        's': '0',
    }

    yielded = 0
    while True:
        res = requests.post(url, data=params, timeout=timeout)
        doc = html.fromstring(res.text)

        # Build a fresh list for every page. Keeping one list across pages
        # would re-yield all earlier results each time a new page is fetched.
        results = []
        for link in doc.cssselect('#links .links_main'):
            anchors = link.cssselect('a')
            if not anchors:
                # Nothing to link to, so there is no usable result here.
                continue
            a_elem = anchors[0]
            snippets = link.cssselect('.result__snippet')
            results.append({
                'link': a_elem.get('href'),
                'title': a_elem.text_content(),
                # A result without a snippet is still reported, with an
                # empty description instead of raising IndexError.
                'desc': snippets[0].text_content() if snippets else '',
            })

        for result in results:
            yield result
            # Short pause after each result is consumed.
            time.sleep(0.1)
            yielded += 1
            if max_results and yielded >= max_results:
                return

        # The last "more results" form carries the fields for the next page
        # request; when the page has none, there is nothing left to fetch.
        forms = doc.cssselect('.results_links_more form')
        if not forms:
            return
        params = dict(forms[-1].fields)
|