Search DuckDuckGo from the command-line

ddg.py 1.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. """
  2. Taken and modified from https://github.com/thibauts/duckduckgo
  3. """
  4. import requests
  5. from lxml import html
  6. import time
  7. def search(keywords, max_results=None):
  8. url = 'https://duckduckgo.com/html/'
  9. params = {
  10. 'q': keywords,
  11. 's': '0',
  12. }
  13. results = []
  14. yielded = 0
  15. while True:
  16. res = requests.post(url, data=params)
  17. doc = html.fromstring(res.text)
  18. for link in doc.cssselect('#links .links_main'):
  19. result = {}
  20. a_elem = link.cssselect('a')[0]
  21. desc = link.cssselect('.result__snippet')[0]
  22. result['link'] = a_elem.get('href')
  23. result['title'] = a_elem.text_content()
  24. result['desc'] = desc.text_content()
  25. results.append(result)
  26. for result in results:
  27. yield result
  28. time.sleep(0.1)
  29. yielded += 1
  30. if max_results and yielded >= max_results:
  31. return
  32. try:
  33. form = doc.cssselect('.results_links_more form')[-1]
  34. except IndexError:
  35. return
  36. params = dict(form.fields)