Commit dabf7132 authored by Everardo Gonzalez

Changed HTML parser from lxml to html.parser

parent bb9f5ca3
......@@ -36,8 +36,9 @@
"source": [
"# look for html class \"filename\" in URL and try download\n",
"\n",
"soup = BeautifulSoup(html, \"lxml\")\n",
"# soup = BeautifulSoup(html, \"lxml\")\n",
"\n",
"soup = BeautifulSoup(html, \"html.parser\") # works better \n",
"\n",
"for link in soup.select('a.filename'):\n",
" href = link.get('href')\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment