-
Notifications
You must be signed in to change notification settings - Fork 108
Closed
Description
For example, extraction fails for every URL from ambcrypto.com with the following traceback:
```text
article_goose = goose.extract(raw_html=content)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/__init__.py", line 125, in extract
return self.__crawl(crawl_candidate)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/__init__.py", line 153, in __crawl
return crawler_wrapper(self.config.parser_class, parsers, crawl_candidate)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/__init__.py", line 141, in crawler_wrapper
article = crawler.crawl(crawl_candidate)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/crawler.py", line 135, in crawl
return self.process(raw_html, parse_candidate.url, parse_candidate.link_hash)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/crawler.py", line 165, in process
metas = self.metas_extractor.extract()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "goose3/extractors/metas.py", line 122, in extract
"domain": self.get_domain(),
^^^^^^^^^^^^^^^^^
File "goose3/extractors/metas.py", line 34, in get_domain
o = urlparse(self.article.final_url)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "urllib/parse.py", line 385, in urlparse
url, scheme, _coerce_result = _coerce_args(url, scheme)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "urllib/parse.py", line 124, in _coerce_args
return _decode_args(args) + (_encode_result,)
^^^^^^^^^^^^^^^^^^
File "urllib/parse.py", line 108, in _decode_args
return tuple(x.decode(encoding, errors) if x else '' for x in args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "urllib/parse.py", line 108, in <genexpr>
return tuple(x.decode(encoding, errors) if x else '' for x in args)
^^^^^^^^
AttributeError: 'list' object has no attribute 'decode'
```
Metadata
Metadata
Assignees
Labels
No labels