Toshusai blog

知識の保管庫

PythonでWebスクレイピング

PythonでWebスクレイピング

from urllib.request import urlopen
from html.parser import HTMLParser

class MyHTMLParser(HTMLParser):
    """HTML parser that prints every start tag, end tag and text node it sees.

    The handlers below are callbacks that ``HTMLParser.feed`` invokes while
    it walks the markup; each one writes a single line to standard output.
    """

    # NOTE: print() is called as a function. The Python 2 ``print "..."``
    # statement form is a SyntaxError on Python 3, which the ``html.parser``
    # and ``urllib.request`` imports of this file require.

    def handle_starttag(self, tag, attrs):
        """Report an opening tag; ``attrs`` is accepted but not printed."""
        print("Encountered a start tag:", tag)

    def handle_endtag(self, tag):
        """Report a closing tag."""
        print("Encountered an end tag :", tag)

    def handle_data(self, data):
        """Report a run of text found between tags."""
        print("Encountered some data  :", data)

# NOTE(review): "https://hogehoge" looks like a placeholder; replace it with
# the page you actually want to scrape.
url = "https://hogehoge"

# Use the response as a context manager so the connection is closed even if
# reading or decoding raises.
with urlopen(url) as response:
    # Prefer the charset declared in the Content-Type header; fall back to
    # UTF-8 when the server does not declare one.
    charset = response.headers.get_content_charset() or "utf-8"
    html = response.read().decode(charset)

parser = MyHTMLParser()
parser.feed(html)
# Flush any text still buffered inside the parser (e.g. trailing data that is
# not followed by another tag).
parser.close()

参考