【Python】はてなブログのAPIを使って投稿した記事一覧をExcelファイル（xlsx）に出力する

今回はPythonとはてなブログのAPIを使用して、これまでに投稿した記事の一覧をExcelファイル（xlsx）に出力するプログラムのサンプルコードについて紹介したいと思います。

はてなブログ記事一覧取得

# Blog domain; interpolated into the blog.hatena.ne.jp AtomPub endpoint URLs
# and into the "https://<blog>" link written to the metadata sheet.
blog = "<ブログ名>.hatenablog.jp"
# Hatena user ID; part of the endpoint URL and the first element of the
# (user, apiKey) pair passed to requests as auth=.
user = "ユーザーID"
# API key of the account; the second element of the auth pair.
apiKey = "<自分のAPI鍵>"

変数blogのドメインについては、自分の保有している一部のブログで.hatenablog.comを指定すると403 Forbiddenエラーが発生することがあったため、ここでは.hatenablog.jpとしています。
上手く動かない場合は、jpとcomを入れ替えて試してみてください。

import requests
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import pandas as pd
# Fetch the blog's AtomPub service document and build the "metadata" sheet.
# The (user, apiKey) pair is handed to requests as the auth= credentials.
user_pass_tuple = (user, apiKey)
blog_service_url = "https://blog.hatena.ne.jp/{}/{}/atom".format(user, blog)
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(blog_service_url, auth=user_pass_tuple, timeout=30)
# Fail loudly on 401/403/404 instead of parsing an error body as the document.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'xml')
# Fall back to the blog domain when the response carries no title element,
# rather than crashing with an AttributeError on None.
title_tag = soup.find("title")
title = title_tag.text if title_tag is not None else blog
# Rows have two or three cells; pandas pads the shorter ones when building
# the frame. The first row names the columns: key, literal, uri.
rows = [
    ["key", "literal", "uri"],
    ["description", "はてなブログの記事一覧"],
    ["footer", user],
    ["header", title],
    ["link", "はてなブログ", "https://" + blog],
]
df_blog = pd.DataFrame(rows)
print("完了しました。")

def _child_text(parent, name):
    """Return the text of the first ``name`` element under ``parent``, or None if absent."""
    tag = parent.find(name)
    return tag.text if tag is not None else None


def _date_part(timestamp):
    """Return the part of ``timestamp`` before the first "T", or None for None."""
    return timestamp.split("T")[0] if timestamp is not None else None


# Walk the paginated entry collection, collect one row per published entry,
# then write the metadata and entry sheets to an Excel workbook.
blog_entries_url = "https://blog.hatena.ne.jp/{}/{}/atom/entry".format(user, blog)

# The first row is the column header of the "items" sheet.
rows = []
rows.append(["collections", "date", "label", "thumbnail", "url", "description", "Updated"])
page = 1
while True:
    print("ページ数", page)
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(blog_entries_url, auth=user_pass_tuple, timeout=30)
    # Without this, an auth/permission error would be parsed as an empty feed
    # and the script would silently export zero entries.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'xml')
    for entry in soup.find_all("entry"):
        # Keep published entries only. Per RFC 5023 a missing app:draft
        # element is treated as "no" (i.e. not a draft).
        draft = _child_text(entry, "app:draft")
        if draft is not None and draft != "no":
            continue
        title = _child_text(entry, "title")
        updated = _date_part(_child_text(entry, "updated"))
        published = _date_part(_child_text(entry, "published"))
        summary = _child_text(entry, "summary")
        alternate = entry.find(rel="alternate")
        url = alternate.get("href") if alternate is not None else None
        # Collect category terms, skipping categories without a term so that
        # the join below never receives None.
        category_terms = [
            term
            for term in (category.get("term") for category in entry.find_all("category"))
            if term
        ]
        # The thumbnail column is left empty.
        rows.append(["|".join(category_terms), published, title, "", url, summary, updated])
    # Follow the rel="next" link; stop when there is none (last page).
    next_link = soup.find(rel="next")
    next_href = next_link.get("href") if next_link is not None else None
    if not next_href:
        break
    blog_entries_url = next_href
    page += 1
df_entries = pd.DataFrame(rows)
# rows[0] is the header row, so it is excluded from the reported article count.
print("{}件の記事を取得しました。".format(len(rows) - 1))
output_path = "hatena_entry.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # header=False / index=False: the first list in each frame already serves
    # as the header row, so pandas' own labels are suppressed.
    df_blog.to_excel(writer, sheet_name='metadata', index=False, header=False)
    df_entries.to_excel(writer, sheet_name='items', index=False, header=False)

参照：https://zenn.dev/nakamura196/articles/fa6dc7d313ebe7