# ebook-to-md.py

#!/usr/bin/env python3
import getopt
import os
import re
import sys

import ebooklib
import html2text
from ebooklib import epub

# File extensions (lower-case) of EPUB documents that are exported as Markdown
# in per-document mode.
_DOCUMENT_EXTENSIONS = ('.html', '.xhtml', '.htm')

# Matches the target of a *relative* href that points at an HTML document.
# Group 1 is the attribute prefix including the opening quote, group 2 the
# path without its extension.
_LOCAL_HTML_LINK = re.compile(
    r"""(href\s*=\s*["'])"""           # attribute name and opening quote
    r"""(?![a-z][a-z0-9+.\-]*:|//)"""  # skip absolute URLs (scheme: or //host)
    r"""([^"'#?]*?)"""                 # path up to the extension
    r"""\.(?:xhtml|html|htm)"""        # document extension
    r"""(?=["'#?])""",                 # extension must end the path
    re.IGNORECASE,
)


def _rewrite_local_links(html):
    """Point relative links to HTML documents at the generated ``.md`` files."""
    def _to_md(match):
        # Output files are written flat into one directory under their base
        # name, so directory components of the link target are dropped.
        return match.group(1) + os.path.basename(match.group(2)) + '.md'

    return _LOCAL_HTML_LINK.sub(_to_md, html)


def convert(inputfile, outputfile, is_seperate):
    """Convert the documents of an EPUB file to Markdown.

    Args:
        inputfile: Path of the EPUB file to read.
        outputfile: Path of the Markdown file to write. When ``is_seperate``
            is true this is instead the directory that receives one ``.md``
            file per document; it is created if it does not exist.
        is_seperate: When false, every document is converted and the results
            are joined with a blank line into the single file ``outputfile``.
            When true, each HTML/XHTML document is written to
            ``<outputfile>/<document base name>.md`` and relative links
            between documents are rewritten to the ``.md`` names.
            (The parameter name keeps its original spelling so existing
            callers continue to work.)
    """
    book = epub.read_epub(inputfile)
    documents = [
        item for item in book.get_items()
        if item.get_type() == ebooklib.ITEM_DOCUMENT
    ]

    if not is_seperate:
        # Convert everything before opening the output so a failure during
        # conversion does not leave a truncated file behind.
        text_items = [
            html2text.html2text(item.get_content().decode('utf-8'))
            for item in documents
        ]
        with open(outputfile, 'w', encoding='utf-8') as f:
            f.write('\n\n'.join(text_items))
        return

    # exist_ok lets the conversion be re-run into the same directory.
    os.makedirs(outputfile, exist_ok=True)
    for item in documents:
        stem, ext = os.path.splitext(os.path.basename(item.get_name()))
        if ext.lower() not in _DOCUMENT_EXTENSIONS:
            continue
        html = _rewrite_local_links(item.get_content().decode('utf-8'))
        text = html2text.html2text(html)
        # NOTE: documents sharing a base name in different EPUB folders map to
        # the same output file; the later one overwrites the earlier one.
        with open(os.path.join(outputfile, stem + '.md'), 'w', encoding='utf-8') as f:
            f.write(text)


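# Command-line options:
#   -i / --input     path of the EPUB file to read
#   -o / --output    path of the output file (output directory when -s is given)
#   -s / --seperate  write one Markdown file per document instead of a single file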
def main():
    """Parse the command line and run the conversion.

    Options:
        -h, --help       Print the usage line and exit.
        -i, --input      Path of the EPUB file to read (required).
        -o, --output     Path of the output file, or of the output directory
                         when -s is given (required).
        -s, --seperate   Write one Markdown file per document.

    Exits with status 2 and prints the usage line to stderr when an option is
    unknown or when the input or output path is missing.
    """
    usage = "{} -i <inputfile> -o <outputfile> [-s]".format(
        os.path.basename(sys.argv[0]) or "main.py"
    )
    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "hsi:o:", ["help", "seperate", "input=", "output="]
        )
    except getopt.GetoptError:
        print(usage, file=sys.stderr)
        sys.exit(2)

    # Start from None so a missing -i/-o is reported as a usage error rather
    # than surfacing as an UnboundLocalError at the convert() call.
    inputfile = None
    outputfile = None
    is_seperate = False
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-i", "--input"):
            inputfile = arg
        elif opt in ("-o", "--output"):
            outputfile = arg
        elif opt in ("-s", "--seperate"):
            is_seperate = True

    if not inputfile or not outputfile:
        print(usage, file=sys.stderr)
        sys.exit(2)

    convert(inputfile, outputfile, is_seperate)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()