#!/usr/bin/env python3
import sys
import os
import getopt
import ebooklib
import html2text
from ebooklib import epub
def convert(inputfile, outputfile, is_seperate):
    """Convert the documents of an EPUB book to Markdown text.

    Args:
        inputfile: Path of the EPUB file to read.
        outputfile: Path of the text file to write. When ``is_seperate`` is
            true this is instead the directory that receives one ``.md``
            file per document.
        is_seperate: If true, write every HTML/XHTML document of the book to
            its own file; otherwise join all documents (separated by a blank
            line) into a single file.

    Raises:
        OSError: If the output file or directory cannot be created/written.
    """
    book = epub.read_epub(inputfile)
    documents = [
        item for item in book.get_items()
        if item.get_type() == ebooklib.ITEM_DOCUMENT
    ]

    if not is_seperate:
        # Convert everything before opening the output so that a failure
        # while decoding/converting does not leave a truncated file behind.
        text_items = [
            html2text.html2text(item.get_content().decode('utf-8'))
            for item in documents
        ]
        with open(outputfile, 'w', encoding='utf-8') as f:
            f.write('\n\n'.join(text_items))
        return

    # Re-running into an existing directory (or a nested path) must not fail.
    os.makedirs(outputfile, exist_ok=True)
    for item in documents:
        stem, ext = os.path.splitext(os.path.basename(item.get_name()))
        # Documents may be stored as .xhtml as well as .html; accept both.
        if ext.lower() not in ('.html', '.xhtml'):
            continue
        html = item.get_content().decode('utf-8')
        # Point links between documents at the generated .md files.
        # '.xhtml' is replaced first so it does not end up as '.xmd'.
        html = html.replace('.xhtml', '.md').replace('.html', '.md')
        text = html2text.html2text(html)
        with open(os.path.join(outputfile, stem + '.md'), 'w', encoding='utf-8') as f:
            f.write(text)
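# Command-line options:
#   -i / --input     path of the EPUB file to read
#   -o / --output    path of the output file (output directory when -s is given)
#   -s / --seperate  write one .md file per document instead of a single file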
def main():
    """Parse the command line and run the conversion.

    Options:
        -h, --help       print the usage line and exit
        -i, --input      EPUB file to read (required)
        -o, --output     output file, or output directory with -s (required)
        -s, --seperate   write one file per document instead of a single file

    Exits with status 2 when the options cannot be parsed or when a required
    option is missing.
    """
    usage = "main.py -i <inputfile> -o <outputfile> [-s]"
    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], "hsi:o:", ["help", "seperate", "input=", "output="]
        )
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    # Start from "not given" so that a missing -i/-o is reported as a usage
    # error instead of crashing on an unassigned local variable.
    inputfile = None
    outputfile = None
    is_seperate = False
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-i", "--input"):
            inputfile = arg
        elif opt in ("-o", "--output"):
            outputfile = arg
        elif opt in ("-s", "--seperate"):
            is_seperate = True

    if not inputfile or not outputfile:
        print(usage)
        sys.exit(2)

    # convert inputfile to outputfile
    convert(inputfile, outputfile, is_seperate)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()