Includes new languages; in particular, allow application/foo or text/foo for scripting languages.
151 lines
5 KiB
Bash
Executable file
151 lines
5 KiB
Bash
Executable file
#!/bin/bash
# Best effort auto-pygmentization with transparent decompression
# by Reuben Thomas 2008-2022
# This program is in the public domain.
#
# Usage: <this script> [PYGMENTIZE-OPTION...] FILE
#
# Strategy: ask file(1) for the MIME type of FILE (looking inside compressed
# files) and map it to a pygments lexer; for plain text, try a few special
# filenames and then `pygmentize -N`; if that finds nothing, fail.
# Set the environment variable PYGMENTIZE_OPTS or pass options before the file
# path to configure pygments.
#
# This program can be used as a .lessfilter for the less pager to auto-color
# less's output.

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s [PYGMENTIZE-OPTION...] FILE\n' "${0##*/}" >&2
}

# Map a MIME type (as printed by `file --mime-type --brief`) to a pygments
# lexer name.
# Arguments: $1 - MIME type
# Outputs:   the lexer name on stdout, or nothing if no lexer is known
#            (text/plain is deliberately not handled here; see main).
lexer_for_mime_type() {
  # Better command for XDG-compatible systems: xdg-mime query filetype "$file"
  case "$1" in
    application/xml | image/svg+xml) echo xml ;;
    application/javascript) echo javascript ;;
    application/json) echo json ;;
    text/html) echo html ;;
    text/troff) echo nroff ;;
    text/x-asm) echo nasm ;;
    */x-awk) echo awk ;;
    text/x-c | text/x-csrc | text/x-chdr) echo c ;;
    text/x-c++ | text/x-c++src) echo cpp ;;
    */x-clojure) echo clojure ;;
    text/x-crystal) echo crystal ;;
    text/x-diff) echo diff ;;
    */x-execline) echo execline ;;
    text/x-forth) echo forth ;;
    text/x-fortran) echo fortran ;;
    */x-gawk) echo gawk ;;
    text/x-java) echo java ;;
    */x-lisp) echo common-lisp ;;
    */x-emacs-lisp) echo emacs-lisp ;;
    */x-lua | */x-luatex) echo lua ;;
    text/x-makefile) echo make ;;
    */x-msdos-batch) echo bat ;;
    */x-nawk) echo nawk ;;
    text/x-objective-c) echo objective-c ;;
    */x-pascal) echo pascal ;;
    */x-perl) echo perl ;;
    */x-php) echo php ;;
    */x-po) echo po ;;
    */x-python*) echo python ;;
    */x-ruby) echo ruby ;;
    */x-script.python) echo python ;;
    */x-shellscript) echo sh ;;
    # Workaround for TypeScript: mimetype doesn't recognize it.
    text/vnd.trolltech.linguist) echo typescript ;;
    */x-tcl) echo tcl ;;
    # FIXME: texinfo really needs its own lexer
    text/x-tex | text/x-texinfo) echo latex ;;
    */x-ursa) echo ursa ;;
    */x-vala) echo vala ;;
    text/xml) echo xml ;;
    text/vnd.graphviz) echo graphviz ;;

    # Types that file outputs which pygmentize didn't support as of
    # file 5.41, pygments 2.11.2
    # text/binary
    # text/calendar
    # text/PGP
    # text/prs.lines.tag
    # text/rtf
    # text/spreadsheet
    # text/texmacs
    # text/vcard
    # text/vnd.sosi
    # text/x-Algol68
    # text/x-bcpl
    # text/x-dmtf-mif
    # text/x-gimp-curve
    # text/x-gimp-ggr
    # text/x-gimp-gpl
    # text/x-info
    # text/x-installshield-lid
    # text/x-m4
    # text/x-modulefile
    # text/x-ms-adm
    # text/x-ms-cpx
    # text/x-ms-regedit
    # text/x-ms-tag
    # text/x-systemtap
    # text/x-vcard
    # text/x-wine-extension-reg
    # text/x-xmcd
  esac
}

# Map well-known file names that file(1) reports as plain text to a lexer.
# Arguments: $1 - base name of the file (no directory part)
# Outputs:   the lexer name on stdout, or nothing if the name is not special
lexer_for_special_filename() {
  # TODO: insert more
  case "$1" in
    .zshrc) echo sh ;;
  esac
}

# Map the MIME type of a (possibly compressed) file to a command that writes
# its decompressed contents to stdout when given the file name.
# Arguments: $1 - MIME type as printed by `file --mime-type --brief`
# Outputs:   the command and its options, space-separated, on stdout; nothing
#            if the type is not a supported compression format
decompressor_for_mime_type() {
  case "$1" in
    # TODO: add support
    # application/x-rzip (does not decompress to stdout)
    # application/x-dzip (Windows only)
    application/gzip | application/x-gzip) echo zcat ;;
    application/x-bzip) echo "bzip -dc" ;;
    application/x-bzip2) echo bzcat ;;
    application/x-lz4) echo "lz4 -dc" ;;
    application/x-lzh-compressed) echo "p7zip -dc" ;;
    application/x-lzma) echo lzcat ;;
    application/x-lzip) echo "lzip -dc" ;;
    application/x-xz) echo xzcat ;;
    application/x-zoo) echo "zoo fu" ;;
  esac
}

# Entry point.
# Globals:   PYGMENTIZE_OPTS (read) - extra pygmentize options, word-split
# Arguments: [PYGMENTIZE-OPTION...] FILE - the last argument is the file to
#            show, everything before it is passed through to pygmentize
# Outputs:   the (decompressed, highlighted) file contents on stdout
# Returns:   2 on usage error, 1 if no lexer could be determined, otherwise
#            the exit status of the last command of the display pipeline
main() {
  if (($# == 0)); then
    usage
    return 2
  fi

  local path="${!#}"                  # last argument
  local -a options=("${@:1:$#-1}")    # other args are options for pygmentize
  local -a file_common_opts=(--brief --dereference)

  # Determine the lexer from the type of the (decompressed) contents.
  local content_type lexer
  content_type=$(file --mime-type --uncompress "${file_common_opts[@]}" -- "$path")
  lexer=$(lexer_for_mime_type "$content_type")
  if [[ "$content_type" == text/plain ]]; then
    lexer=$(lexer_for_special_filename "${path##*/}")
    if [[ -z "$lexer" ]]; then
      # pygmentize -N is much cheaper than file, but makes some bad guesses
      # (e.g. it guesses ".pl" is Prolog, not Perl), so it is only asked when
      # nothing else matched.
      lexer=$(pygmentize -N "$path")
    fi
  fi
  if [[ -z "$lexer" ]]; then
    printf '%s: no lexer found for %s\n' "${0##*/}" "$path" >&2
    return 1
  fi

  # Find a concatenator for compressed files, based on the type of the file
  # itself (no --uncompress here).
  local raw_type concat_spec
  local -a concat_cmd=()
  raw_type=$(file "${file_common_opts[@]}" --mime-type -- "$path")
  concat_spec=$(decompressor_for_mime_type "$raw_type")
  if [[ -n "$concat_spec" ]]; then
    read -r -a concat_cmd <<<"$concat_spec"
  fi
  # If no concatenator is known or it is not installed, use cat instead.
  # The output of `command -v` is discarded so it cannot leak into our stdout.
  if ((${#concat_cmd[@]} == 0)) || ! command -v "${concat_cmd[0]}" >/dev/null 2>&1; then
    concat_cmd=(cat)
  fi

  # Find a suitable reader, preceded by a hex dump for binary files,
  # or fmt for text with very long lines
  local -a prereader=()
  local -a reader=(cat)
  local encoding
  encoding=$(file --mime-encoding --uncompress "${file_common_opts[@]}" -- "$path")
  # FIXME: need a way to switch between hex and text view, as file often
  # misdiagnoses files when they contain a few control characters
  # if [[ $encoding == "binary" ]]; then
  #     prereader="od -x" # POSIX fallback
  #     if [[ -n $(which hd) ]]; then
  #         prereader=hd # preferred
  #     fi
  #     lexer=hexdump
  #     encoding=latin1
  #el
  # FIXME: Using fmt does not work well for system logs
  # if [[ "$lexer" == "text" ]]; then
  #     if file "$file" | grep -ql "text, with very long lines"; then
  #         reader=fmt
  #     fi
  # fi
  if [[ "$lexer" != "text" ]]; then
    # PYGMENTIZE_OPTS is a plain string of options and is split on purpose.
    # shellcheck disable=SC2206
    reader=(pygmentize -O "inencoding=$encoding" $PYGMENTIZE_OPTS "${options[@]}" -l "$lexer")
  fi

  # Run the reader; the status of the last pipeline stage is returned.
  if ((${#prereader[@]} > 0)); then
    "${concat_cmd[@]}" "$path" | "${prereader[@]}" | "${reader[@]}"
  else
    "${concat_cmd[@]}" "$path" | "${reader[@]}"
  fi
}

main "$@"