#!/bin/bash
#
# ablexicon - Compare an input list of words against a dictionary and
# optional lexicon. If any words are in neither the dictionary nor the
# lexicon, log them to stdout.
#

# Abort on any unhandled command failure. Commands whose failure must be
# reported with a custom message are therefore tested directly in an
# `if`, because a separate `$?` check would never be reached.
set -e

# Disable pathname expansion so that no unquoted value is ever expanded
# against the contents of the current directory.
set -f

#
# Print the help text to stdout and terminate the script.
# Takes no arguments. Always exits with status 1.
#
function usage () {
  echo "Find occurrences of non-dictionary/lexicon words"
  echo ""
  echo "Usage:"
  echo " ${0##*/} [options]"
  echo ""
  echo "Options:"
  echo " -f, --file source text (defaults to /dev/fd/0)"
  echo " -l, --lexicon lexicon file (one word per line)"
  echo " -h, --help display this help"
  exit 1
}

#
# Verify that required commands are present
#
REQUIRED=( "spell" "getopt" )
for i in "${REQUIRED[@]}"; do
  if ! command -v "$i" >/dev/null; then
    echo "'$i' must be installed, exiting..." >&2
    exit 1
  fi
done

#
# Normalise the command line with getopt. getopt reports the offending
# option itself (prefixed with the script name given via -n).
#
if ! GETOPT_OUT=$(getopt -o hf:l: --long help,file:,lexicon: \
    -n "${0##*/}" -- "$@"); then
  echo "Exiting..." >&2
  exit 1
fi

# getopt emits shell-quoted words; eval is needed to re-parse them into
# the positional parameters.
eval set -- "$GETOPT_OUT"

INFILE=/dev/fd/0
LEXICON=/dev/null

while true; do
  case "$1" in
    -h | --help )
      usage
      ;;
    -f | --file )
      INFILE="$2"
      shift 2
      ;;
    -l | --lexicon )
      LEXICON="$2"
      shift 2
      ;;
    -- )
      shift
      break
      ;;
    * )
      break
      ;;
  esac
done

# The input must be a regular file, unless it is the default stdin path.
if [[ ! -f "$INFILE" && "$INFILE" != /dev/fd/0 ]]; then
  # Diagnostics go to stderr; stdout is reserved for the word list.
  echo "Invalid input file" >&2
  usage >&2
fi

#
# Read the lexicon into a set (associative array) so that each lookup
# below is a single hash probe instead of a scan over every entry.
# Keys carry a "w:" prefix so that blank lexicon lines and entries such
# as "@" or "*" never produce an empty or special array subscript.
#
declare -A lexicon_set=()
while IFS= read -r entry || [[ -n "$entry" ]]; do
  lexicon_set["w:$entry"]=1
done < "$LEXICON"

#
# Search for all input words in the dictionary and sort the output.
# spell prints the words it does not know; sort -u removes duplicates.
#
spell < "$INFILE" | sort -u | while read -r word; do
  # Ignore blank lines in the spell output.
  [[ -n "$word" ]] || continue

  #
  # Search for each remaining word in the lexicon
  #
  key="w:$word"
  if [[ -z "${lexicon_set[$key]+x}" ]]; then
    #
    # The word is neither in the dictionary nor the lexicon, send
    # it to stdout.
    #
    printf '%s\n' "$word"
  fi
done