#!/bin/dash
#
# getbib - fetch the BibTeX entry for a DOI from Crossref and append it to a
# bibliography file, skipping DOIs that are already present.
#
# Usage: getbib <file.pdf | DOI | directory-containing-PDFs>
#
# Environment:
#   BIB_FILE  bibliography file to append to (default: ~/latex/uni.bib)
#
# Requires: curl, GNU sed (uses the `T` command, `\L` and `\n` in
# replacements), and pdfinfo/pdftotext (poppler) for PDF input.

BIB_FILE="${BIB_FILE:-${HOME}/latex/uni.bib}"

#######################################
# Extract the first DOI found on stdin.
# A DOI is recognised when it follows a "doi" keyword (any case), optionally
# written as "doi.org/", "doi:" or "doi " and when the value itself starts
# with the mandatory "10.<registrant>/" prefix. Anchoring on that prefix keeps
# ordinary words such as "doing" from being mistaken for a DOI.
# Trailing sentence punctuation (. , ;) is not part of the result.
# Outputs: the DOI on stdout, or nothing when no line matches.
#######################################
correction_method() {
  # `T` skips the `q` while no substitution has succeeded, so sed stops at the
  # first matching line only.
  sed -n -E \
    's/.*[Dd][Oo][Ii]((\.org)?\/?|:? *)(10\.[0-9][0-9.]*\/[^[:space:]]*[^[:space:].,;]).*/\3/p; T; q'
}

#######################################
# Find the DOI of a PDF, first in its metadata (pdfinfo) and, failing that,
# in the text of its first two pages (pdftotext).
# Arguments: $1 - path to the PDF
# Outputs:   the DOI on stdout; a diagnostic on stderr when none is found
# Returns:   0 when a DOI was found, 1 otherwise
#######################################
get_doi_from_pdf() {
  local pdf found
  pdf="${1}"

  found="$(pdfinfo "${pdf}" 2>/dev/null | correction_method)"
  if [ -z "${found}" ]; then
    found="$(pdftotext -q -l 2 "${pdf}" - 2>/dev/null | correction_method)"
  fi

  if [ -z "${found}" ]; then
    printf 'No DOI found for PDF: %s\n' "${pdf}" >&2
    return 1
  fi

  printf '%s\n' "${found}"
}

#######################################
# Reformat the single-line BibTeX record returned by Crossref (stdin) into
# one field per line, shorten and lowercase the citation key, lowercase the
# field names and put spaces around the first "=" of every field.
# Outputs: the reformatted entry on stdout
#######################################
correct_names() {
  # First pass: split the one-line record into one field per line.
  # Second pass: key "Doe_2020" -> "doe20"; field names lowercased. Only the
  # text before the FIRST "=" is lowercased so that values which themselves
  # contain "=" (titles, URLs with query strings) keep their case.
  sed 's/}, /},\n /g
    s/, /,\n /
    s/ }/\n}/
    s/,\s*pages=/,\n\tpages=/' |
    sed '1s/^ *//
      1s/[0-9]*\([0-9]\{2\}\)/\1/
      1s/_//
      1s/.*/\L&/
      s/^[^=]*=/\L&/
      s/=/ = /'
}

#######################################
# Download the BibTeX entry for a DOI and append it to BIB_FILE unless an
# entry with the same DOI is already there.
# Globals:   BIB_FILE (read; the file is created/appended to)
# Arguments: $1 - DOI
# Outputs:   the entry (in red) and a status line on stdout; errors on stderr
# Returns:   0 when the entry was added or already existed, 1 on failure
#######################################
process_doi() {
  local doi raw_entry bibtex_entry
  doi="${1}"

  # -f turns HTTP errors into a non-zero exit instead of an error page body.
  raw_entry="$(curl -fsSL \
    "https://api.crossref.org/works/${doi}/transform/application/x-bibtex")" || {
    printf 'Failed to fetch bibtex entry for DOI: %s\n' "${doi}" >&2
    return 1
  }
  bibtex_entry="$(printf '%s\n' "${raw_entry}" | correct_names)"

  # Anything that is not a BibTeX record (empty reply, "Resource not found.")
  # must never reach the bibliography file.
  case "${bibtex_entry}" in
    @*) ;;
    *)
      printf 'Failed to fetch bibtex entry for DOI: %s\n' "${doi}" >&2
      return 1
      ;;
  esac

  printf '\033[0;31m%s\033[0m\n' "${bibtex_entry}"

  # A missing BIB_FILE simply means "not present yet"; silence grep for it.
  if grep -iFq -- "doi = {${doi}}" "${BIB_FILE}" 2>/dev/null; then
    echo "Bibtex entry for DOI: ${doi} already exists in the file."
    return 0
  fi

  mkdir -p -- "$(dirname -- "${BIB_FILE}")" || return 1
  # Separate entries with a blank line, but do not start the file with one.
  if [ -s "${BIB_FILE}" ]; then
    printf '\n' >>"${BIB_FILE}" || return 1
  fi
  printf '%s\n' "${bibtex_entry}" >>"${BIB_FILE}" || return 1
  echo "Added bibtex entry for DOI: ${doi}"
}

#######################################
# Entry point: dispatch on the kind of argument given.
# Arguments: $1 - a directory of PDFs, a single .pdf file, or a DOI
#                 (bare, "doi:<DOI>" or a doi.org URL)
# Returns:   0 on success (directory mode is best-effort and always 0),
#            1 when no DOI could be determined or the entry was not fetched
#######################################
main() {
  local input pdf doi
  input="${1}"

  if [ -z "${input}" ]; then
    echo "Give either a pdf file or a DOI or a directory path that has PDFs as an argument."
    return 0
  fi

  if [ -d "${input}" ]; then
    for pdf in "${input}"/*.pdf; do
      # An unmatched glob stays literal; skip it instead of "processing" it.
      [ -f "${pdf}" ] || continue
      doi="$(get_doi_from_pdf "${pdf}")" || continue
      process_doi "${doi}"
    done
    return 0
  fi

  if [ -f "${input}" ]; then
    case "${input}" in
      *.pdf)
        doi="$(get_doi_from_pdf "${input}")" || return 1
        process_doi "${doi}"
        return "$?"
        ;;
    esac
  fi

  doi="$(printf '%s\n' "${input}" | correction_method)"
  if [ -z "${doi}" ]; then
    # Accept a bare DOI that carries no "doi" keyword at all.
    case "${input}" in
      10.*/*) doi="${input}" ;;
    esac
  fi

  if [ -z "${doi}" ]; then
    printf 'Could not find a DOI in: %s\n' "${input}" >&2
    return 1
  fi

  process_doi "${doi}"
}

main "$@"