#!/bin/sh
#
# getbib - fetch the BibTeX entry for a DOI from crossref.org and append it
# to a bibliography file.
#
# Usage: getbib <DOI | file.pdf | directory-containing-pdfs>
#
# Environment:
#   BIB_FILE  bibliography file to append to (default: $HOME/latex/uni.bib)
#
# Requires: curl, pdfinfo and pdftotext, and GNU sed (the sed scripts below
# use -E, the T command, \s, \L/\E and \n/\t in replacements).

BIB_FILE="${BIB_FILE:-$HOME/latex/uni.bib}"

# Reads text on stdin and prints the DOI from the first line that carries a
# "doi"/"DOI" marker ("doi:", "doi ", "doi.org/", "doi/"), then stops reading.
# Prints nothing when no line matches.
correction_method() {
  sed -n -E 's/.*((DOI|doi)((\.(org))?\/?|:? *))([^: ]+[^ .]).*/\6/p; T; q'
}

# Prints the DOI found in a PDF.
# Arguments: $1 - path to the PDF
# Outputs:   the DOI on stdout; a diagnostic on stderr when none is found
# Returns:   0 when a DOI was found, 1 otherwise
get_doi_from_pdf() {
  pdf=$1
  # Try the document metadata first, then the text of the first two pages.
  doi=$(pdfinfo "$pdf" 2>/dev/null | correction_method)
  if [ -z "$doi" ]; then
    doi=$(pdftotext -q -l 2 "$pdf" - 2>/dev/null | correction_method)
  fi
  if [ -z "$doi" ]; then
    echo "No DOI found for PDF: $pdf" >&2
    return 1
  fi
  printf '%s\n' "$doi"
}

# Turns a command-line argument into a DOI.
# Arguments: $1 - "doi:..."-style text, a doi.org URL, or a bare DOI
# Outputs:   the DOI on stdout
# Returns:   0 when a DOI was recognised, 1 otherwise
doi_from_arg() {
  arg_doi=$(printf '%s\n' "$1" | correction_method)
  if [ -z "$arg_doi" ]; then
    # correction_method only recognises text with a "doi" marker; accept a
    # bare DOI ("10.<registrant>/<suffix>") as it is.
    case $1 in
      10.*/*) arg_doi=$1 ;;
    esac
  fi
  [ -n "$arg_doi" ] || return 1
  printf '%s\n' "$arg_doi"
}

# Reformats a BibTeX entry read from stdin: one field per line, each field
# indented with a tab, closing brace on its own line, a shortened lower-case
# citation key, and lower-case field names followed by " = ".
correct_names() {
  sed 's/}, /},\n\t/g
    s/, /,\n\t/
    s/ }/\n}/
    s/,\s*pages=/,\n\tpages=/' |
    # Line 1 holds the citation key: drop leading spaces, keep only the last
    # two digits of the first digit run, drop the first underscore and
    # lower-case the line. On every line, lower-case only the text before the
    # first "=" so that values containing "=" (e.g. URLs) keep their case.
    sed '1s/^ *//
      1s/[0-9]*\([0-9]\{2\}\)/\1/
      1s/_//
      1s/.*/\L&/
      s/^\([^=]*\)=/\L\1\E = /'
}

# Succeeds when $1 starts with "@", i.e. looks like a BibTeX entry.
is_bibtex_entry() {
  case $1 in
    @*) return 0 ;;
    *) return 1 ;;
  esac
}

# Fetches the BibTeX entry for a DOI and appends it to BIB_FILE unless an
# entry with the same DOI is already there.
# Globals:   BIB_FILE (read; the file is created or appended to)
# Arguments: $1 - the DOI
# Outputs:   the entry and a status line on stdout; errors on stderr
# Returns:   0 when the entry was added or already present, 1 on failure
process_doi() {
  doi=$1
  # -f makes curl print nothing on an HTTP error, -L follows redirects.
  bibtex_entry=$(curl -fsL \
    "https://api.crossref.org/works/${doi}/transform/application/x-bibtex" |
    correct_names)

  # Reject empty responses and anything that is not an entry, so that error
  # text never ends up in the bibliography.
  if ! is_bibtex_entry "$bibtex_entry"; then
    echo "Failed to fetch bibtex entry for DOI: $doi" >&2
    return 1
  fi

  # Show the entry; colour it red only when stdout is a terminal.
  if [ -t 1 ]; then
    printf '\033[0;31m%s\033[0m\n' "$bibtex_entry"
  else
    printf '%s\n' "$bibtex_entry"
  fi

  # DOIs are case-insensitive, so compare without regard to case.
  if [ -f "$BIB_FILE" ] && grep -Fqi -- "doi = {${doi}}" "$BIB_FILE"; then
    echo "Bibtex entry for DOI: $doi already exists in the file."
    return 0
  fi

  mkdir -p "$(dirname "$BIB_FILE")" || return 1
  {
    # Separate entries with a blank line once the file has content.
    [ -s "$BIB_FILE" ] && echo ""
    printf '%s\n' "$bibtex_entry"
  } >>"$BIB_FILE" || return 1
  echo "Added bibtex entry for DOI: $doi"
}

# Dispatches on the kind of argument: directory, PDF file, or DOI text.
main() {
  if [ -z "${1:-}" ]; then
    echo "Give either a pdf file or a DOI or a directory path that has PDFs as an argument."
    return 0
  fi

  if [ -d "$1" ]; then
    for pdf_path in "$1"/*.pdf; do
      # An unmatched glob stays a literal pattern; skip it.
      [ -f "$pdf_path" ] || continue
      pdf_doi=$(get_doi_from_pdf "$pdf_path") || continue
      process_doi "$pdf_doi"
    done
    return 0
  fi

  case $1 in
    *.pdf)
      if [ -f "$1" ]; then
        # Never fall back to parsing the file name as a DOI.
        pdf_doi=$(get_doi_from_pdf "$1") || return 1
        process_doi "$pdf_doi"
        return
      fi
      ;;
  esac

  if ! arg_doi=$(doi_from_arg "$1"); then
    echo "Could not find a DOI in argument: $1" >&2
    return 1
  fi
  process_doi "$arg_doi"
}

main "$@"