minor corrections & improvements

This commit is contained in:
Emre AKYÜZ 2023-10-15 19:04:05 +03:00 committed by GitHub
parent 8ec5ec569d
commit 156747aa77
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -3,35 +3,36 @@
BIB_FILE="$HOME/latex/uni.bib" BIB_FILE="$HOME/latex/uni.bib"
# correction_method
# Filter: reads text on stdin and prints the first DOI it finds, rewritten
# as "doi:<identifier>". Prints nothing when no input line contains a DOI.
#
# A DOI is recognised after one of the markers "doi"/"DOI", optionally
# followed by ".org", "/" or ":" and spaces (so "doi:10.x/y", "DOI: 10.x/y"
# and "https://doi.org/10.x/y" are all accepted). The identifier itself is
# capture group 6: a run of characters without ':' or ' ' that ends on a
# character that is neither a space nor a period, which drops a
# sentence-ending full stop.
#
# Requires GNU sed: `T` jumps to the end of the script when the substitution
# did not match (so the next line is tried), and `q` stops after the first
# line that did match.
correction_method() {
  sed -n -E 's/.*((DOI|doi)((\.(org))?\/?|:? *))([^: ]+[^ .]).*/doi:\6/p; T; q'
}
# get_doi_from_pdf PDF
# Prints the DOI of PDF (as produced by correction_method) on stdout.
#
# Arguments: $1 - path to the PDF file
# Outputs:   the DOI on stdout; a diagnostic on stderr when none is found
# Returns:   0 when a DOI was found, 1 otherwise
#
# The document metadata reported by pdfinfo is tried first; if that yields
# nothing, the text of the first two pages (pdftotext -l 2) is searched.
# Errors from both tools are discarded so an unreadable file simply counts
# as "no DOI found".
get_doi_from_pdf() {
  pdf="$1"

  doi=$(pdfinfo "$pdf" 2>/dev/null | correction_method)
  if [ -z "$doi" ]; then
    doi=$(pdftotext -q -l 2 "$pdf" - 2>/dev/null | correction_method)
  fi

  if [ -z "$doi" ]; then
    echo "No DOI found for PDF: $pdf" >&2
    return 1
  fi

  printf '%s\n' "$doi"
}
# correct_names
# Filter: normalises the citation key of a BibTeX entry read from stdin.
#
# Only the entry header line is touched, i.e. a line of the form
# "@type{<key ending in four digits>,". On that line:
#   1. every uppercase letter is lowercased (GNU sed `\L`),
#   2. underscores are removed,
#   3. every run of two or more digits is cut down to its last two digits,
#      so a four-digit year becomes a two-digit one.
# Example: "@article{Smith_2020," becomes "@article{smith20,".
# All other lines pass through unchanged.
correct_names() {
  sed '/^@[a-z]\+{[^[:space:]]\+[0-9]\{4\},/{
s/\([A-Z]\)/\L\1/g
s/_//g
s/[0-9]*\([0-9]\{2\}\)/\1/g
}'
}
# normalize_doi DOI
# Prints DOI with percent-encoded bytes decoded and ASCII letters lowercased.
# No trailing newline is written.
#
# Arguments: $1 - the DOI string, e.g. "doi:10.1000/ABC%2FDEF"
# Outputs:   the normalised DOI on stdout, e.g. "doi:10.1000/abc/def"
#
# Decoding works by turning each "%HH" into the escape "\xHH" and letting
# printf's %b conversion expand it. The text is handed to printf directly
# instead of through xargs, because xargs applies its own quote and backslash
# processing to its input: it would strip the backslash of "\xHH" and abort
# on a DOI that contains a quote character.
normalize_doi() {
  # Protect literal backslashes from %b first, then convert only well-formed
  # %HH sequences; a stray '%' is left untouched.
  doi=$(printf '%s' "$1" |
    sed -e 's@\\@\\\\@g' -e 's@%\([0-9A-Fa-f]\{2\}\)@\\x\1@g')

  # `env printf` selects the external printf utility (the one the xargs-based
  # version invoked); not every shell's builtin printf understands \xHH.
  env printf '%b' "$doi" | tr 'A-Z' 'a-z'
}
process_doi() { process_doi() {
doi="$2" doi="$1"
bibtex_entry=$(curl -s "https://api.crossref.org/works/$doi/transform/application/x-bibtex" -w "\\n" | correct_names) bibtex_entry=$(curl -s "https://api.crossref.org/works/$doi/transform/application/x-bibtex" -w "\\n" | correct_names)
red_color='\034[0;31m' red_color='\033[0;31m'
reset_color='\034[0m' reset_color='\033[0m'
printf "${red_color}%s${reset_color}\n" "$bibtex_entry" printf "${red_color}%s${reset_color}\n" "$bibtex_entry"
[ -z "$bibtex_entry" ] && [ "$(echo "$bibtex_entry" | cut -c2)" != "@" ] && echo "Failed to fetch bibtex entry for DOI: $doi" && return 1 [ -z "$bibtex_entry" ] && [ "$(echo "$bibtex_entry" | cut -c2)" != "@" ] && echo "Failed to fetch bibtex entry for DOI: $doi" && return 1
@@ -46,30 +47,30 @@ process_doi() {
echo "Bibtex entry for DOI: $doi already exists in the file." echo "Bibtex entry for DOI: $doi already exists in the file."
} }
# ---------------------------------------------------------------------------
# Entry point.
#
# Usage: <script> <pdf-file | directory-containing-pdfs | DOI>
#
# The single argument ($1) is interpreted, in this order, as
#   1. a directory: every *.pdf directly inside it is processed,
#   2. a regular file whose name ends in ".pdf",
#   3. a DOI string (anything correction_method recognises).
#
# Exit status: 0 when everything requested was processed, 1 on a usage
# error, when no DOI could be determined, or when process_doi failed.
# ---------------------------------------------------------------------------
if [ -z "$1" ]; then
  echo "Give either a pdf file or a DOI or a directory path that has PDFs as an argument." >&2
  exit 1
fi

# 1. Directory of PDFs.
if [ -d "$1" ]; then
  status=0
  for pdf in "$1"/*.pdf; do
    # Without a match the glob stays literal; skip that non-existent name.
    [ -e "$pdf" ] || continue

    # get_doi_from_pdf reports a missing DOI on stderr itself.
    doi=$(get_doi_from_pdf "$pdf")
    if [ -n "$doi" ]; then
      doi=$(normalize_doi "$doi")
      process_doi "$doi" || status=1
    else
      status=1
    fi
  done
  exit "$status"
fi

# 2. Single PDF file (name must end in ".pdf").
if [ -f "$1" ] && [ "${1%.pdf}" != "$1" ]; then
  doi=$(get_doi_from_pdf "$1")
  if [ -z "$doi" ]; then
    exit 1
  fi
  doi=$(normalize_doi "$doi")
  process_doi "$doi"
  exit
fi

# 3. Anything else is treated as a DOI given on the command line.
doi=$(printf '%s\n' "$1" | correction_method)
if [ -z "$doi" ]; then
  echo "Invalid DOI provided: $1" >&2
  exit 1
fi
doi=$(normalize_doi "$doi")
process_doi "$doi"