diff options
-rw-r--r-- | office/ocrmypdf/README | 28 | ||||
-rw-r--r-- | office/ocrmypdf/ocrmypdf.SlackBuild | 108 | ||||
-rw-r--r-- | office/ocrmypdf/ocrmypdf.info | 16 | ||||
-rw-r--r-- | office/ocrmypdf/slack-desc | 19 |
4 files changed, 171 insertions, 0 deletions
diff --git a/office/ocrmypdf/README b/office/ocrmypdf/README
new file mode 100644
index 0000000000..e15fed44b8
--- /dev/null
+++ b/office/ocrmypdf/README
@@ -0,0 +1,28 @@
+OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them
+to be searched or copy-pasted.
+
+Main features
+Generates a searchable PDF/A file from a regular PDF
+Places OCR text accurately below the image to ease copy / paste
+Keeps the exact resolution of the original embedded images
+When possible, inserts OCR information as a "lossless" operation
+without disrupting any other content
+Optimizes PDF images, often producing files smaller than the input
+file
+If requested, deskews and/or cleans the image before performing OCR
+Validates input and output files
+Distributes work across all available CPU cores
+Uses Tesseract OCR engine to recognize more than 100 languages
+Keeps your private data private.
+Scales properly to handle files with thousands of pages.
+Battle-tested on millions of PDFs.
+
+OCRmyPDF uses Tesseract for OCR, and relies on its language packs.
+
+Once OCRmyPDF is installed, the built-in help which explains the
+command syntax and options can be accessed via:
+
+ocrmypdf --help
+
+Please support the software author and the build author if you find
+the software useful.
diff --git a/office/ocrmypdf/ocrmypdf.SlackBuild b/office/ocrmypdf/ocrmypdf.SlackBuild
new file mode 100644
index 0000000000..afa5ac5c6c
--- /dev/null
+++ b/office/ocrmypdf/ocrmypdf.SlackBuild
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+# Slackware build script for ocrmypdf
+
+# Copyright 2025, Lockywolf
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+#  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+#  EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+#  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+#  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+#  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+cd $(dirname $0) ; CWD=$(pwd)
+
+PRGNAM=ocrmypdf
+VERSION=${VERSION:-13.7.0}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+PKGTYPE=${PKGTYPE:-tgz}
+
+if [ -z "$ARCH" ]; then
+  case "$( uname -m )" in
+    i?86) ARCH=i586 ;;
+    arm*) ARCH=arm ;;
+       *) ARCH=$( uname -m ) ;;
+  esac
+fi
+
+if [ ! -z "${PRINT_PACKAGE_NAME}" ]; then
+  echo "$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE"
+  exit 0
+fi
+
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+if [ "$ARCH" = "i586" ]; then
+  SLKCFLAGS="-O2 -march=i586 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+  SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+  SLKCFLAGS="-O2 -fPIC"
+  LIBDIRSUFFIX="64"
+elif [ "$ARCH" = "aarch64" ]; then
+  SLKCFLAGS="-O2 -fPIC"
+  LIBDIRSUFFIX="64"
+else
+  SLKCFLAGS="-O2"
+  LIBDIRSUFFIX=""
+fi
+
+set -e
+
+rm -rf $PKG
+mkdir -p $TMP $PKG $OUTPUT
+cd $TMP
+rm -rf $PRGNAM-$VERSION
+tar xvf $CWD/$PRGNAM-$VERSION.tar.gz
+cd $PRGNAM-$VERSION
+
+#sed -i 's/from itertools import pairwise/from more_itertools import pairwise/g' src/ocrmypdf/hocrtransform/_hocr.py
+
+#sed -i 's/Matrix/PdfMatrix/g' src/ocrmypdf/hocrtransform/_hocr.py src/ocrmypdf/pdfinfo/layout.py src/ocrmypdf/pdfinfo/info.py src/ocrmypdf/_graft.py
+
+chown -R root:root .
+find -L . \
+ \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
+  -o -perm 511 \) -exec chmod 755 {} + -o \
+ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
+  -o -perm 440 -o -perm 400 \) -exec chmod 644 {} +
+
+PYVER=$(python3 -c 'import sys; print("%d.%d" % sys.version_info[:2])')
+export PYTHONPATH=/opt/python$PYVER/site-packages # NOTE(review): presumably where python3-packaging-opt installs - confirm
+
+python3 -m build --wheel --no-isolation
+
+python3 -m installer --destdir "$PKG" dist/*.whl
+
+find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
+cp -a -H \
+  $CWD/{README.md,LICENSE,README_ZH.md} \
+  $PKG/usr/doc/$PRGNAM-$VERSION/
+chown root:root $PKG/usr/doc/$PRGNAM-$VERSION/*
+cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+
+mkdir -p $PKG/install
+cat $CWD/slack-desc > $PKG/install/slack-desc
+
+cd $PKG
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE
diff --git a/office/ocrmypdf/ocrmypdf.info b/office/ocrmypdf/ocrmypdf.info
new file mode 100644
index 0000000000..2ad9035d5f
--- /dev/null
+++ b/office/ocrmypdf/ocrmypdf.info
@@ -0,0 +1,16 @@
+PRGNAM="ocrmypdf"
+VERSION="13.7.0"
+HOMEPAGE="https://github.com/ocrmypdf/OCRmyPDF"
+DOWNLOAD="https://files.pythonhosted.org/packages/b6/70/b40e1d780ef071d9b53a05e86c2584b42afa1e14dc6ed99847947725c681/ocrmypdf-13.7.0.tar.gz \
+https://github.com/ocrmypdf/OCRmyPDF/raw/61163c2aa9f7aa584d3148634a9ca277103eccbf/LICENSE \
+https://github.com/ocrmypdf/OCRmyPDF/raw/61163c2aa9f7aa584d3148634a9ca277103eccbf/README.md \
+https://github.com/ocrmypdf/OCRmyPDF/raw/61163c2aa9f7aa584d3148634a9ca277103eccbf/README_ZH.md"
+MD5SUM="415c28c84c371e14edc4c18f69be199f \
+9741c346eef56131163e13b9db1241b3 \
+81b0a30f39050ac2dff65a08fddb4aa6 \
+56eec2d269140c34675329bb43eedbd0"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+REQUIRES="python3-hatchling python3-packaging-opt img2pdf python3-reportlab pikepdf python3-pdfminer.six tesseract"
+MAINTAINER="Lockywolf"
+EMAIL="for_sbo.ocrmypdf_2025-06-30@lockywolf.net"
diff --git a/office/ocrmypdf/slack-desc b/office/ocrmypdf/slack-desc
new file mode 100644
index 0000000000..03da9ee390
--- /dev/null
+++ b/office/ocrmypdf/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct.  It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+        |-----handy-ruler------------------------------------------------------|
+ocrmypdf: ocrmypdf (OCRmyPDF adds an OCR text layer to scanned PDF files)
+ocrmypdf:
+ocrmypdf: OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them
+ocrmypdf: to be searched or copy-pasted.
+ocrmypdf:
+ocrmypdf:
+ocrmypdf:
+ocrmypdf:
+ocrmypdf:
+ocrmypdf:
+ocrmypdf: