aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorBrenton Earl <brent@exitstatusone.com>2015-11-14 23:19:43 +0700
committerWilly Sudiarto Raharjo <willysr@slackbuilds.org>2015-11-14 23:19:43 +0700
commit8ee80adc21733871a05f1eb38d949ddf19a431d2 (patch)
tree33e2ec48c1785b939d2e73d366eb74952b77bac9 /python
parent6a250f182ecd65acfcac96409f71010ba7cb04a6 (diff)
python/python-pdfminer: Added (PDF parser and analyzer).
Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>
Diffstat (limited to 'python')
-rw-r--r--python/python-pdfminer/README23
-rw-r--r--python/python-pdfminer/python-pdfminer.SlackBuild99
-rw-r--r--python/python-pdfminer/python-pdfminer.info10
-rw-r--r--python/python-pdfminer/slack-desc19
4 files changed, 151 insertions, 0 deletions
diff --git a/python/python-pdfminer/README b/python/python-pdfminer/README
new file mode 100644
index 0000000000000..64ca2affa2ffd
--- /dev/null
+++ b/python/python-pdfminer/README
@@ -0,0 +1,23 @@
+PDFMiner is a tool for extracting information from PDF documents. Unlike
+other PDF-related tools, it focuses entirely on getting and analyzing
+text data. PDFMiner allows one to obtain the exact location of text in a
+page, as well as other information such as fonts or lines. It includes a
+PDF converter that can transform PDF files into other text formats (such
+as HTML). It has an extensible PDF parser that can be used for other
+purposes than text analysis.
+
+PDFMiner comes with two handy tools: pdf2txt.py and dumppdf.py.
+
+pdf2txt.py
+
+pdf2txt.py extracts text contents from a PDF file. It cannot recognize
+text drawn as images. It also extracts locations, font names/sizes, and
+writing direction. It requires a password for password-protected PDF
+documents. You cannot extract any text from a PDF document which does
+not have extraction permission.
+
+dumppdf.py
+
+dumppdf.py dumps the internal contents of a PDF file in pseudo-XML
+format. This program is primarily for debugging purposes, but it's also
+possible to extract some meaningful contents (e.g. images).
diff --git a/python/python-pdfminer/python-pdfminer.SlackBuild b/python/python-pdfminer/python-pdfminer.SlackBuild
new file mode 100644
index 0000000000000..73e5d7ed96d31
--- /dev/null
+++ b/python/python-pdfminer/python-pdfminer.SlackBuild
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+# Slackware build script for python-pdfminer
+
+# Copyright 2015 Brenton Earl <brent@exitstatusone.com>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+PRGNAM=python-pdfminer # name used for the package file and its doc directory
+SRCNAM=pdfminer # base name of the source tarball and of the directory it unpacks to
+VERSION=${VERSION:-20140328} # VERSION, BUILD and TAG keep a value preset in the environment
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+
+if [ -z "$ARCH" ]; then # derive ARCH from "uname -m" only when the caller left it empty
+ case "$( uname -m )" in
+ i?86) ARCH=i486 ;;
+ arm*) ARCH=arm ;;
+ *) ARCH=$( uname -m ) ;;
+ esac
+fi
+
+CWD=$(pwd) # start directory; the tarball, README, slack-desc and this script are read from it
+TMP=${TMP:-/tmp/SBo} # scratch area used for unpacking and staging
+PKG=$TMP/package-$PRGNAM # staging root that is later handed to makepkg
+OUTPUT=${OUTPUT:-/tmp} # directory that receives the finished package
+
+if [ "$ARCH" = "i486" ]; then # choose compiler flags and a lib directory suffix per ARCH
+ SLKCFLAGS="-O2 -march=i486 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+ SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+ SLKCFLAGS="-O2 -fPIC"
+ LIBDIRSUFFIX="64"
+else
+ SLKCFLAGS="-O2"
+ LIBDIRSUFFIX=""
+fi # NOTE(review): SLKCFLAGS and LIBDIRSUFFIX are not referenced again in this script
+
+set -e # stop at the first command that fails
+
+rm -rf "$PKG" # start from an empty staging root; quoted so odd paths cannot split
+mkdir -p "$TMP" "$PKG" "$OUTPUT"
+cd "$TMP"
+rm -rf "$SRCNAM-$VERSION" # drop any source tree left over from an earlier run
+tar xvf "$CWD/$SRCNAM-$VERSION.tar.gz"
+cd "$SRCNAM-$VERSION"
+chown -R root:root . # the find below then maps the listed modes to 755 or 644
+find -L . \
+ \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
+ -o -perm 511 \) -exec chmod 755 {} \; -o \
+ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
+ -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
+
+# Enable processing of Chinese, Japanese and Korean languages
+make cmap # Comment out this line to disable this support
+
+# Build and install into the staging root
+python setup.py install --root="$PKG"
+
+find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+ | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true # best effort: failures are ignored
+
+mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION" # documentation directory inside the staging root
+cp -a \
+ PKG-INFO \
+ "$PKG/usr/doc/$PRGNAM-$VERSION"
+cp -R \
+ samples/ \
+ "$PKG/usr/doc/$PRGNAM-$VERSION"
+cp -R \
+ docs/ \
+ "$PKG/usr/doc/$PRGNAM-$VERSION/html_docs" # the source docs/ tree is copied under the name html_docs
+cat "$CWD/README" > "$PKG/usr/doc/$PRGNAM-$VERSION/README"
+cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
+
+mkdir -p "$PKG/install"
+cat "$CWD/slack-desc" > "$PKG/install/slack-desc" # package description file
+
+cd "$PKG" # makepkg is run from inside the staging root
+/sbin/makepkg -l y -c n "$OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}"
diff --git a/python/python-pdfminer/python-pdfminer.info b/python/python-pdfminer/python-pdfminer.info
new file mode 100644
index 0000000000000..87021abe74529
--- /dev/null
+++ b/python/python-pdfminer/python-pdfminer.info
@@ -0,0 +1,10 @@
+PRGNAM="python-pdfminer"
+VERSION="20140328"
+HOMEPAGE="https://euske.github.io/pdfminer/index.html"
+DOWNLOAD="https://pypi.python.org/packages/source/p/pdfminer/pdfminer-20140328.tar.gz"
+MD5SUM="dfe3eb1b7b7017ab514aad6751a7c2ea"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+REQUIRES=""
+MAINTAINER="Brenton Earl"
+EMAIL="brent@exitstatusone.com"
diff --git a/python/python-pdfminer/slack-desc b/python/python-pdfminer/slack-desc
new file mode 100644
index 0000000000000..0076bdf5147ab
--- /dev/null
+++ b/python/python-pdfminer/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct. It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+ |-----handy-ruler------------------------------------------------------|
+python-pdfminer: python-pdfminer (PDF parser and analyzer)
+python-pdfminer:
+python-pdfminer: PDFMiner is a tool for extracting information from PDF
+python-pdfminer: documents. It focuses entirely on getting and analyzing text
+python-pdfminer: data. PDFMiner can obtain the location of text in a page,
+python-pdfminer: and other information like fonts or lines. It includes a
+python-pdfminer: PDF converter that can transform PDF files into several
+python-pdfminer: text formats. It also includes an extensible PDF parser.
+python-pdfminer:
+python-pdfminer: Home page: https://euske.github.io/pdfminer/index.html
+python-pdfminer: