Add SeedDMS support for office document indexing

This commit is contained in:
Disassembler 2017-10-07 19:26:04 +02:00
parent 224546c12f
commit 579f4e92e6
3 changed files with 11 additions and 3 deletions

View File

@ -3,7 +3,7 @@
# Absolute path of the directory that contains this script.
SOURCE_DIR=$(realpath $(dirname "${0}"))
# Install dependencies for SeedDMS
# NOTE(review): the two apt-get lines below differ only in their package lists
# (catdoc/gnumeric versus libreoffice-calc/libreoffice-writer/unoconv). They
# look like the old and new versions of a single command shown together by a
# diff view; confirm that only one of them is meant to remain.
apt-get -y --no-install-recommends install catdoc ghostscript gnumeric id3 imagemagick php7.0-gd php7.0-fpm php7.0-mbstring php7.0-pgsql php7.0-xml php-log php-mail php-pear poppler-utils
apt-get -y --no-install-recommends install ghostscript id3 imagemagick libreoffice-calc libreoffice-writer php7.0-gd php7.0-fpm php7.0-mbstring php7.0-pgsql php7.0-xml php-log php-mail php-pear poppler-utils unoconv
# Install SeedDMS
# Download the SeedDMS 5.1.3 quickstart tarball and save it as /srv/seeddms.tgz.
wget https://sourceforge.net/projects/seeddms/files/seeddms-5.1.3/seeddms-quickstart-5.1.3.tar.gz/download -O /srv/seeddms.tgz

View File

@ -11,3 +11,5 @@ pm.max_children = 8
; Directories PHP scripts in this pool are allowed to open files from.
; NOTE(review): /tmp is listed twice; the duplicate looks unintentional - confirm.
php_admin_value[open_basedir] = /srv/seeddms:/tmp:/tmp:/usr/share/php
; Upper limit for the size of a single uploaded file.
php_admin_value[upload_max_filesize] = 100M
; Pass LANG into the environment of the pool's worker processes.
; Presumably needed so external converter commands handle UTF-8 text - TODO confirm.
env[LANG]=en_US.UTF-8

View File

@ -24,10 +24,16 @@
<server coreDir="" luceneClassDir="" contentOffsetDir="1048576" maxDirID="0" updateNotifyTime="86400" extraPath="/srv/seeddms/pear" cmdTimeout="5" maxExecutionTime="30"/>
<!-- Full-text converters: each entry pairs a MIME type with a shell command.
     Presumably %s is replaced by the path of the stored document and the
     command's standard output is the text that gets indexed (confirm against
     the SeedDMS documentation). -->
<converters target="fulltext">
<!-- PDF: extract text as UTF-8 without page breaks, then drop isolated single
     alphanumeric/dot characters and strip every digit and dot. -->
<converter mimeType="application/pdf">pdftotext -enc UTF-8 -nopgbrk %s - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g'</converter>
<!-- NOTE(review): application/msword and application/vnd.ms-excel are each
     declared twice in this list (catdoc/ssconvert here, unoconv further down).
     This looks like the old and new lines of a diff shown together; confirm
     which pair should remain. -->
<converter mimeType="application/msword">catdoc %s</converter>
<converter mimeType="application/vnd.ms-excel">ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1</converter>
<!-- Word-processing formats: converted to plain text by unoconv. -->
<converter mimeType="text/rtf">unoconv -d document -f txt --stdout %s</converter>
<converter mimeType="application/msword">unoconv -d document -f txt --stdout %s</converter>
<converter mimeType="application/vnd.oasis.opendocument.text">unoconv -d document -f txt --stdout %s</converter>
<converter mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document">unoconv -d document -f txt --stdout %s</converter>
<!-- Spreadsheet formats: converted to CSV by unoconv. -->
<converter mimeType="application/vnd.ms-excel">unoconv -d spreadsheet -f csv --stdout %s</converter>
<converter mimeType="application/vnd.oasis.opendocument.spreadsheet">unoconv -d spreadsheet -f csv --stdout %s</converter>
<converter mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">unoconv -d spreadsheet -f csv --stdout %s</converter>
<!-- Audio: keep only the Title, Artist and Album lines of the id3 output and
     strip the leading label up to and including the first ": ". -->
<converter mimeType="audio/mp3">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
<converter mimeType="audio/mpeg">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
<!-- HTML goes through the same unoconv document-to-text conversion. -->
<converter mimeType="text/html">unoconv -d document -f txt --stdout %s</converter>
<!-- Plain text is passed through unchanged. -->
<converter mimeType="text/plain">cat %s</converter>
</converters>
</advanced>