Event/2013/GSoC/TextSearch: install-solr-sunburnt.sh

File install-solr-sunburnt.sh, 2.3 KB (added by Vishrut Mehta, 8 years ago)
Line 
#!/bin/sh
#
# Install the text-extraction helpers (Antiword, pdfminer, pyth, xlrd),
# Apache Solr 4.3.1 and the sunburnt Python client, then rewrite the Solr
# example configuration for Eden's full-text search.
#
# Sources are downloaded and built under /tmp; Solr is unpacked into $HOME.
# NOTE(review): apt-get and the system-wide "setup.py install" calls
# presumably need root privileges -- confirm how this script is invoked.

# Abort on the first failing command or unset variable, so a failed
# download or cd cannot cause later steps to run in the wrong directory.
set -eu

# Download a Python source tarball into the current directory, unpack it and
# run its setup.py installer.
#   $1 - URL of the .tar.gz archive
#   $2 - name of the directory the archive unpacks to
install_python_sdist() {
  wget "$1"
  tar xvzf "${1##*/}"
  # Subshell keeps the caller's working directory unchanged.
  (cd "$2" && python setup.py install)
}

# Build Antiword to be able to extract text from .doc
# NOTE(review): only "make" is run, so the binary stays in
# /tmp/antiword-0.37 -- confirm whether an install step is also wanted.
cd /tmp
wget http://www.winfield.demon.nl/linux/antiword-0.37.tar.gz
tar xvzf antiword-0.37.tar.gz
(cd antiword-0.37 && make)

# Install pdfminer to be able to extract text from .pdf
install_python_sdist \
  http://pypi.python.org/packages/source/p/pdfminer/pdfminer-20110515.tar.gz \
  pdfminer-20110515

# Install pyth to be able to extract text from .rtf
install_python_sdist \
  http://pypi.python.org/packages/source/p/pyth/pyth-0.5.6.tar.gz \
  pyth-0.5.6

# Install xlrd to be able to extract text from .xls
install_python_sdist \
  http://pypi.python.org/packages/source/x/xlrd/xlrd-0.9.2.tar.gz \
  xlrd-0.9.2

# Install other dependencies for Solr/Sunburnt
# -y answers apt-get's confirmation prompt so the script runs unattended.
apt-get install -y python-httplib2
apt-get install -y python-pip
apt-get install -y libxml2 libxslt-dev libxml2-dev
pip install lxml==3.0.2

# Install Solr
# TODO: for deployment on a production server, install in a different location
cd "$HOME"
wget http://mirror.reverse.net/pub/apache/lucene/solr/4.3.1/solr-4.3.1.tgz
tar xvzf solr-4.3.1.tgz
cd solr-4.3.1/
mv example solr
cd solr/solr/collection1/conf

# Configure Solr according to Eden
# Point Solr's data directory at Eden's index directory. $HOME is spliced in
# double-quoted so it is not subject to word splitting.
sed -i 's|<dataDir>${solr.data.dir:}</dataDir>|<dataDir>'"$HOME"'/web2py/applications/eden/indices</dataDir>|' solrconfig.xml

# Replace the example schema fields (features, sku, includes) and their
# copyField rules with the fields Eden indexes: filetype, tablename, filename.
sed -i \
  -e 's|<field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>|<field name="filetype" type="text_general" indexed="true" stored="true"/>|' \
  -e 's|<field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>|<field name="tablename" type="text_general" indexed="true" stored="true"/>|' \
  -e 's|<field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />|<field name="filename" type="text_general" indexed="true" stored="true"/>|' \
  -e 's|<copyField source="features" dest="text"/>|<copyField source="filetype" dest="text"/>|' \
  -e 's|<copyField source="includes" dest="text"/>|<copyField source="filename" dest="text"/>|' \
  schema.xml

# Install sunburnt
cd /tmp
apt-get install -y git
git clone http://github.com/tow/sunburnt.git
cd sunburnt
python setup.py install