Obsah přiložených dokumentů ve formátech ODF - OpenOffice, openxml - MS Office a PDF se extrahuje do čistého textu a indexuje pro fulltextové vyhledávání.
closes #211
This commit is contained in:
@@ -348,7 +348,20 @@
|
|||||||
<artifactId>joda-time</artifactId>
|
<artifactId>joda-time</artifactId>
|
||||||
<version>2.4</version>
|
<version>2.4</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Text extractors -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.odftoolkit</groupId>
|
||||||
|
<artifactId>simple-odf</artifactId>
|
||||||
|
<version>0.7-incubating</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.poi</groupId>
|
||||||
|
<artifactId>poi-ooxml</artifactId>
|
||||||
|
<version>3.11</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Test -->
|
<!-- Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ import info.bukova.isspst.data.User;
|
|||||||
import info.bukova.isspst.reporting.Report;
|
import info.bukova.isspst.reporting.Report;
|
||||||
import info.bukova.isspst.reporting.ReportMapping;
|
import info.bukova.isspst.reporting.ReportMapping;
|
||||||
import info.bukova.isspst.reporting.ReportType;
|
import info.bukova.isspst.reporting.ReportType;
|
||||||
import info.bukova.isspst.services.FullTextService;
|
|
||||||
import info.bukova.isspst.services.dbinfo.DbInfoService;
|
import info.bukova.isspst.services.dbinfo.DbInfoService;
|
||||||
|
import info.bukova.isspst.services.fulltext.FullTextService;
|
||||||
import info.bukova.isspst.services.munits.MUnitService;
|
import info.bukova.isspst.services.munits.MUnitService;
|
||||||
import info.bukova.isspst.services.numberseries.NumberSeriesService;
|
import info.bukova.isspst.services.numberseries.NumberSeriesService;
|
||||||
import info.bukova.isspst.services.requirement.RequirementTypeService;
|
import info.bukova.isspst.services.requirement.RequirementTypeService;
|
||||||
@@ -20,19 +20,17 @@ import info.bukova.isspst.services.settings.GlobalSettingsService;
|
|||||||
import info.bukova.isspst.services.users.PermissionService;
|
import info.bukova.isspst.services.users.PermissionService;
|
||||||
import info.bukova.isspst.services.users.RoleService;
|
import info.bukova.isspst.services.users.RoleService;
|
||||||
import info.bukova.isspst.services.users.UserService;
|
import info.bukova.isspst.services.users.UserService;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import javax.servlet.ServletContextEvent;
|
|
||||||
import javax.servlet.ServletContextListener;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.security.core.userdetails.UsernameNotFoundException;
|
import org.springframework.security.core.userdetails.UsernameNotFoundException;
|
||||||
import org.springframework.web.context.WebApplicationContext;
|
import org.springframework.web.context.WebApplicationContext;
|
||||||
import org.springframework.web.context.support.WebApplicationContextUtils;
|
import org.springframework.web.context.support.WebApplicationContextUtils;
|
||||||
|
|
||||||
|
import javax.servlet.ServletContextEvent;
|
||||||
|
import javax.servlet.ServletContextListener;
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class AppInitListener implements ServletContextListener {
|
public class AppInitListener implements ServletContextListener {
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import info.bukova.isspst.data.TripBill;
|
|||||||
import info.bukova.isspst.data.TripRequirement;
|
import info.bukova.isspst.data.TripRequirement;
|
||||||
import info.bukova.isspst.reporting.Report;
|
import info.bukova.isspst.reporting.Report;
|
||||||
import info.bukova.isspst.reporting.ReportMapping;
|
import info.bukova.isspst.reporting.ReportMapping;
|
||||||
import info.bukova.isspst.services.FullTextService;
|
import info.bukova.isspst.services.fulltext.FullTextService;
|
||||||
import info.bukova.isspst.services.addressbook.AdbService;
|
import info.bukova.isspst.services.addressbook.AdbService;
|
||||||
import info.bukova.isspst.services.buildings.BuildingService;
|
import info.bukova.isspst.services.buildings.BuildingService;
|
||||||
import info.bukova.isspst.services.invoicing.InvoicingService;
|
import info.bukova.isspst.services.invoicing.InvoicingService;
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package info.bukova.isspst.data;
|
||||||
|
|
||||||
|
import org.hibernate.annotations.Type;
|
||||||
|
import org.hibernate.search.annotations.Analyze;
|
||||||
|
import org.hibernate.search.annotations.Field;
|
||||||
|
import org.hibernate.search.annotations.Index;
|
||||||
|
import org.hibernate.search.annotations.Indexed;
|
||||||
|
|
||||||
|
import javax.persistence.Column;
|
||||||
|
import javax.persistence.Entity;
|
||||||
|
import javax.persistence.GeneratedValue;
|
||||||
|
import javax.persistence.Id;
|
||||||
|
import javax.persistence.Table;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
@Entity
|
||||||
|
@Table(name = "FILE_CONTENTS")
|
||||||
|
@Indexed
|
||||||
|
public class FileContent {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@Column(name = "ID")
|
||||||
|
@GeneratedValue
|
||||||
|
private int id;
|
||||||
|
|
||||||
|
@Column(name = "CONTENT")
|
||||||
|
@Type(type = "text")
|
||||||
|
@Field(index = Index.YES, analyze = Analyze.YES)
|
||||||
|
private String plainText;
|
||||||
|
|
||||||
|
@Column(name = "CONTENT_TYPE")
|
||||||
|
private String contentType;
|
||||||
|
|
||||||
|
@Column(name = "PATH_IN_FILESYSTEM")
|
||||||
|
private String pathInFilesystem;
|
||||||
|
|
||||||
|
public int getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(int id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPlainText() {
|
||||||
|
return plainText;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPlainText(String content) {
|
||||||
|
this.plainText = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContentType() {
|
||||||
|
return contentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setContentType(String contentType) {
|
||||||
|
this.contentType = contentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPathInFilesystem() {
|
||||||
|
return pathInFilesystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPathInFilesystem(String pathInFilesystem) {
|
||||||
|
this.pathInFilesystem = pathInFilesystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof FileContent)) return false;
|
||||||
|
|
||||||
|
FileContent that = (FileContent) o;
|
||||||
|
|
||||||
|
if (id != that.id) return false;
|
||||||
|
if (plainText != null ? !plainText.equals(that.plainText) : that.plainText != null) return false;
|
||||||
|
if (contentType != null ? !contentType.equals(that.contentType) : that.contentType != null) return false;
|
||||||
|
if (pathInFilesystem != null ? !pathInFilesystem.equals(that.pathInFilesystem) : that.pathInFilesystem != null)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = id;
|
||||||
|
result = 31 * result + (plainText != null ? plainText.hashCode() : 0);
|
||||||
|
result = 31 * result + (contentType != null ? contentType.hashCode() : 0);
|
||||||
|
result = 31 * result + (pathInFilesystem != null ? pathInFilesystem.hashCode() : 0);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,13 +1,17 @@
|
|||||||
package info.bukova.isspst.data;
|
package info.bukova.isspst.data;
|
||||||
|
|
||||||
import org.hibernate.annotations.Type;
|
|
||||||
import org.hibernate.search.annotations.Analyze;
|
import org.hibernate.search.annotations.Analyze;
|
||||||
import org.hibernate.search.annotations.Field;
|
import org.hibernate.search.annotations.Field;
|
||||||
import org.hibernate.search.annotations.Index;
|
import org.hibernate.search.annotations.Index;
|
||||||
import org.hibernate.search.annotations.Indexed;
|
import org.hibernate.search.annotations.Indexed;
|
||||||
|
import org.hibernate.search.annotations.IndexedEmbedded;
|
||||||
|
|
||||||
|
import javax.persistence.CascadeType;
|
||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
|
import javax.persistence.FetchType;
|
||||||
|
import javax.persistence.JoinColumn;
|
||||||
|
import javax.persistence.ManyToOne;
|
||||||
import javax.persistence.Table;
|
import javax.persistence.Table;
|
||||||
|
|
||||||
@Entity
|
@Entity
|
||||||
@@ -16,23 +20,32 @@ import javax.persistence.Table;
|
|||||||
public class FileMetainfo extends BaseData {
|
public class FileMetainfo extends BaseData {
|
||||||
|
|
||||||
@Column(name = "FILE_NAME")
|
@Column(name = "FILE_NAME")
|
||||||
|
@Field(index = Index.YES, analyze = Analyze.YES)
|
||||||
private String fileName;
|
private String fileName;
|
||||||
@Column(name = "PATH_IN_FILESYSTEM")
|
|
||||||
private String pathInFilesystem;
|
|
||||||
@Column(name = "MODULE_ID")
|
@Column(name = "MODULE_ID")
|
||||||
private String moduleId;
|
private String moduleId;
|
||||||
|
|
||||||
@Column(name = "RECORD_ID")
|
@Column(name = "RECORD_ID")
|
||||||
private int recordId;
|
private int recordId;
|
||||||
@Column(name = "CONTENT")
|
|
||||||
@Type(type = "text")
|
@ManyToOne(fetch = FetchType.EAGER, cascade = CascadeType.ALL)
|
||||||
@Field(index = Index.YES, analyze = Analyze.YES)
|
@JoinColumn(name = "CONTENT_ID")
|
||||||
private String content;
|
@IndexedEmbedded
|
||||||
|
private FileContent content;
|
||||||
|
|
||||||
@Column(name = "MD5")
|
@Column(name = "MD5")
|
||||||
private String md5;
|
private String md5;
|
||||||
|
|
||||||
@Column(name = "DESCRIPTION")
|
@Column(name = "DESCRIPTION")
|
||||||
|
@Field(index = Index.YES, analyze = Analyze.YES)
|
||||||
private String description;
|
private String description;
|
||||||
@Column(name = "CONTENT_TYPE")
|
|
||||||
private String contentType;
|
private void ensureContentExists() {
|
||||||
|
if (content == null) {
|
||||||
|
content = new FileContent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String getFileName() {
|
public String getFileName() {
|
||||||
return fileName;
|
return fileName;
|
||||||
@@ -43,11 +56,15 @@ public class FileMetainfo extends BaseData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getPathInFilesystem() {
|
public String getPathInFilesystem() {
|
||||||
return pathInFilesystem;
|
if (content != null) {
|
||||||
|
return content.getPathInFilesystem();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPathInFilesystem(String pathInFilesystem) {
|
public void setPathInFilesystem(String pathInFilesystem) {
|
||||||
this.pathInFilesystem = pathInFilesystem;
|
ensureContentExists();
|
||||||
|
content.setPathInFilesystem(pathInFilesystem);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getModuleId() {
|
public String getModuleId() {
|
||||||
@@ -66,11 +83,11 @@ public class FileMetainfo extends BaseData {
|
|||||||
this.recordId = recordId;
|
this.recordId = recordId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getContent() {
|
public FileContent getContent() {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setContent(String content) {
|
public void setContent(FileContent content) {
|
||||||
this.content = content;
|
this.content = content;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -91,11 +108,16 @@ public class FileMetainfo extends BaseData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getContentType() {
|
public String getContentType() {
|
||||||
return contentType;
|
if (content != null) {
|
||||||
|
return content.getContentType();
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setContentType(String contentType) {
|
public void setContentType(String contentType) {
|
||||||
this.contentType = contentType;
|
ensureContentExists();
|
||||||
|
content.setContentType(contentType);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -105,16 +127,12 @@ public class FileMetainfo extends BaseData {
|
|||||||
|
|
||||||
FileMetainfo that = (FileMetainfo) o;
|
FileMetainfo that = (FileMetainfo) o;
|
||||||
|
|
||||||
if (getId() != 0 && getId() != that.getId()) return false;
|
|
||||||
if (recordId != that.recordId) return false;
|
if (recordId != that.recordId) return false;
|
||||||
if (content != null ? !content.equals(that.content) : that.content != null) return false;
|
if (content != null ? !content.equals(that.content) : that.content != null) return false;
|
||||||
if (contentType != null ? !contentType.equals(that.contentType) : that.contentType != null) return false;
|
|
||||||
if (description != null ? !description.equals(that.description) : that.description != null) return false;
|
if (description != null ? !description.equals(that.description) : that.description != null) return false;
|
||||||
if (fileName != null ? !fileName.equals(that.fileName) : that.fileName != null) return false;
|
if (fileName != null ? !fileName.equals(that.fileName) : that.fileName != null) return false;
|
||||||
if (md5 != null ? !md5.equals(that.md5) : that.md5 != null) return false;
|
if (md5 != null ? !md5.equals(that.md5) : that.md5 != null) return false;
|
||||||
if (moduleId != null ? !moduleId.equals(that.moduleId) : that.moduleId != null) return false;
|
if (moduleId != null ? !moduleId.equals(that.moduleId) : that.moduleId != null) return false;
|
||||||
if (pathInFilesystem != null ? !pathInFilesystem.equals(that.pathInFilesystem) : that.pathInFilesystem != null)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -122,13 +140,11 @@ public class FileMetainfo extends BaseData {
|
|||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int result = fileName != null ? fileName.hashCode() : 0;
|
int result = fileName != null ? fileName.hashCode() : 0;
|
||||||
result = 31 * result + (pathInFilesystem != null ? pathInFilesystem.hashCode() : 0);
|
|
||||||
result = 31 * result + (moduleId != null ? moduleId.hashCode() : 0);
|
result = 31 * result + (moduleId != null ? moduleId.hashCode() : 0);
|
||||||
result = 31 * result + recordId;
|
result = 31 * result + recordId;
|
||||||
result = 31 * result + (content != null ? content.hashCode() : 0);
|
result = 31 * result + (content != null ? content.hashCode() : 0);
|
||||||
result = 31 * result + (md5 != null ? md5.hashCode() : 0);
|
result = 31 * result + (md5 != null ? md5.hashCode() : 0);
|
||||||
result = 31 * result + (description != null ? description.hashCode() : 0);
|
result = 31 * result + (description != null ? description.hashCode() : 0);
|
||||||
result = 31 * result + (contentType != null ? contentType.hashCode() : 0);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public abstract class AbstractExtractor implements Extractor {
|
||||||
|
|
||||||
|
public String extract(byte[] data) {
|
||||||
|
return extract(new ByteArrayInputStream(data));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public abstract class AbstractOfficeExtractor extends AbstractExtractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String extract(InputStream is) throws ExtractorException {
|
||||||
|
try {
|
||||||
|
POIXMLTextExtractor extractor = createExtractor(is);
|
||||||
|
return extractor.getText();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ExtractorException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract POIXMLTextExtractor createExtractor(InputStream is) throws IOException;
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public class ExcelExtractor extends AbstractOfficeExtractor implements Extractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected POIXMLTextExtractor createExtractor(InputStream is) throws IOException {
|
||||||
|
return new XSSFExcelExtractor(new XSSFWorkbook(is));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*
|
||||||
|
* Rozhraní extractoru čistého textu z formátů Office a PDF
|
||||||
|
*/
|
||||||
|
public interface Extractor {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extrahuje text z předaného pole bytů
|
||||||
|
*
|
||||||
|
* @param data zdrajová data
|
||||||
|
* @return čistý text
|
||||||
|
* @throws ExtractorException
|
||||||
|
*/
|
||||||
|
public String extract(byte[] data) throws ExtractorException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extrahuje text z předaného InputStream objektu
|
||||||
|
*
|
||||||
|
* @param is zdrojový InputStream
|
||||||
|
* @return čistý text
|
||||||
|
* @throws ExtractorException
|
||||||
|
*/
|
||||||
|
public String extract(InputStream is) throws ExtractorException;
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import info.bukova.isspst.services.IsspstException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*
|
||||||
|
* Výjimka extrakce textu
|
||||||
|
*/
|
||||||
|
public class ExtractorException extends IsspstException {
|
||||||
|
|
||||||
|
public ExtractorException(Throwable cause) {
|
||||||
|
super("Extractor exception: ", cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*
|
||||||
|
* Factory pro konkrétní extractor
|
||||||
|
*/
|
||||||
|
public class ExtractorFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Vytvoří extractor podle předaného content typu
|
||||||
|
*
|
||||||
|
* @param contentType
|
||||||
|
* @return Extractor
|
||||||
|
*/
|
||||||
|
public static Extractor createExtractor(String contentType) {
|
||||||
|
if (contentType.equals("application/vnd.oasis.opendocument.text")
|
||||||
|
|| contentType.equals("application/vnd.oasis.opendocument.spreadsheet")
|
||||||
|
|| contentType.equals("application/vnd.oasis.opendocument.presentation")) {
|
||||||
|
return new OdfExtractor();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
|
||||||
|
return new WordExtractor();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
|
||||||
|
return new ExcelExtractor();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentType.equals("application/vnd.openxmlformats-officedocument.presentationml.slideshow")) {
|
||||||
|
return new PowerPointExtractor();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentType.equals("application/pdf")) {
|
||||||
|
return new PdfExtractor();
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
+3
-3
@@ -1,10 +1,10 @@
|
|||||||
package info.bukova.isspst.services;
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.hibernate.search.annotations.Field;
|
import org.hibernate.search.annotations.Field;
|
||||||
import org.hibernate.search.annotations.Indexed;
|
import org.hibernate.search.annotations.Indexed;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Pepa Rokos
|
* @author Pepa Rokos
|
||||||
*
|
*
|
||||||
+2
-1
@@ -1,9 +1,10 @@
|
|||||||
package info.bukova.isspst.services;
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
import info.bukova.isspst.ModuleUtils;
|
import info.bukova.isspst.ModuleUtils;
|
||||||
import info.bukova.isspst.dao.QueryDao;
|
import info.bukova.isspst.dao.QueryDao;
|
||||||
import info.bukova.isspst.data.BaseData;
|
import info.bukova.isspst.data.BaseData;
|
||||||
import info.bukova.isspst.data.User;
|
import info.bukova.isspst.data.User;
|
||||||
|
import info.bukova.isspst.services.ModuleNotActiveException;
|
||||||
import info.bukova.isspst.sort.ReflectionTools;
|
import info.bukova.isspst.sort.ReflectionTools;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.hibernate.Hibernate;
|
import org.hibernate.Hibernate;
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import org.odftoolkit.simple.Document;
|
||||||
|
import org.odftoolkit.simple.common.TextExtractor;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public class OdfExtractor extends AbstractExtractor implements Extractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String extract(InputStream is) throws ExtractorException {
|
||||||
|
try {
|
||||||
|
Document odfDocument = Document.loadDocument(is);
|
||||||
|
TextExtractor extractor = TextExtractor.newOdfTextExtractor(odfDocument.getContentRoot());
|
||||||
|
|
||||||
|
return extractor.getText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ExtractorException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import com.lowagie.text.pdf.PdfReader;
|
||||||
|
import com.lowagie.text.pdf.parser.PdfTextExtractor;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public class PdfExtractor extends AbstractExtractor implements Extractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String extract(InputStream is) throws ExtractorException {
|
||||||
|
try {
|
||||||
|
PdfReader reader = new PdfReader(is);
|
||||||
|
PdfTextExtractor extractor = new PdfTextExtractor(reader);
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
|
for (int i = 1; i <= reader.getNumberOfPages(); i++) {
|
||||||
|
sb.append(extractor.getTextFromPage(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ExtractorException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||||
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public class PowerPointExtractor extends AbstractOfficeExtractor implements Extractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected POIXMLTextExtractor createExtractor(InputStream is) throws IOException {
|
||||||
|
return new XSLFPowerPointExtractor(new XMLSlideShow(is));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
package info.bukova.isspst.services.fulltext;
|
||||||
|
|
||||||
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Pepa Rokos
|
||||||
|
*/
|
||||||
|
public class WordExtractor extends AbstractOfficeExtractor implements Extractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected POIXMLTextExtractor createExtractor(InputStream is) throws IOException {
|
||||||
|
return new XWPFWordExtractor(new XWPFDocument(is));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,8 @@ package info.bukova.isspst.storage;
|
|||||||
|
|
||||||
import info.bukova.isspst.dao.QueryDao;
|
import info.bukova.isspst.dao.QueryDao;
|
||||||
import info.bukova.isspst.data.FileMetainfo;
|
import info.bukova.isspst.data.FileMetainfo;
|
||||||
|
import info.bukova.isspst.services.fulltext.Extractor;
|
||||||
|
import info.bukova.isspst.services.fulltext.ExtractorFactory;
|
||||||
import org.apache.commons.codec.binary.Hex;
|
import org.apache.commons.codec.binary.Hex;
|
||||||
import org.hibernate.Query;
|
import org.hibernate.Query;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@@ -68,6 +70,15 @@ public class DocumentFileStorageImpl extends AbstractFileStorage<FileMetainfo> i
|
|||||||
return fileName;
|
return fileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void extractContent(InputStream is, FileMetainfo fileID) {
|
||||||
|
Extractor extractor = ExtractorFactory.createExtractor(fileID.getContentType());
|
||||||
|
|
||||||
|
if (extractor != null) {
|
||||||
|
fileID.getContent().setPlainText(extractor.extract(is));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Transactional
|
@Transactional
|
||||||
public void removeFile(FileMetainfo fileID) {
|
public void removeFile(FileMetainfo fileID) {
|
||||||
@@ -126,6 +137,7 @@ public class DocumentFileStorageImpl extends AbstractFileStorage<FileMetainfo> i
|
|||||||
|
|
||||||
if (!checkForDuplicate(new ByteArrayInputStream(data), metaInfo)) {
|
if (!checkForDuplicate(new ByteArrayInputStream(data), metaInfo)) {
|
||||||
saveFile(data, metaInfo);
|
saveFile(data, metaInfo);
|
||||||
|
extractContent(new ByteArrayInputStream(data), metaInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
return metaInfo;
|
return metaInfo;
|
||||||
@@ -140,6 +152,7 @@ public class DocumentFileStorageImpl extends AbstractFileStorage<FileMetainfo> i
|
|||||||
try {
|
try {
|
||||||
if (!checkForDuplicate(new FileInputStream(file), metaInfo)) {
|
if (!checkForDuplicate(new FileInputStream(file), metaInfo)) {
|
||||||
saveFile(file, metaInfo);
|
saveFile(file, metaInfo);
|
||||||
|
extractContent(new FileInputStream(file), metaInfo);
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
//TODO: ošetřit
|
//TODO: ošetřit
|
||||||
@@ -192,9 +205,8 @@ public class DocumentFileStorageImpl extends AbstractFileStorage<FileMetainfo> i
|
|||||||
|
|
||||||
if (!found.isEmpty()) {
|
if (!found.isEmpty()) {
|
||||||
FileMetainfo foundInfo = found.get(0);
|
FileMetainfo foundInfo = found.get(0);
|
||||||
info.setPathInFilesystem(foundInfo.getPathInFilesystem());
|
|
||||||
info.setMd5(foundInfo.getMd5());
|
info.setMd5(foundInfo.getMd5());
|
||||||
info.setContentType(foundInfo.getContentType());
|
info.setContent(foundInfo.getContent());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -231,7 +231,7 @@ public class MimeTypes {
|
|||||||
if (mimeTypes.size() == 0)
|
if (mimeTypes.size() == 0)
|
||||||
{
|
{
|
||||||
HashMap<String, String> tempMap = new HashMap<String, String>();
|
HashMap<String, String> tempMap = new HashMap<String, String>();
|
||||||
InputStream is = MimeTypes.class.getResourceAsStream("mime.types.properties");
|
InputStream is = MimeTypes.class.getResourceAsStream("/mime.types.properties");
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
Properties properties = new Properties();
|
Properties properties = new Properties();
|
||||||
@@ -248,6 +248,7 @@ public class MimeTypes {
|
|||||||
}
|
}
|
||||||
catch (IOException e)
|
catch (IOException e)
|
||||||
{
|
{
|
||||||
|
//ToDo: ošetřit
|
||||||
//Debug.error(e);
|
//Debug.error(e);
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
@@ -258,6 +259,7 @@ public class MimeTypes {
|
|||||||
}
|
}
|
||||||
catch (IOException e)
|
catch (IOException e)
|
||||||
{
|
{
|
||||||
|
//ToDo: ošetřit
|
||||||
//Debug.error(e);
|
//Debug.error(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
package info.bukova.isspst.ui.search;
|
package info.bukova.isspst.ui.search;
|
||||||
|
|
||||||
import info.bukova.isspst.UrlResolverHolder;
|
import info.bukova.isspst.UrlResolverHolder;
|
||||||
import info.bukova.isspst.services.FullTextService;
|
import info.bukova.isspst.services.fulltext.FullTextService;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.zkoss.bind.annotation.Command;
|
import org.zkoss.bind.annotation.Command;
|
||||||
import org.zkoss.bind.annotation.NotifyChange;
|
import org.zkoss.bind.annotation.NotifyChange;
|
||||||
import org.zkoss.zk.ui.select.annotation.WireVariable;
|
import org.zkoss.zk.ui.select.annotation.WireVariable;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class SearchForm {
|
public class SearchForm {
|
||||||
|
|
||||||
@WireVariable
|
@WireVariable
|
||||||
|
|||||||
@@ -34,5 +34,6 @@
|
|||||||
<mapping class="info.bukova.isspst.data.Invoicing"></mapping>
|
<mapping class="info.bukova.isspst.data.Invoicing"></mapping>
|
||||||
<mapping class="info.bukova.isspst.data.InvoicingItem"></mapping>
|
<mapping class="info.bukova.isspst.data.InvoicingItem"></mapping>
|
||||||
<mapping class="info.bukova.isspst.data.FileMetainfo"></mapping>
|
<mapping class="info.bukova.isspst.data.FileMetainfo"></mapping>
|
||||||
|
<mapping class="info.bukova.isspst.data.FileContent"></mapping>
|
||||||
</session-factory>
|
</session-factory>
|
||||||
</hibernate-configuration>
|
</hibernate-configuration>
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -107,7 +107,7 @@
|
|||||||
<entry key="#{T(info.bukova.isspst.data.Requirement)}" value-ref="reqEditEval"/>
|
<entry key="#{T(info.bukova.isspst.data.Requirement)}" value-ref="reqEditEval"/>
|
||||||
<entry key="#{T(info.bukova.isspst.data.TripRequirement)}" value-ref="tripReqEditEval"/>
|
<entry key="#{T(info.bukova.isspst.data.TripRequirement)}" value-ref="tripReqEditEval"/>
|
||||||
<entry key="#{T(info.bukova.isspst.data.User)}" value-ref="userEvaluator"/>
|
<entry key="#{T(info.bukova.isspst.data.User)}" value-ref="userEvaluator"/>
|
||||||
<entry key="#{T(info.bukova.isspst.services.FullTextService)}" value-ref="serviceEval"/>
|
<entry key="#{T(info.bukova.isspst.services.fulltext.FullTextService)}" value-ref="serviceEval"/>
|
||||||
</map>
|
</map>
|
||||||
</property>
|
</property>
|
||||||
<property name="specialEvaluators">
|
<property name="specialEvaluators">
|
||||||
@@ -181,7 +181,7 @@
|
|||||||
<bean id="documentStorage" class="info.bukova.isspst.storage.DocumentFileStorageImpl">
|
<bean id="documentStorage" class="info.bukova.isspst.storage.DocumentFileStorageImpl">
|
||||||
<property name="rootPath" value="${storage.root}"/>
|
<property name="rootPath" value="${storage.root}"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<!-- Session data -->
|
<!-- Session data -->
|
||||||
<bean id="sessionData" class="info.bukova.isspst.SessionData" scope="session">
|
<bean id="sessionData" class="info.bukova.isspst.SessionData" scope="session">
|
||||||
<aop:scoped-proxy/>
|
<aop:scoped-proxy/>
|
||||||
@@ -440,6 +440,6 @@
|
|||||||
<property name="validator" ref="validator"/>
|
<property name="validator" ref="validator"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<bean id="fulltextService" class="info.bukova.isspst.services.FullTextServiceImpl"/>
|
<bean id="fulltextService" class="info.bukova.isspst.services.fulltext.FullTextServiceImpl"/>
|
||||||
|
|
||||||
</beans>
|
</beans>
|
||||||
|
|||||||
Reference in New Issue
Block a user