diff --git a/RELEASE.md b/RELEASE.md
index 89a99856..66e12490 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -13,6 +13,9 @@ Visit https://github.com/OpenSextant/Xponents/releases for latest library releas
were replaced.
* Migrated to a separate repo: Find this now maintained separately at [`Xponents-Core` repo](https://github.com/OpenSextant/Xponents-Core)
This is the first release ./Core/ folder will not appear in this source tree.
+* **Xponents REST**:
+ * Noise filtering: added a `minlen` parameter to the Xlayer (Xponents REST) service. By default it filters out trivial matches, giving about a 3-5% reduction in
+ noise tags from codes, short names, etc. -- things that are typically neither useful geocodings nor names of places.
**Xponents 3.6.7**: Springy
* **Core API**
diff --git a/pom.xml b/pom.xml
index 3c94e50d..c50bd36a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
4.0.0
org.opensextant
opensextant-xponents
- 3.7.3
+ 3.7.4
jar
OpenSextant Xponents
An information extraction toolkit focused on geography and temporal entities
diff --git a/src/main/java/org/opensextant/output/Transforms.java b/src/main/java/org/opensextant/output/Transforms.java
index 5660f508..9861c370 100644
--- a/src/main/java/org/opensextant/output/Transforms.java
+++ b/src/main/java/org/opensextant/output/Transforms.java
@@ -296,6 +296,17 @@ private static JsonObject populateMatch(final TextMatch m) {
return o;
}
+ private static JsonArray populateTaxon(List taxons) { // Builds a JSON array with one {"cat", "taxon"} object per given Taxon.
+ JsonArray arr = new JsonArray();
+ for (Taxon t : taxons) {
+ JsonObject tj = new JsonObject();
+ tj.put("cat", t.catalog); // NOTE(review): key is "cat" here, while toJSON() uses "catalog" on the parent node -- confirm this is intended.
+ tj.put("taxon", t.name); // Same "taxon" key that toJSON() uses for the first taxon's name.
+ arr.add(tj);
+ }
+ return arr; // Entries follow the iteration order of the input list; empty input yields an empty array.
+ }
+
/**
* Return seconds of epoch.
*
@@ -349,8 +360,11 @@ public static JsonObject toJSON(final List matches, final Parameters
// Only get one taxon from this match. That is sufficient, but not perfect.
Taxon n = match.getTaxons().get(0);
JsonObject node = populateMatch(name);
- node.put("taxon", n.name); // Name of taxon
+ node.put("taxon", n.name); // Name of taxon, just the first and most common.
node.put("catalog", n.catalog); // Name of catalog or source
+ if (match.getTaxons().size() > 1) {
+ node.put("all-taxons", populateTaxon(match.getTaxons())); // Every taxon (catalog + name) for this match; only emitted when there is more than one.
+ }
node.put("method", "TaxonMatcher");
resultArray.add(node);
}
diff --git a/src/main/java/org/opensextant/xlayer/server/TaggerResource.java b/src/main/java/org/opensextant/xlayer/server/TaggerResource.java
index a3a8fedf..33fec9c3 100644
--- a/src/main/java/org/opensextant/xlayer/server/TaggerResource.java
+++ b/src/main/java/org/opensextant/xlayer/server/TaggerResource.java
@@ -128,7 +128,7 @@ protected void parseParameters(Parameters p, Set kv) {
}
// Request tagging on demand.
- p.tag_all_taxons = kv.contains("all-taxons");
+ p.tag_all_taxons = kv.contains("all_taxons");
p.tag_taxons = (kv.contains("taxons") || kv.contains("orgs") || kv.contains("persons"));
p.tag_patterns = kv.contains("patterns") || kv.contains("dates");
diff --git a/src/main/java/org/opensextant/xlayer/server/xgeo/XlayerRestlet.java b/src/main/java/org/opensextant/xlayer/server/xgeo/XlayerRestlet.java
index 557a649b..36366bec 100644
--- a/src/main/java/org/opensextant/xlayer/server/xgeo/XlayerRestlet.java
+++ b/src/main/java/org/opensextant/xlayer/server/xgeo/XlayerRestlet.java
@@ -7,6 +7,7 @@
import org.opensextant.extraction.MatchFilter;
import org.opensextant.extractors.geo.PlaceGeocoder;
import org.opensextant.extractors.geo.PostalGeocoder;
+import org.opensextant.extractors.xtax.TaxonMatcher;
import org.opensextant.extractors.xtemporal.XTemporal;
import org.opensextant.processing.Parameters;
import org.opensextant.xlayer.server.XlayerApp;
@@ -39,9 +40,10 @@ public synchronized Restlet createInboundRoot() {
banner();
configure();
Context ctx = getContext();
- ctx.getAttributes().put("xgeo", tagger);
- ctx.getAttributes().put("xtemp", dateTagger);
- ctx.getAttributes().put("xpostal", postalGeocoder);
+ ctx.getAttributes().put(XponentsGeotagger.GEO_TAGGER, tagger);
+ ctx.getAttributes().put(XponentsGeotagger.DATE_TAGGER, dateTagger);
+ ctx.getAttributes().put(XponentsGeotagger.POSTAL_TAGGER, postalGeocoder);
+ ctx.getAttributes().put(XponentsGeotagger.TAXON_TAGGER, phraseTagger);
ctx.getAttributes().put("version", this.version);
info("%%%% Xponents Geo Phase Configured");
@@ -59,6 +61,7 @@ public synchronized Restlet createInboundRoot() {
private PlaceGeocoder tagger = null;
private XTemporal dateTagger = null;
private PostalGeocoder postalGeocoder = null;
+ private TaxonMatcher phraseTagger = null;
/**
* @throws ConfigException
@@ -73,6 +76,8 @@ public void configure() throws ConfigException {
tagger.setParameters(taggerParams);
tagger.configure();
+ phraseTagger = new TaxonMatcher();
+
// TODO: refine this filter list. Use "/filters/non-placenames,user.csv" going forward.
//
String userFilterPath = "/filters/non-placenames,user.csv";
diff --git a/src/main/java/org/opensextant/xlayer/server/xgeo/XponentsGeotagger.java b/src/main/java/org/opensextant/xlayer/server/xgeo/XponentsGeotagger.java
index f7e658cf..23321141 100644
--- a/src/main/java/org/opensextant/xlayer/server/xgeo/XponentsGeotagger.java
+++ b/src/main/java/org/opensextant/xlayer/server/xgeo/XponentsGeotagger.java
@@ -12,6 +12,7 @@
import org.opensextant.extractors.geo.PlaceCandidate;
import org.opensextant.extractors.geo.PlaceGeocoder;
import org.opensextant.extractors.geo.PostalGeocoder;
+import org.opensextant.extractors.xtax.TaxonMatcher;
import org.opensextant.extractors.xtemporal.XTemporal;
import org.opensextant.output.Transforms;
import org.opensextant.processing.Parameters;
@@ -157,25 +158,28 @@ public Representation process(TextInput input, Parameters jobParams) {
List matches = new ArrayList<>();
// BOTH geo and taxons could be requested: features = "geo", "all-taxons"
- // `geo` tagging
- if (tag_geo(jobParams) || tag_taxons(jobParams)) {
+ if (!tag_geo(jobParams) && tag_taxons(jobParams)) {
+ // Taxonomic tags only
+ TaxonMatcher phraseTagger = (TaxonMatcher) getExtractor(TAXON_TAGGER);
+ matches.addAll(phraseTagger.extract(input, jobParams));
+ } else if (tag_geo(jobParams) || tag_taxons(jobParams)) {
+ // Geotagging
PlaceGeocoder xgeo = (PlaceGeocoder) getExtractor(GEO_TAGGER);
matches.addAll(xgeo.extract(input, jobParams));
}
+
if (jobParams.tag_patterns) {
XTemporal xt = (XTemporal) getExtractor(DATE_TAGGER);
matches.addAll(xt.extract(input));
}
if (jobParams.tag_postal) {
PostalGeocoder pg = (PostalGeocoder) getExtractor(POSTAL_TAGGER);
- if (pg != null) {
- if (tag_geo(jobParams)) {
- // OPTIMIZATION: reuse matches accumulated so far to prevent
- // PostalGeocoder from repeating extract()
- pg.setGeneralMatches(matches);
- }
- matches.addAll(pg.extract(input));
+ if (tag_geo(jobParams)) {
+ // OPTIMIZATION: reuse matches accumulated so far to prevent
+ // PostalGeocoder from repeating extract()
+ pg.setGeneralMatches(matches);
}
+ matches.addAll(pg.extract(input));
}
if (isDebug()) {
debug(String.format("CURRENT MEM USAGE(K)=%d", RuntimeTools.reportMemory()));
diff --git a/src/main/resources/banner.txt b/src/main/resources/banner.txt
index 35c8171b..c774d196 100644
--- a/src/main/resources/banner.txt
+++ b/src/main/resources/banner.txt
@@ -12,5 +12,5 @@
//
// Xponents Geotagger version 3.7 (2024-Q1)
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
-VERSION: 3.7.3
+VERSION: 3.7.4