Skip to content
5 changes: 5 additions & 0 deletions docs/changelog/120824.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 120824
summary: Optimize some per-document hot paths in the geoip processor
area: Ingest Node
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDefaultDatabases;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
Expand Down Expand Up @@ -172,10 +173,15 @@ public void testInvalidTimestamp() throws Exception {
for (Path geoIpTmpDir : geoIpTmpDirs) {
try (Stream<Path> files = Files.list(geoIpTmpDir)) {
Set<String> names = files.map(f -> f.getFileName().toString()).collect(Collectors.toSet());
assertThat(names, not(hasItem("GeoLite2-ASN.mmdb")));
assertThat(names, not(hasItem("GeoLite2-City.mmdb")));
assertThat(names, not(hasItem("GeoLite2-Country.mmdb")));
assertThat(names, not(hasItem("MyCustomGeoLite2-City.mmdb")));
assertThat(
names,
allOf(
not(hasItem("GeoLite2-ASN.mmdb")),
not(hasItem("GeoLite2-City.mmdb")),
not(hasItem("GeoLite2-Country.mmdb")),
not(hasItem("MyCustomGeoLite2-City.mmdb"))
)
);
}
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public class DatabaseReaderLazyLoader implements IpDatabase {
private volatile boolean deleteDatabaseFileOnShutdown;
private final AtomicInteger currentUsages = new AtomicInteger(0);

// it seems insane, especially if you read the code for UnixPath, but calling toString on a path in advance here is faster enough
// than calling it on every call to cache.putIfAbsent that it makes the slight additional internal complication worth it
private final String cachedDatabasePathToString;

DatabaseReaderLazyLoader(GeoIpCache cache, Path databasePath, String md5) {
this.cache = cache;
this.databasePath = Objects.requireNonNull(databasePath);
Expand All @@ -61,6 +65,9 @@ public class DatabaseReaderLazyLoader implements IpDatabase {
this.databaseReader = new SetOnce<>();
this.databaseType = new SetOnce<>();
this.buildDate = new SetOnce<>();

// cache the toString on construction
this.cachedDatabasePathToString = databasePath.toString();
}

/**
Expand Down Expand Up @@ -99,7 +106,7 @@ int current() {
@Override
@Nullable
public <RESPONSE> RESPONSE getResponse(String ipAddress, CheckedBiFunction<Reader, String, RESPONSE, Exception> responseProvider) {
return cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
return cache.putIfAbsent(ipAddress, cachedDatabasePathToString, ip -> {
try {
return responseProvider.apply(get(), ipAddress);
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,32 @@ public static Metadata fromXContent(XContentParser parser) {
}

public boolean isCloseToExpiration() {
return Instant.ofEpochMilli(lastCheck).isBefore(Instant.now().minus(25, ChronoUnit.DAYS));
final Instant now = Instant.ofEpochMilli(System.currentTimeMillis()); // millisecond precision is sufficient (and faster)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could pass in a ThreadPool, and use ThreadPool::absoluteTimeInMillis, which would be significantly faster since it's cached. Same goes in the other place below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. I'll see if I can make that happen.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't make it pass tests! I'm going to open a new ticket to track it.

return Instant.ofEpochMilli(lastCheck).isBefore(now.minus(25, ChronoUnit.DAYS));
}

// these constants support the micro optimization below, see that note
private static final TimeValue THIRTY_DAYS = TimeValue.timeValueDays(30);
private static final long THIRTY_DAYS_MILLIS = THIRTY_DAYS.millis();

public boolean isNewEnough(Settings settings) {
TimeValue valid = settings.getAsTime("ingest.geoip.database_validity", TimeValue.timeValueDays(30));
return Instant.ofEpochMilli(lastCheck).isAfter(Instant.now().minus(valid.getMillis(), ChronoUnit.MILLIS));
// micro optimization: this looks a little silly, but the expected case is that database_validity is only used in tests.
// we run this code on every document, though, so the argument checking and other bits that getAsTime does is enough
// to show up in a flame graph.

// if you grep for "ingest.geoip.database_validity" and you'll see that it's not a 'real' setting -- it's only defined in
// AbstractGeoIpIT, that's why it's an inline string constant here and no some static final, and also why it cannot
// be the case that this setting exists in a real running cluster

final long valid;
if (settings.hasValue("ingest.geoip.database_validity")) {
valid = settings.getAsTime("ingest.geoip.database_validity", THIRTY_DAYS).millis();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this slow only when there is no value (b/c we generate an exception or something)? Or is it always slow? If the latter, should we calculate this outside of this method, and have a settings listener listening for updates to the setting?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added another comment here in the code -- the setting itself is kindof a hack to get around the situation where we want to be able to test this code but we can't make it possible to actually change the value of the setting in 'the real world'.

I'm not at all opposed to continuing to grind on this, but I'd love to do that in a different PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the latter, should we calculate this outside of this method, and have a settings listener listening for updates to the setting?

I took a closer look at the idea of the settings listener, and I think the reason we're not doing any of that right now is specifically because the setting in question isn't a 'real setting' -- AFAICT all the settings update listener type methods require a proper Setting object to hang off of, which we don't have in this case. 😬

Somebody could probably get pretty far by extracting our current code and manually watching for settings changes like this:

+ clusterService.addListener(event -> { + if (event.metadataChanged() && event.state().metadata().settings() != event.previousState().metadata().settings()) { + logger.info("metadata changed and settings changed"); + [... process the settings update here, check if the database validity has changed, etc ...] + } + }); 
} else {
valid = THIRTY_DAYS_MILLIS;
}

final Instant now = Instant.ofEpochMilli(System.currentTimeMillis()); // millisecond precision is sufficient (and faster)
return Instant.ofEpochMilli(lastCheck).isAfter(now.minus(valid, ChronoUnit.MILLIS));
}

@Override
Expand Down