Skip to content
This repository was archived by the owner on Oct 18, 2024. It is now read-only.

Commit dfb4f78

Browse files
committed
fix(level-hash/persistent): auto-expand values file in case of overflow
1 parent c300fc8 commit dfb4f78

File tree

8 files changed

+165
-51
lines changed

8 files changed

+165
-51
lines changed

utilities/level-hash/src/main/java/com/itsaky/androidide/levelhash/DataIO.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ interface RandomAccessIO {
5555
*/
5656
fun position(position: Long)
5757

58+
/**
59+
* Try to move to the given position.
60+
*
61+
* @return `true` if the position was moved successfully, `false` otherwise.
62+
*/
63+
fun tryPosition(position: Long): Boolean
64+
5865
/**
5966
* Seek the position by given count.
6067
*/

utilities/level-hash/src/main/java/com/itsaky/androidide/levelhash/LevelHash.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package com.itsaky.androidide.levelhash
1919

20+
import androidx.annotation.FloatRange
21+
2022
/**
2123
* Level hash is a write-optimized and high-performance hashing index scheme with cost-efficient
2224
* resizing and low-overhead consistency guarantee for persistent memory.
@@ -136,6 +138,17 @@ interface LevelHash<K : Any, V : Any?> : AutoCloseable {
136138
*/
137139
fun expand(addtionalLevelSize: Int): Boolean
138140

141+
/**
142+
* Get the load factor of the level hash.
143+
*
144+
* @return The load factor. Load factor is a measure of how full the level
145+
* hash is. It is calculated as the ratio of the number of occupied slots to
146+
* the total number of slots in the level hash. Return value lies between 0.0
147+
* and 1.0.
148+
*/
149+
@FloatRange(from = 0.0, to = 1.0)
150+
fun loadFactor(): Float
151+
139152
/**
140153
* The two levels in a [LevelHash].
141154
*/

utilities/level-hash/src/main/java/com/itsaky/androidide/levelhash/internal/AbstractLevelHash.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ internal abstract class AbstractLevelHash<K : Any, V : Any?> internal constructo
102102
return getSlot(level.index, bucketIdx, slotIdx)
103103
}
104104

105+
override fun loadFactor(): Float {
106+
return levelItemCounts.sum() / totalSlotCount.toFloat()
107+
}
108+
105109
override fun get(key: K): V? {
106110
return findSlot(key)?.value
107111
}
@@ -186,7 +190,6 @@ internal abstract class AbstractLevelHash<K : Any, V : Any?> internal constructo
186190
val emptySlot = getSlot(Level.TOP, sidx, emptyLocation)
187191
emptySlot.reset(key, value)
188192
levelItemCounts[Level.TOP.index]++
189-
190193
return true
191194
}
192195
}
@@ -201,7 +204,6 @@ internal abstract class AbstractLevelHash<K : Any, V : Any?> internal constructo
201204
slot.reset(null, null)
202205
return oldValue
203206
}
204-
205207
return null
206208
}
207209

utilities/level-hash/src/main/java/com/itsaky/androidide/levelhash/internal/PersistentLevelHashIO.kt

Lines changed: 114 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@ package com.itsaky.androidide.levelhash.internal
2020
import com.itsaky.androidide.levelhash.DataExternalizer
2121
import com.itsaky.androidide.levelhash.LevelHash.ResizeFailure
2222
import com.itsaky.androidide.levelhash.seekInt
23-
import com.itsaky.androidide.levelhash.seekShort
2423
import com.itsaky.androidide.levelhash.util.DataExternalizers.SIZE_INT
2524
import com.itsaky.androidide.levelhash.util.DataExternalizers.SIZE_LONG
26-
import com.itsaky.androidide.levelhash.util.DataExternalizers.SIZE_SHORT
2725
import com.itsaky.androidide.levelhash.util.FileUtils
2826
import com.itsaky.androidide.levelhash.util.MappedRandomAccessIO
2927
import org.slf4j.LoggerFactory
@@ -98,7 +96,7 @@ private val logger = LoggerFactory.getLogger(PersistentLevelHashIO::class.java)
9896
* value values[];
9997
*
10098
* value {
101-
* u16 entry_size;
99+
* u32 entry_size;
102100
* u32 key_size;
103101
* u8 key[key_size];
104102
* u32 value_size;
@@ -165,10 +163,11 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
165163
// even multiple threads, the current position cannot be determined
166164

167165
internal val metaIo =
168-
PersistentMetaIO(File(indexFile.parentFile, "${indexFile.name}._meta"), levelSize, bucketSize)
166+
PersistentMetaIO(File(indexFile.parentFile, "${indexFile.name}._meta"),
167+
levelSize, bucketSize)
169168
private val keymapFile = File(indexFile.parentFile, "${indexFile.name}._i")
170169
private val raKeymapFile by lazy { RandomAccessFile(keymapFile, "rw") }
171-
private val raIndexFile by lazy { RandomAccessFile(indexFile, "rw") }
170+
private val raValuesFile by lazy { RandomAccessFile(indexFile, "rw") }
172171

173172
private val keymapIo = MappedRandomAccessIO()
174173
private var valIo = MappedRandomAccessIO()
@@ -192,26 +191,54 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
192191
magicNumber = VALUES_MAGIC_NUMBER)
193192

194193
// the header region is not memory mapped
195-
var buffer = raIndexFile.channel.map(FileChannel.MapMode.READ_WRITE,
196-
VALUES_HEADER_SIZE_BYTES, metaIo.valuesFileSize)
197-
valIo.reset(buffer, 0L, metaIo.valuesFileSize)
198-
199-
val keymapSize = metaIo.keymapSize
200-
buffer = raKeymapFile.channel.map(FileChannel.MapMode.READ_WRITE,
201-
KEYMAP_HEADER_SIZE_BYTES, keymapSize)
202-
keymapIo.reset(buffer, 0L, keymapSize)
194+
valuesRemap(size = metaIo.valuesFileSize)
195+
keymapResize(newSize = metaIo.keymapSize)
203196
}
204197

205198
private fun realValueOffset(offset: Long) = VALUES_HEADER_SIZE_BYTES + offset
206199

207200
private fun realKeymapOffset(offset: Long) = KEYMAP_HEADER_SIZE_BYTES + offset
208201

209202
private fun valuesDeallocate(offset: Long, len: Long) =
210-
FileUtils.deallocate(raIndexFile, realValueOffset(offset), len)
203+
FileUtils.deallocate(raValuesFile, realValueOffset(offset), len)
211204

212205
private fun keymapDeallocate(offset: Long, len: Long) =
213206
FileUtils.deallocate(raKeymapFile, realKeymapOffset(offset), len)
214207

208+
/**
209+
* Resize the values file so that its size becomes [newSize] in bytes. This is
210+
* risky and should be used carefully. Resizing the file incorrectly may lead
211+
* to data loss. The [valIo] is reset to position 0 after this operation.
212+
*
213+
* @param newSize The new size of the values file, in bytes.
214+
*/
215+
private fun valuesResize(newSize: Long) {
216+
metaIo.valuesFileSize = newSize
217+
raValuesFile.setLength(newSize)
218+
valuesRemap(newSize)
219+
}
220+
221+
private fun keymapResize(newSize: Long) {
222+
if (raKeymapFile.length() != newSize) {
223+
raKeymapFile.setLength(newSize)
224+
}
225+
keymapRemap(newSize)
226+
}
227+
228+
private fun valuesRemap(size: Long, offset: Long = VALUES_HEADER_SIZE_BYTES) =
229+
fileRemap(raValuesFile, valIo, size, offset)
230+
231+
private fun keymapRemap(size: Long, offset: Long = KEYMAP_HEADER_SIZE_BYTES) =
232+
fileRemap(raKeymapFile, keymapIo, size, offset)
233+
234+
private fun fileRemap(file: RandomAccessFile, io: MappedRandomAccessIO,
235+
size: Long, offset: Long = 0L
236+
) {
237+
val buffer = file.channel.map(FileChannel.MapMode.READ_WRITE, offset, size)
238+
io.close()
239+
io.reset(buffer, 0L, size)
240+
}
241+
215242
private fun slotAddress(
216243
levelIdx: Int,
217244
bucketIdx: Int,
@@ -223,8 +250,9 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
223250
else -> throw IllegalArgumentException("Invalid level index: $levelIdx")
224251
}
225252

226-
return levelPos + (KEYMAP_ENTRY_SIZE_BYTES * metaIo.bucketSize * bucketIdx) +
227-
(KEYMAP_ENTRY_SIZE_BYTES * slotIdx)
253+
return levelPos + // start position of level
254+
(KEYMAP_ENTRY_SIZE_BYTES * metaIo.bucketSize * bucketIdx) + // bucket position
255+
(KEYMAP_ENTRY_SIZE_BYTES * slotIdx) // slot position in bucket
228256
}
229257

230258
/**
@@ -269,7 +297,9 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
269297
slotIdx: Int,
270298
): Boolean {
271299
// 0 == no value entry
272-
return valueAddressForPos(levelNum, bucketIdx, slotIdx) > 0L
300+
return valueAddressForPos(levelNum, bucketIdx, slotIdx).let { valAddr ->
301+
valAddr > 0L && valIo.tryPosition(valAddr - 1) && valIo.readInt() > 0
302+
}
273303
}
274304

275305
/**
@@ -286,7 +316,7 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
286316
return null
287317
}
288318

289-
if (valIo.readUnsignedShort() <= 0L) {
319+
if (valIo.readInt() <= 0L) {
290320
// entry size is 0, so the slot is empty
291321
return null
292322
}
@@ -361,14 +391,15 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
361391
return value
362392
}
363393

394+
395+
364396
fun writeEntry(
365397
levelNum: Int,
366398
bucketIdx: Int,
367399
slotIdx: Int,
368400
key: K?,
369401
value: V?,
370402
) {
371-
// TODO: expand values file in case of buffer overflow
372403
val slotAddr = slotAddress(levelNum, bucketIdx, slotIdx)
373404

374405
if (key == null) {
@@ -382,17 +413,23 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
382413
val (isUpdate, existingEntrySize) = run {
383414
if (existingValueAddr > 0L) {
384415
valIo.position(existingValueAddr - 1)
385-
true to valIo.readUnsignedShort().toLong()
416+
true to valIo.readInt().toLong()
386417
} else {
387418
false to 0L
388419
}
389420
}
390421

391-
val tokenAddr = metaIo.valuesNextEntry
392-
valIo.position(tokenAddr)
422+
val thisValueAddr = metaIo.valuesNextEntry
423+
if (thisValueAddr + 4 * KB_1 > metaIo.valuesFileSize) {
424+
valuesResize(thisValueAddr + VALUES_SEGMENT_SIZE_BYTES)
425+
}
393426

394-
// leave space for entry size
395-
valIo.seekShort()
427+
// valuesResize (when triggered above) resets the position to 0, so always seek to the entry address explicitly
428+
valIo.position(thisValueAddr)
429+
430+
if (valIo.readInt() > 0) {
431+
return
432+
}
396433

397434
val keyValPos = valIo.position()
398435

@@ -402,25 +439,25 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
402439

403440
val finalAddr = valIo.position()
404441
val entrySize = finalAddr - keyValPos
405-
check(entrySize <= Short.MAX_VALUE) {
442+
check(entrySize <= Int.MAX_VALUE) {
406443
"Entry size is too large: $entrySize"
407444
}
408445

409446
// then set the token
410-
valIo.position(tokenAddr)
411-
valIo.writeShort((entrySize and 0xFFFF).toInt())
447+
valIo.position(thisValueAddr)
448+
valIo.writeInt(entrySize.toInt())
412449

413450
// reset to the final position
414451
valIo.position(finalAddr)
415452

416453
// then update the address in the keymap
417454
keymapIo.position(slotAddr)
418-
keymapIo.writeLong(tokenAddr + 1)
455+
keymapIo.writeLong(thisValueAddr + 1)
419456

420457
metaIo.valuesNextEntry = finalAddr
421458

422459
if (isUpdate) {
423-
var size: Long = SIZE_SHORT
460+
var size = SIZE_INT
424461
if (existingEntrySize > 0L) {
425462
size += existingEntrySize
426463
}
@@ -437,18 +474,18 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
437474
keymapIo.position(slotAddr)
438475
val valueAddr = keymapIo.readLong()
439476

440-
if (valueAddr == 0L) {
441-
return null
442-
}
443-
444477
// reading this region again will return 0, which is considered
445478
// a null pointer
446479
keymapDeallocate(slotAddr, SIZE_LONG)
447480

481+
if (valueAddr == 0L) {
482+
return null
483+
}
484+
448485
valIo.position(valueAddr - 1)
449-
valIo.seekShort() // seek over entrySize
486+
valIo.seekInt() // seek over entrySize
450487

451-
var entrySize = SIZE_SHORT
488+
var entrySize = SIZE_INT
452489

453490
val keySize = valIo.readInt()
454491
entrySize += SIZE_INT
@@ -514,7 +551,41 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
514551
}
515552

516553
override fun close() {
517-
closeAndLogErrs(metaIo, keymapIo, valIo, raKeymapFile, raIndexFile)
554+
closeAndLogErrs(metaIo, keymapIo, valIo, raKeymapFile, raValuesFile)
555+
}
556+
557+
internal fun dmpSlotAddrs(
558+
levelNum: Int,
559+
bucketIdx: Int,
560+
slotIdx: Int,
561+
) {
562+
val sb = StringBuilder()
563+
sb.append("level: ")
564+
sb.appendLine(levelNum)
565+
sb.append("bucket: ")
566+
sb.appendLine(bucketIdx)
567+
sb.append("slot: ")
568+
sb.appendLine(slotIdx)
569+
570+
val slotAddr = slotAddress(levelNum, bucketIdx, slotIdx)
571+
sb.append("keymapAddr: ")
572+
appendAddress(sb, slotAddr)
573+
574+
keymapIo.position(slotAddr)
575+
576+
val valueAddr = keymapIo.readLong()
577+
sb.append("valueAddr: ")
578+
appendAddress(sb, valueAddr)
579+
580+
println(sb.toString())
581+
}
582+
583+
@OptIn(ExperimentalStdlibApi::class)
584+
private fun appendAddress(sb: StringBuilder, value: Long) {
585+
sb.append(value)
586+
sb.append(" [")
587+
sb.append(value.toHexString(HexFormat.UpperCase))
588+
sb.appendLine("]")
518589
}
519590

520591
companion object {
@@ -531,6 +602,11 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
531602

532603
private val log = LoggerFactory.getLogger(PersistentLevelHashIO::class.java)
533604

605+
/**
606+
* 1 kilobyte (1024 bytes).
607+
*/
608+
internal const val KB_1 = 1024
609+
534610
/**
535611
* The number of bytes it takes to store the magic number of the keymap/values
536612
* file.
@@ -548,10 +624,9 @@ internal class PersistentLevelHashIO<K : Any, V : Any?>(
548624
internal const val VALUES_HEADER_SIZE_BYTES: Long = MAGIC_NUMBER_SIZE_BYTES
549625

550626
/**
551-
* The maximum size of the region of the values file that can be mapped into
552-
* memory at a given time.
627+
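* The size, in bytes, of one segment of the values file: the initial size of the file, and the headroom added past the next entry address whenever the file is expanded.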
553628
*/
554-
internal const val VALUES_INITIAL_SIZE_BYTES = 512L * 1024L
629+
internal const val VALUES_SEGMENT_SIZE_BYTES = 512L * 1024L
555630

556631
/**
557632
* The number of bytes used to store the header of the keymap file.

utilities/level-hash/src/main/java/com/itsaky/androidide/levelhash/internal/PersistentMetaIO.kt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818
package com.itsaky.androidide.levelhash.internal
1919

2020
import androidx.collection.MutableLongLongMap
21-
import com.itsaky.androidide.levelhash.LevelHash
2221
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.KEYMAP_ENTRY_SIZE_BYTES
2322
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.LEVEL_KEYMAP_VERSION
2423
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.LEVEL_VALUES_VERSION
25-
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.VALUES_HEADER_SIZE_BYTES
26-
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.VALUES_INITIAL_SIZE_BYTES
24+
import com.itsaky.androidide.levelhash.internal.PersistentLevelHashIO.Companion.VALUES_SEGMENT_SIZE_BYTES
2725
import com.itsaky.androidide.levelhash.util.DataExternalizers.SIZE_INT
2826
import com.itsaky.androidide.levelhash.util.DataExternalizers.SIZE_LONG
2927
import org.slf4j.LoggerFactory
@@ -105,7 +103,7 @@ internal class PersistentMetaIO(private val metaFile: File,
105103
keymapVersion = LEVEL_KEYMAP_VERSION
106104
}
107105
if (valuesFileSize == 0L) {
108-
valuesFileSize = VALUES_INITIAL_SIZE_BYTES
106+
valuesFileSize = VALUES_SEGMENT_SIZE_BYTES
109107
}
110108
if (valuesFirstEntry == 0L) {
111109
valuesFirstEntry = 0

0 commit comments

Comments
 (0)