-
Notifications
You must be signed in to change notification settings - Fork 112
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Store compressed vectors in dense ByteSequence for PQVectors
- Loading branch information
1 parent
da08d40
commit 650b4d2
Showing
11 changed files
with
274 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
148 changes: 148 additions & 0 deletions
148
jvector-base/src/main/java/io/github/jbellis/jvector/vector/ArraySliceByteSequence.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
/* | ||
* Copyright DataStax, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.github.jbellis.jvector.vector; | ||
|
||
import io.github.jbellis.jvector.util.RamUsageEstimator; | ||
import io.github.jbellis.jvector.vector.types.ByteSequence; | ||
import java.util.Arrays; | ||
|
||
/** | ||
* A read only {@link ByteSequence} implementation that wraps an array and provides a view into a slice of it. | ||
*/ | ||
public class ArraySliceByteSequence implements ByteSequence<byte[]> { | ||
private final byte[] data; | ||
private final int offset; | ||
private final int length; | ||
|
||
public ArraySliceByteSequence(byte[] data, int offset, int length) { | ||
if (offset < 0 || length < 0 || offset + length > data.length) { | ||
throw new IllegalArgumentException("Invalid offset or length"); | ||
} | ||
this.data = data; | ||
this.offset = offset; | ||
this.length = length; | ||
} | ||
|
||
@Override | ||
public byte[] get() { | ||
return data; | ||
} | ||
|
||
@Override | ||
public int offset() { | ||
return offset; | ||
} | ||
|
||
@Override | ||
public byte get(int n) { | ||
if (n < 0 || n >= length) { | ||
throw new IndexOutOfBoundsException("Index: " + n + ", Length: " + length); | ||
} | ||
return data[offset + n]; | ||
} | ||
|
||
@Override | ||
public void set(int n, byte value) { | ||
if (n < 0 || n >= length) { | ||
throw new IndexOutOfBoundsException("Index: " + n + ", Length: " + length); | ||
} | ||
data[offset + n] = value; | ||
} | ||
|
||
@Override | ||
public void setLittleEndianShort(int shortIndex, short value) { | ||
throw new UnsupportedOperationException("Not supported on slices"); | ||
} | ||
|
||
@Override | ||
public void zero() { | ||
throw new UnsupportedOperationException("Not supported on slices"); | ||
} | ||
|
||
@Override | ||
public int length() { | ||
return length; | ||
} | ||
|
||
@Override | ||
public ByteSequence<byte[]> copy() { | ||
byte[] newData = Arrays.copyOfRange(data, offset, offset + length); | ||
return new ArrayByteSequence(newData); | ||
} | ||
|
||
@Override | ||
public ByteSequence<byte[]> slice(int sliceOffset, int sliceLength) { | ||
if (sliceOffset < 0 || sliceLength < 0 || sliceOffset + sliceLength > length) { | ||
throw new IllegalArgumentException("Invalid slice parameters"); | ||
} | ||
if (sliceOffset == 0 && sliceLength == length) { | ||
return this; | ||
} | ||
return new ArraySliceByteSequence(data, offset + sliceOffset, sliceLength); | ||
} | ||
|
||
@Override | ||
public long ramBytesUsed() { | ||
// Only count the overhead of this slice object, not the underlying array | ||
// since that's shared and counted elsewhere | ||
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + | ||
(3 * Integer.BYTES); // offset, length, and reference to data | ||
} | ||
|
||
@Override | ||
public void copyFrom(ByteSequence<?> src, int srcOffset, int destOffset, int copyLength) { | ||
throw new UnsupportedOperationException("Not supported on slices"); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
StringBuilder sb = new StringBuilder(); | ||
sb.append("["); | ||
for (int i = 0; i < Math.min(length, 25); i++) { | ||
sb.append(get(i)); | ||
if (i < length - 1) { | ||
sb.append(", "); | ||
} | ||
} | ||
if (length > 25) { | ||
sb.append("..."); | ||
} | ||
sb.append("]"); | ||
return sb.toString(); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
ArraySliceByteSequence that = (ArraySliceByteSequence) o; | ||
if (this.length != that.length) return false; | ||
for (int i = 0; i < length; i++) { | ||
if (this.get(i) != that.get(i)) return false; | ||
} | ||
return true; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
int result = 1; | ||
for (int i = 0; i < length; i++) { | ||
result = 31 * result + get(i); | ||
} | ||
return result; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.