Fix project isolation: Make loadChatHistory respect active project sessions
- Modified loadChatHistory() to check for active project before fetching all sessions - When active project exists, use project.sessions instead of fetching from API - Added detailed console logging to debug session filtering - This prevents ALL sessions from appearing in every project's sidebar Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,571 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2022 Max Bachmann
|
||||
from __future__ import annotations
|
||||
|
||||
from rapidfuzz._common_py import common_affix, conv_sequences
|
||||
from rapidfuzz._utils import is_none, setupPandas
|
||||
from rapidfuzz.distance import Indel_py as Indel
|
||||
from rapidfuzz.distance._initialize_py import Editop, Editops
|
||||
|
||||
|
||||
def _levenshtein_maximum(s1, s2, weights):
|
||||
len1 = len(s1)
|
||||
len2 = len(s2)
|
||||
insert, delete, replace = weights
|
||||
|
||||
max_dist = len1 * delete + len2 * insert
|
||||
|
||||
if len1 >= len2:
|
||||
max_dist = min(max_dist, len2 * replace + (len1 - len2) * delete)
|
||||
else:
|
||||
max_dist = min(max_dist, len1 * replace + (len2 - len1) * insert)
|
||||
|
||||
return max_dist
|
||||
|
||||
|
||||
def _uniform_generic(s1, s2, weights):
|
||||
len1 = len(s1)
|
||||
insert, delete, replace = weights
|
||||
cache = list(range(0, (len1 + 1) * delete, delete))
|
||||
|
||||
for ch2 in s2:
|
||||
temp = cache[0]
|
||||
cache[0] += insert
|
||||
for i in range(len1):
|
||||
x = temp
|
||||
if s1[i] != ch2:
|
||||
x = min(cache[i] + delete, cache[i + 1] + insert, temp + replace)
|
||||
temp = cache[i + 1]
|
||||
cache[i + 1] = x
|
||||
|
||||
return cache[-1]
|
||||
|
||||
|
||||
def _uniform_distance(s1, s2):
|
||||
if not s1:
|
||||
return len(s2)
|
||||
|
||||
VP = (1 << len(s1)) - 1
|
||||
VN = 0
|
||||
currDist = len(s1)
|
||||
mask = 1 << (len(s1) - 1)
|
||||
|
||||
block = {}
|
||||
block_get = block.get
|
||||
x = 1
|
||||
for ch1 in s1:
|
||||
block[ch1] = block_get(ch1, 0) | x
|
||||
x <<= 1
|
||||
|
||||
for ch2 in s2:
|
||||
# Step 1: Computing D0
|
||||
PM_j = block_get(ch2, 0)
|
||||
X = PM_j
|
||||
D0 = (((X & VP) + VP) ^ VP) | X | VN
|
||||
# Step 2: Computing HP and HN
|
||||
HP = VN | ~(D0 | VP)
|
||||
HN = D0 & VP
|
||||
# Step 3: Computing the value D[m,j]
|
||||
currDist += (HP & mask) != 0
|
||||
currDist -= (HN & mask) != 0
|
||||
# Step 4: Computing Vp and VN
|
||||
HP = (HP << 1) | 1
|
||||
HN = HN << 1
|
||||
VP = HN | ~(D0 | HP)
|
||||
VN = HP & D0
|
||||
|
||||
return currDist
|
||||
|
||||
|
||||
def distance(s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None, score_hint=None):
    """
    Calculate the weighted Levenshtein distance between two sequences.

    The result is the minimum total cost of insertions, deletions and
    substitutions that turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        Costs in the form (insertion, deletion, substitution).
        ``None`` and the default (1, 1, 1) use the bit-parallel unit-cost
        implementation, (1, 1, 2) is delegated to ``Indel.distance`` and any
        other value uses the generic dynamic-programming implementation.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_cutoff : int, optional
        Largest distance that is reported as is. When the distance exceeds
        it, ``score_cutoff + 1`` is returned instead. Default is None, which
        disables the cutoff.
    score_hint : int, optional
        Expected distance. Accepted for interface compatibility; this
        implementation does not use it.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the Levenshtein distance between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.distance("lewenstein", "levenshtein")
    2

    A distance above ``score_cutoff`` is reported as ``score_cutoff + 1``:

    >>> Levenshtein.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    Different costs are selected by passing a ``weights`` tuple:

    >>> Levenshtein.distance("lewenstein", "levenshtein", weights=(1,1,2))
    3
    """
    _ = score_hint
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)

    s1, s2 = conv_sequences(s1, s2)

    # Pick the cheapest implementation that supports the requested weights.
    if weights is None or weights == (1, 1, 1):
        dist = _uniform_distance(s1, s2)
    elif weights == (1, 1, 2):
        dist = Indel.distance(s1, s2)
    else:
        dist = _uniform_generic(s1, s2, weights)

    if score_cutoff is None or dist <= score_cutoff:
        return dist
    return score_cutoff + 1
|
||||
|
||||
|
||||
def similarity(s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None, score_hint=None):
    """
    Calculate the weighted Levenshtein similarity in the range [max, 0].

    This is calculated as ``max - distance``, where max is the maximal
    possible Levenshtein distance given the lengths of s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        Costs in the form (insertion, deletion, substitution). A falsy value
        such as None is replaced by (1, 1, 1), which is also the default.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_cutoff : int, optional
        Smallest similarity that is reported as is. When the similarity is
        below it, 0 is returned instead. Default is None, which disables the
        cutoff.
    score_hint : int, optional
        Expected similarity. Accepted for interface compatibility; this
        implementation does not use it.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    """
    _ = score_hint
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)

    s1, s2 = conv_sequences(s1, s2)
    if not weights:
        weights = (1, 1, 1)

    # The inputs are already processed, so no processor is forwarded.
    sim = _levenshtein_maximum(s1, s2, weights) - distance(s1, s2, weights=weights)

    if score_cutoff is None or sim >= score_cutoff:
        return sim
    return 0
|
||||
|
||||
|
||||
def normalized_distance(
    s1,
    s2,
    *,
    weights=(1, 1, 1),
    processor=None,
    score_cutoff=None,
    score_hint=None,
):
    """
    Calculate a normalized weighted Levenshtein distance in the range [1, 0].

    This is calculated as ``distance / max``, where max is the maximal
    possible Levenshtein distance given the lengths of s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        Costs in the form (insertion, deletion, substitution). A falsy value
        such as None is replaced by (1, 1, 1), which is also the default.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_cutoff : float, optional
        Score threshold as a float between 0 and 1.0. For
        ``norm_dist > score_cutoff`` 1.0 is returned instead. Default is
        None, which disables the cutoff.
    score_hint : float, optional
        Expected normalized distance. Accepted for interface compatibility;
        this implementation does not use it.

    Returns
    -------
    norm_dist : float
        Normalized distance between s1 and s2 as a float between 1.0 and 0.0.
        1.0 is returned directly when ``is_none`` reports either input as
        missing, and 0.0 when the maximal possible distance is zero.
    """
    _ = score_hint
    setupPandas()
    if is_none(s1) or is_none(s2):
        return 1.0

    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)

    s1, s2 = conv_sequences(s1, s2)
    weights = weights or (1, 1, 1)
    maximum = _levenshtein_maximum(s1, s2, weights)
    dist = distance(s1, s2, weights=weights)

    # A zero maximum would divide by zero; the distance is then 0 as well.
    # Float literals keep the return type consistent on every path.
    norm_dist = dist / maximum if maximum else 0.0
    if score_cutoff is None or norm_dist <= score_cutoff:
        return norm_dist
    return 1.0
|
||||
|
||||
|
||||
def normalized_similarity(
    s1,
    s2,
    *,
    weights=(1, 1, 1),
    processor=None,
    score_cutoff=None,
    score_hint=None,
):
    """
    Calculate a normalized weighted Levenshtein similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : tuple[int, int, int] or None, optional
        Costs in the form (insertion, deletion, substitution). A falsy value
        such as None is replaced by (1, 1, 1), which is also the default.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_cutoff : float, optional
        Score threshold as a float between 0 and 1.0. For
        ``norm_sim < score_cutoff`` 0.0 is returned instead. Default is None,
        which disables the cutoff.
    score_hint : float, optional
        Expected normalized similarity. Accepted for interface compatibility;
        this implementation does not use it.

    Returns
    -------
    norm_sim : float
        Normalized similarity between s1 and s2 as a float between 0 and 1.0.
        0.0 is returned directly when ``is_none`` reports either input as
        missing.

    Examples
    --------
    Find the normalized Levenshtein similarity between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein")
    0.81818181818181

    A similarity below ``score_cutoff`` is reported as 0.0:

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.85)
    0.0

    Different costs are selected by passing a ``weights`` tuple:

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", weights=(1,1,2))
    0.85714285714285

    When a different processor is used s1 and s2 do not have to be strings

    >>> Levenshtein.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    """
    _ = score_hint
    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0.0

    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)

    s1, s2 = conv_sequences(s1, s2)
    weights = weights or (1, 1, 1)

    # The inputs are already processed, so no processor is forwarded.
    norm_dist = normalized_distance(s1, s2, weights=weights)
    norm_sim = 1.0 - norm_dist

    if score_cutoff is None or norm_sim >= score_cutoff:
        return norm_sim
    # Float literal so the cutoff path has the same type as every other path.
    return 0.0
|
||||
|
||||
|
||||
def _matrix(s1, s2):
|
||||
if not s1:
|
||||
return (len(s2), [], [])
|
||||
|
||||
VP = (1 << len(s1)) - 1
|
||||
VN = 0
|
||||
currDist = len(s1)
|
||||
mask = 1 << (len(s1) - 1)
|
||||
|
||||
block = {}
|
||||
block_get = block.get
|
||||
x = 1
|
||||
for ch1 in s1:
|
||||
block[ch1] = block_get(ch1, 0) | x
|
||||
x <<= 1
|
||||
|
||||
matrix_VP = []
|
||||
matrix_VN = []
|
||||
for ch2 in s2:
|
||||
# Step 1: Computing D0
|
||||
PM_j = block_get(ch2, 0)
|
||||
X = PM_j
|
||||
D0 = (((X & VP) + VP) ^ VP) | X | VN
|
||||
# Step 2: Computing HP and HN
|
||||
HP = VN | ~(D0 | VP)
|
||||
HN = D0 & VP
|
||||
# Step 3: Computing the value D[m,j]
|
||||
currDist += (HP & mask) != 0
|
||||
currDist -= (HN & mask) != 0
|
||||
# Step 4: Computing Vp and VN
|
||||
HP = (HP << 1) | 1
|
||||
HN = HN << 1
|
||||
VP = HN | ~(D0 | HP)
|
||||
VN = HP & D0
|
||||
|
||||
matrix_VP.append(VP)
|
||||
matrix_VN.append(VN)
|
||||
|
||||
return (currDist, matrix_VP, matrix_VN)
|
||||
|
||||
|
||||
def editops(s1, s2, *, processor=None, score_hint=None):
    """
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_hint : int, optional
        Expected distance. Accepted for interface compatibility; this
        implementation does not use it.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein
    >>> for tag, src_pos, dest_pos in Levenshtein.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
    replace s1[3] s2[2]
     insert s1[6] s2[5]
    """
    _ = score_hint
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)

    s1, s2 = conv_sequences(s1, s2)

    # Only the differing middle part is aligned; positions are shifted back
    # by prefix_len when the operations are recorded.
    prefix_len, suffix_len = common_affix(s1, s2)
    s1 = s1[prefix_len : len(s1) - suffix_len]
    s2 = s2[prefix_len : len(s2) - suffix_len]
    dist, VP, VN = _matrix(s1, s2)

    result = Editops([], 0, 0)
    affix_len = prefix_len + suffix_len
    result._src_len = len(s1) + affix_len
    result._dest_len = len(s2) + affix_len

    if dist == 0:
        return result

    # Filled from the back while walking from the bottom-right matrix cell.
    ops = [None] * dist
    col, row = len(s1), len(s2)

    while row != 0 and col != 0:
        bit = 1 << (col - 1)

        # deletion
        if VP[row - 1] & bit:
            dist -= 1
            col -= 1
            ops[dist] = Editop("delete", col + prefix_len, row + prefix_len)
            continue

        row -= 1

        # insertion
        if row and (VN[row - 1] & bit):
            dist -= 1
            ops[dist] = Editop("insert", col + prefix_len, row + prefix_len)
            continue

        col -= 1

        # replace (Matches are not recorded)
        if s1[col] != s2[row]:
            dist -= 1
            ops[dist] = Editop("replace", col + prefix_len, row + prefix_len)

    # Whatever is left of s1 has to be deleted ...
    while col != 0:
        dist -= 1
        col -= 1
        ops[dist] = Editop("delete", col + prefix_len, row + prefix_len)

    # ... and whatever is left of s2 has to be inserted.
    while row != 0:
        dist -= 1
        row -= 1
        ops[dist] = Editop("insert", col + prefix_len, row + prefix_len)

    result._editops = ops
    return result
|
||||
|
||||
|
||||
def opcodes(s1, s2, *, processor=None, score_hint=None):
    """
    Return Opcodes describing how to turn s1 into s2.

    The edit operations are computed by ``editops`` and converted with
    ``as_opcodes()``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Applied to s1 and s2 before they are compared. Default is None,
        which leaves the inputs unchanged.
    score_hint : int, optional
        Expected distance. Forwarded unchanged to ``editops``.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in Levenshtein.opcodes("qabxcd", "abycdf"):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
    replace a[3:4] (x) b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    """
    ops = editops(s1, s2, processor=processor, score_hint=score_hint)
    return ops.as_opcodes()
|
||||
Reference in New Issue
Block a user