musicdiff.comparison
1# ------------------------------------------------------------------------------ 2# Purpose: comparison is a music comparison package for use by musicdiff. 3# musicdiff is a package for comparing music scores using music21. 4# 5# Authors: Greg Chapman <gregc@mac.com> 6# musicdiff is derived from: 7# https://github.com/fosfrancesco/music-score-diff.git 8# by Francesco Foscarin <foscarin.francesco@gmail.com> 9# 10# Copyright: (c) 2022-2025 Francesco Foscarin, Greg Chapman 11# License: MIT, see LICENSE 12# ------------------------------------------------------------------------------ 13 14__docformat__ = "google" 15 16import copy 17from collections import namedtuple 18from difflib import ndiff 19from pathlib import Path 20 21# import typing as t 22import numpy as np 23 24from music21.common import OffsetQL 25from musicdiff.annotation import AnnScore, AnnNote, AnnVoice, AnnExtra, AnnLyric 26from musicdiff.annotation import AnnStaffGroup, AnnMetadataItem 27from musicdiff import M21Utils 28 29class EvaluationMetrics: 30 def __init__( 31 self, 32 gt_path: Path, 33 pred_path: Path, 34 gt_numsyms: int, 35 pred_numsyms: int, 36 omr_edit_distance: int, 37 edit_distances_dict: dict[str, int], 38 omr_ned: float 39 ): 40 self.gt_path: Path = gt_path 41 self.pred_path: Path = pred_path 42 self.gt_numsyms: int = gt_numsyms 43 self.pred_numsyms: int = pred_numsyms 44 self.omr_edit_distance: int = omr_edit_distance 45 self.edit_distances_dict: dict[str, int] = edit_distances_dict 46 self.omr_ned: float = omr_ned 47 48# memoizers to speed up the recursive computation 49def _memoize_notes_set_distance(func): 50 def memoizer(original, compare_to): 51 key = repr(original) + repr(compare_to) 52 if key not in Comparison._memoizer_mem: 53 Comparison._memoizer_mem[key] = func(original, compare_to) 54 return copy.deepcopy(Comparison._memoizer_mem[key]) 55 56 return memoizer 57 58def _memoize_extras_set_distance(func): 59 def memoizer(original, compare_to): 60 key = repr(original) + 
repr(compare_to) 61 if key not in Comparison._memoizer_mem: 62 Comparison._memoizer_mem[key] = func(original, compare_to) 63 return copy.deepcopy(Comparison._memoizer_mem[key]) 64 65 return memoizer 66 67def _memoize_staff_groups_set_distance(func): 68 def memoizer(original, compare_to): 69 key = repr(original) + repr(compare_to) 70 if key not in Comparison._memoizer_mem: 71 Comparison._memoizer_mem[key] = func(original, compare_to) 72 return copy.deepcopy(Comparison._memoizer_mem[key]) 73 74 return memoizer 75 76def _memoize_metadata_items_set_distance(func): 77 def memoizer(original, compare_to): 78 key = repr(original) + repr(compare_to) 79 if key not in Comparison._memoizer_mem: 80 Comparison._memoizer_mem[key] = func(original, compare_to) 81 return copy.deepcopy(Comparison._memoizer_mem[key]) 82 83 return memoizer 84 85def _memoize_inside_bars_diff_lin(func): 86 def memoizer(original, compare_to): 87 key = repr(original) + repr(compare_to) 88 if key not in Comparison._memoizer_mem: 89 Comparison._memoizer_mem[key] = func(original, compare_to) 90 return copy.deepcopy(Comparison._memoizer_mem[key]) 91 92 return memoizer 93 94def _memoize_lyrics_diff_lin(func): 95 def memoizer(original, compare_to): 96 key = repr(original) + repr(compare_to) 97 if key not in Comparison._memoizer_mem: 98 Comparison._memoizer_mem[key] = func(original, compare_to) 99 return copy.deepcopy(Comparison._memoizer_mem[key]) 100 101 return memoizer 102 103def _memoize_block_diff_lin(func): 104 def memoizer(original, compare_to): 105 key = repr(original) + repr(compare_to) 106 if key not in Comparison._memoizer_mem: 107 Comparison._memoizer_mem[key] = func(original, compare_to) 108 return copy.deepcopy(Comparison._memoizer_mem[key]) 109 110 return memoizer 111 112def _memoize_pitches_lev_diff(func): 113 def memoizer(original, compare_to, noteNode1, noteNode2, ids): 114 key = ( 115 repr(original) 116 + repr(compare_to) 117 + repr(noteNode1) 118 + repr(noteNode2) 119 + repr(ids) 120 ) 121 if 
key not in Comparison._memoizer_mem: 122 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, ids) 123 return copy.deepcopy(Comparison._memoizer_mem[key]) 124 125 return memoizer 126 127def _memoize_beamtuplet_lev_diff(func): 128 def memoizer(original, compare_to, noteNode1, noteNode2, which): 129 key = ( 130 repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which 131 ) 132 if key not in Comparison._memoizer_mem: 133 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which) 134 return copy.deepcopy(Comparison._memoizer_mem[key]) 135 136 return memoizer 137 138def _memoize_generic_lev_diff(func): 139 def memoizer(original, compare_to, noteNode1, noteNode2, which): 140 key = ( 141 repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which 142 ) 143 if key not in Comparison._memoizer_mem: 144 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which) 145 return copy.deepcopy(Comparison._memoizer_mem[key]) 146 147 return memoizer 148 149class Comparison: 150 _memoizer_mem: dict = {} 151 152 @staticmethod 153 def _clear_memoizer_caches(): 154 Comparison._memoizer_mem = {} 155 156 @staticmethod 157 def _myers_diff(a_lines, b_lines): 158 # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2) 159 # This marks the farthest-right point along each diagonal in the edit 160 # graph, along with the history that got it there 161 Frontier = namedtuple("Frontier", ["x", "history"]) 162 frontier = {1: Frontier(0, [])} 163 164 a_max = len(a_lines) 165 b_max = len(b_lines) 166 for d in range(0, a_max + b_max + 1): 167 for k in range(-d, d + 1, 2): 168 # This determines whether our next search point will be going down 169 # in the edit graph, or to the right. 170 # 171 # The intuition for this is that we should go down if we're on the 172 # left edge (k == -d) to make sure that the left edge is fully 173 # explored. 
174 # 175 # If we aren't on the top (k != d), then only go down if going down 176 # would take us to territory that hasn't sufficiently been explored 177 # yet. 178 go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x) 179 180 # Figure out the starting point of this iteration. The diagonal 181 # offsets come from the geometry of the edit grid - if you're going 182 # down, your diagonal is lower, and if you're going right, your 183 # diagonal is higher. 184 if go_down: 185 old_x, history = frontier[k + 1] 186 x = old_x 187 else: 188 old_x, history = frontier[k - 1] 189 x = old_x + 1 190 191 # We want to avoid modifying the old history, since some other step 192 # may decide to use it. 193 history = history[:] 194 y = x - k 195 196 # We start at the invalid point (0, 0) - we should only start building 197 # up history when we move off of it. 198 if 1 <= y <= b_max and go_down: 199 history.append((1, b_lines[y - 1][1])) # add comparetostep 200 elif 1 <= x <= a_max: 201 history.append((0, a_lines[x - 1][1])) # add originalstep 202 203 # Chew up as many diagonal moves as we can - these correspond to common lines, 204 # and they're considered "free" by the algorithm because we want to maximize 205 # the number of these in the output. 206 while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]: 207 x += 1 208 y += 1 209 history.append((2, a_lines[x - 1][1])) # add equal step 210 211 if x >= a_max and y >= b_max: 212 # If we're here, then we've traversed through the bottom-left corner, 213 # and are done. 214 return np.array(history) 215 216 frontier[k] = Frontier(x, history) 217 218 assert False, "Could not find edit script" 219 220 @staticmethod 221 def _non_common_subsequences_myers(original, compare_to): 222 # Both original and compare_to are list of lists, or numpy arrays with 2 columns. 223 # This is necessary because bars need two representation at the same time. 
224 # One without the id (for comparison), and one with the id (to retrieve the bar 225 # at the end). 226 227 # get the list of operations 228 op_list = Comparison._myers_diff( 229 np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64) 230 )[::-1] 231 # retrieve the non common subsequences 232 non_common_subsequences = [] 233 non_common_subsequences.append({"original": [], "compare_to": []}) 234 ind = 0 235 for op in op_list[::-1]: 236 if op[0] == 2: # equal 237 non_common_subsequences.append({"original": [], "compare_to": []}) 238 ind += 1 239 elif op[0] == 0: # original step 240 non_common_subsequences[ind]["original"].append(op[1]) 241 elif op[0] == 1: # compare to step 242 non_common_subsequences[ind]["compare_to"].append(op[1]) 243 # remove the empty dict from the list 244 non_common_subsequences = [ 245 s for s in non_common_subsequences if s != {"original": [], "compare_to": []} 246 ] 247 return non_common_subsequences 248 249 @staticmethod 250 def _non_common_subsequences_of_measures(original_m, compare_to_m): 251 # Take the hash for each measure to run faster comparison 252 # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison), 253 # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after 254 # computation) 255 original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m] 256 compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m] 257 ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int) 258 # retrieve the original pointers to measures 259 new_out = [] 260 for e in ncs: 261 new_out.append({}) 262 for k in e.keys(): 263 new_out[-1][k] = [] 264 for repr_hash in e[k]: 265 if k == "original": 266 new_out[-1][k].append( 267 next(m for m in original_m if m.precomputed_repr == repr_hash) 268 ) 269 else: 270 new_out[-1][k].append( 271 next(m for m in compare_to_m if m.precomputed_repr == repr_hash) 272 ) 273 274 
return new_out 275 276 @staticmethod 277 @_memoize_pitches_lev_diff 278 def _pitches_levenshtein_diff( 279 original: list[tuple[str, str, bool]], 280 compare_to: list[tuple[str, str, bool]], 281 noteNode1: AnnNote, 282 noteNode2: AnnNote, 283 ids: tuple[int, int] 284 ): 285 """ 286 Compute the levenshtein distance between two sequences of pitches. 287 Arguments: 288 original {list} -- list of pitches 289 compare_to {list} -- list of pitches 290 noteNode1 {annotatedNote} --for referencing 291 noteNode2 {annotatedNote} --for referencing 292 ids {tuple} -- a tuple of 2 elements with the indices of the notes considered 293 """ 294 if len(original) == 0 and len(compare_to) == 0: 295 return [], 0 296 297 if len(original) == 0: 298 op_list, cost = Comparison._pitches_levenshtein_diff( 299 original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 300 ) 301 op_list.append( 302 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 303 ) 304 cost += M21Utils.pitch_size(compare_to[0]) 305 return op_list, cost 306 307 if len(compare_to) == 0: 308 op_list, cost = Comparison._pitches_levenshtein_diff( 309 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 310 ) 311 op_list.append( 312 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 313 ) 314 cost += M21Utils.pitch_size(original[0]) 315 return op_list, cost 316 317 # compute the cost and the op_list for the many possibilities of recursion 318 cost_dict = {} 319 op_list_dict = {} 320 # del-pitch 321 op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff( 322 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 323 ) 324 cost_dict["delpitch"] += M21Utils.pitch_size(original[0]) 325 op_list_dict["delpitch"].append( 326 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 327 ) 328 # ins-pitch 329 op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff( 330 
original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 331 ) 332 cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0]) 333 op_list_dict["inspitch"].append( 334 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 335 ) 336 # edit-pitch 337 op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff( 338 original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1) 339 ) 340 if original[0] == compare_to[0]: # to avoid perform the pitch_diff 341 pitch_diff_op_list = [] 342 pitch_diff_cost = 0 343 else: 344 pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff( 345 original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1]) 346 ) 347 cost_dict["editpitch"] += pitch_diff_cost 348 op_list_dict["editpitch"].extend(pitch_diff_op_list) 349 # compute the minimum of the possibilities 350 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 351 out = op_list_dict[min_key], cost_dict[min_key] 352 return out 353 354 @staticmethod 355 def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids): 356 """ 357 Compute the differences between two pitch (definition from the paper). 358 a pitch consist of a tuple: pitch name (letter+number), accidental, tie. 359 param : pitch1. The music_notation_repr tuple of note1 360 param : pitch2. The music_notation_repr tuple of note2 361 param : noteNode1. The noteNode where pitch1 belongs 362 param : noteNode2. The noteNode where pitch2 belongs 363 param : ids. 
(id_from_note1,id_from_note2) The indices of the notes in case of a chord 364 Returns: 365 [list] -- the list of differences 366 [int] -- the cost of diff 367 """ 368 cost = 0 369 op_list = [] 370 # add for pitch name differences 371 if pitch1[0] != pitch2[0]: 372 cost += 1 373 # TODO: select the note in a more precise way in case of a chord 374 # rest to note 375 if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"): # xor 376 op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids)) 377 else: # they are two notes 378 op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids)) 379 380 # add for the accidentals 381 if pitch1[1] != pitch2[1]: # if the accidental is different 382 if pitch1[1] == "None": 383 assert pitch2[1] != "None" 384 cost += 1 385 op_list.append(("accidentins", noteNode1, noteNode2, 1, ids)) 386 elif pitch2[1] == "None": 387 assert pitch1[1] != "None" 388 cost += 1 389 op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids)) 390 else: # a different type of alteration is present 391 cost += 2 # delete then add 392 op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids)) 393 # add for the ties 394 if pitch1[2] != pitch2[2]: 395 # exclusive or. Add if one is tied and not the other. 
396 # probably to revise for chords 397 cost += 1 398 if pitch1[2]: 399 assert not pitch2[2] 400 op_list.append(("tiedel", noteNode1, noteNode2, 1, ids)) 401 elif pitch2[2]: 402 assert not pitch1[2] 403 op_list.append(("tieins", noteNode1, noteNode2, 1, ids)) 404 return op_list, cost 405 406 @staticmethod 407 @_memoize_block_diff_lin 408 def _block_diff_lin(original, compare_to): 409 if len(original) == 0 and len(compare_to) == 0: 410 return [], 0 411 412 if len(original) == 0: 413 op_list, cost = Comparison._block_diff_lin(original, compare_to[1:]) 414 cost += compare_to[0].notation_size() 415 op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size())) 416 return op_list, cost 417 418 if len(compare_to) == 0: 419 op_list, cost = Comparison._block_diff_lin(original[1:], compare_to) 420 cost += original[0].notation_size() 421 op_list.append(("delbar", original[0], None, original[0].notation_size())) 422 return op_list, cost 423 424 # compute the cost and the op_list for the many possibilities of recursion 425 cost_dict = {} 426 op_list_dict = {} 427 # del-bar 428 op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin( 429 original[1:], compare_to 430 ) 431 cost_dict["delbar"] += original[0].notation_size() 432 op_list_dict["delbar"].append( 433 ("delbar", original[0], None, original[0].notation_size()) 434 ) 435 # ins-bar 436 op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin( 437 original, compare_to[1:] 438 ) 439 cost_dict["insbar"] += compare_to[0].notation_size() 440 op_list_dict["insbar"].append( 441 ("insbar", None, compare_to[0], compare_to[0].notation_size()) 442 ) 443 # edit-bar 444 op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin( 445 original[1:], compare_to[1:] 446 ) 447 if ( 448 original[0] == compare_to[0] 449 ): # to avoid performing the _voices_coupling_recursive/_notes_set_distance 450 # if it's not needed 451 inside_bar_op_list = [] 452 inside_bar_cost = 0 453 
else: 454 # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras 455 # instead of lists of AnnNotes) 456 extras_op_list, extras_cost = Comparison._extras_set_distance( 457 original[0].extras_list, compare_to[0].extras_list 458 ) 459 460 # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras) 461 lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin( 462 original[0].lyrics_list, compare_to[0].lyrics_list 463 ) 464 465 if original[0].includes_voicing: 466 # run the voice coupling algorithm, and add to inside_bar_op_list 467 # and inside_bar_cost 468 inside_bar_op_list, inside_bar_cost = ( 469 Comparison._voices_coupling_recursive( 470 original[0].voices_list, compare_to[0].voices_list 471 ) 472 ) 473 else: 474 # run the set distance algorithm, and add to inside_bar_op_list 475 # and inside_bar_cost 476 inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance( 477 original[0].annot_notes, compare_to[0].annot_notes 478 ) 479 480 inside_bar_op_list.extend(extras_op_list) 481 inside_bar_cost += extras_cost 482 inside_bar_op_list.extend(lyrics_op_list) 483 inside_bar_cost += lyrics_cost 484 485 cost_dict["editbar"] += inside_bar_cost 486 op_list_dict["editbar"].extend(inside_bar_op_list) 487 # compute the minimum of the possibilities 488 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 489 out = op_list_dict[min_key], cost_dict[min_key] 490 return out 491 492 @staticmethod 493 @_memoize_lyrics_diff_lin 494 def _lyrics_diff_lin(original, compare_to): 495 # original and compare to are two lists of AnnLyric 496 if len(original) == 0 and len(compare_to) == 0: 497 return [], 0 498 499 if len(original) == 0: 500 cost = 0 501 op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:]) 502 op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size())) 503 cost += compare_to[0].notation_size() 504 return op_list, cost 505 506 if len(compare_to) == 0: 507 cost = 0 508 op_list, cost = 
Comparison._lyrics_diff_lin(original[1:], compare_to) 509 op_list.append(("lyricdel", original[0], None, original[0].notation_size())) 510 cost += original[0].notation_size() 511 return op_list, cost 512 513 # compute the cost and the op_list for the many possibilities of recursion 514 cost = {} 515 op_list = {} 516 # lyricdel 517 op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin( 518 original[1:], compare_to 519 ) 520 cost["lyricdel"] += original[0].notation_size() 521 op_list["lyricdel"].append( 522 ("lyricdel", original[0], None, original[0].notation_size()) 523 ) 524 # lyricins 525 op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin( 526 original, compare_to[1:] 527 ) 528 cost["lyricins"] += compare_to[0].notation_size() 529 op_list["lyricins"].append( 530 ("lyricins", None, compare_to[0], compare_to[0].notation_size()) 531 ) 532 # lyricsub 533 op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin( 534 original[1:], compare_to[1:] 535 ) 536 if ( 537 original[0] == compare_to[0] 538 ): # avoid call another function if they are equal 539 lyricsub_op, lyricsub_cost = [], 0 540 else: 541 lyricsub_op, lyricsub_cost = ( 542 Comparison._annotated_lyric_diff(original[0], compare_to[0]) 543 ) 544 cost["lyricsub"] += lyricsub_cost 545 op_list["lyricsub"].extend(lyricsub_op) 546 # compute the minimum of the possibilities 547 min_key = min(cost, key=cost.get) 548 out = op_list[min_key], cost[min_key] 549 return out 550 551 @staticmethod 552 def _strings_levenshtein_distance(str1: str, str2: str): 553 counter: dict = {"+": 0, "-": 0} 554 distance: int = 0 555 for edit_code in ndiff(str1, str2): 556 if edit_code[0] == " ": 557 distance += max(counter.values()) 558 counter = {"+": 0, "-": 0} 559 else: 560 counter[edit_code[0]] += 1 561 distance += max(counter.values()) 562 return distance 563 564 @staticmethod 565 def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool: 566 if off1 == off2: 567 return False 
568 569 # this should never happen, but... 570 if off1 is None or off2 is None: 571 if off1 is None and off2 is not None: 572 return True 573 if off1 is not None and off2 is None: 574 return True 575 return False # both are None, therefore not different at all 576 577 diff: OffsetQL = off1 - off2 578 if diff < 0: 579 diff = -diff 580 581 if diff > 0.0001: 582 return True 583 return False 584 585 @staticmethod 586 def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra): 587 """ 588 Compute the differences between two annotated extras. 589 Each annotated extra consists of three values: content, offset, and duration 590 """ 591 cost = 0 592 op_list = [] 593 594 # add for the content 595 if annExtra1.content != annExtra2.content: 596 content_cost: int = ( 597 Comparison._strings_levenshtein_distance( 598 annExtra1.content or '', 599 annExtra2.content or '' 600 ) 601 ) 602 cost += content_cost 603 op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost)) 604 605 # add for the symbolic (cost 2: delete one symbol, add the other) 606 if annExtra1.symbolic != annExtra2.symbolic: 607 cost += 2 608 op_list.append(("extrasymboledit", annExtra1, annExtra2, 2)) 609 610 # add for the infodict 611 if annExtra1.infodict != annExtra2.infodict: 612 info_cost: int = 0 613 # handle everything in annExtra1 (whether or not it is in annExtra2) 614 for k, v in annExtra1.infodict.items(): 615 if k not in annExtra2.infodict: 616 # not in annExtra2: delete a symbol 617 info_cost += 1 618 elif v != annExtra2.infodict[k]: 619 # different in annExtra2: delete a symbol, add a symbol 620 info_cost += 2 621 # handle everything in annExtra2 that is not in annExtra1 622 for k in annExtra2.infodict: 623 if k not in annExtra1.infodict: 624 # add a symbol 625 info_cost += 1 626 cost += info_cost 627 op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost)) 628 629 # add for the offset 630 # Note: offset here is a float, and some file formats have only four 631 # 
decimal places of precision. So we should not compare exactly here. 632 if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset): 633 cost += 1 634 op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1)) 635 636 # add for the duration 637 # Note: duration here is a float, and some file formats have only four 638 # decimal places of precision. So we should not compare exactly here. 639 if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration): 640 cost += 1 641 op_list.append(("extradurationedit", annExtra1, annExtra2, 1)) 642 643 # add for the style 644 if annExtra1.styledict != annExtra2.styledict: 645 cost += 1 # someday we might count different items in the styledict 646 op_list.append(("extrastyleedit", annExtra1, annExtra2, 1)) 647 648 return op_list, cost 649 650 @staticmethod 651 def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric): 652 """ 653 Compute the differences between two annotated lyrics. 654 Each annotated lyric consists of five values: lyric, verse_id, offset, duration, 655 and styledict. 
656 """ 657 cost = 0 658 op_list = [] 659 660 # add for the content 661 if annLyric1.lyric != annLyric2.lyric: 662 content_cost: int = ( 663 Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric) 664 ) 665 cost += content_cost 666 op_list.append(("lyricedit", annLyric1, annLyric2, content_cost)) 667 668 # add for the number 669 if annLyric1.number != annLyric2.number: 670 number_cost: int 671 if annLyric1.number == 0 or annLyric2.number == 0: 672 # add or delete number 673 number_cost = 1 674 else: 675 # add and delete number 676 number_cost = 2 677 cost += number_cost 678 op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost)) 679 680 # add for the identifier 681 if annLyric1.identifier != annLyric2.identifier: 682 # someday we might do a Levenshtein distance of the two ids 683 id_cost: int 684 if not annLyric1.identifier or not annLyric1.identifier: 685 # add or delete identifier 686 id_cost = 1 687 else: 688 # add and delete identifier 689 id_cost = 2 690 cost += id_cost 691 op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost)) 692 693 # add for the offset 694 # Note: offset here is a float, and some file formats have only four 695 # decimal places of precision. So we should not compare exactly here. 696 if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset): 697 cost += 1 698 op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1)) 699 700 # add for the style 701 if annLyric1.styledict != annLyric2.styledict: 702 cost += 1 # someday we might count different items in the styledict 703 op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1)) 704 705 return op_list, cost 706 707 @staticmethod 708 def _annotated_metadata_item_diff( 709 annMetadataItem1: AnnMetadataItem, 710 annMetadataItem2: AnnMetadataItem 711 ): 712 """ 713 Compute the differences between two annotated metadata items. 
714 Each annotated metadata item has two values: key: str, value: t.Any, 715 """ 716 cost = 0 717 op_list = [] 718 719 # we don't compare items that don't have the same key. 720 if annMetadataItem1.key != annMetadataItem2.key: 721 raise ValueError('unexpected comparison of metadata items with different keys') 722 723 # add for the value 724 if annMetadataItem1.value != annMetadataItem2.value: 725 value_cost: int = ( 726 Comparison._strings_levenshtein_distance( 727 str(annMetadataItem1.value), 728 str(annMetadataItem2.value) 729 ) 730 ) 731 cost += value_cost 732 op_list.append( 733 ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost) 734 ) 735 736 return op_list, cost 737 738 @staticmethod 739 def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup): 740 """ 741 Compute the differences between two annotated staff groups. 742 Each annotated staff group consists of five values: name, abbreviation, 743 symbol, barTogether, part_indices. 744 """ 745 cost = 0 746 op_list = [] 747 748 # add for the name 749 if annStaffGroup1.name != annStaffGroup2.name: 750 name_cost: int = ( 751 Comparison._strings_levenshtein_distance( 752 annStaffGroup1.name, 753 annStaffGroup2.name 754 ) 755 ) 756 cost += name_cost 757 op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost)) 758 759 # add for the abbreviation 760 if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation: 761 abbreviation_cost: int = ( 762 Comparison._strings_levenshtein_distance( 763 annStaffGroup1.abbreviation, 764 annStaffGroup2.abbreviation 765 ) 766 ) 767 cost += abbreviation_cost 768 op_list.append(( 769 "staffgrpabbreviationedit", 770 annStaffGroup1, 771 annStaffGroup2, 772 abbreviation_cost 773 )) 774 775 # add for the symbol 776 if annStaffGroup1.symbol != annStaffGroup2.symbol: 777 symbol_cost: int 778 if not annStaffGroup1.symbol or not annStaffGroup2.symbol: 779 # add or delete symbol 780 symbol_cost = 1 781 else: 782 # add and 
delete symbol 783 symbol_cost = 2 784 cost += symbol_cost 785 op_list.append( 786 ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost) 787 ) 788 789 # add for barTogether 790 if annStaffGroup1.barTogether != annStaffGroup2.barTogether: 791 barTogether_cost: int = 1 792 cost += barTogether_cost 793 op_list.append( 794 ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost) 795 ) 796 797 # add for partIndices (sorted list of int) 798 if annStaffGroup1.part_indices != annStaffGroup2.part_indices: 799 partIndices_cost: int = 0 800 if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]: 801 partIndices_cost += 1 # vertical start 802 if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]: 803 partIndices_cost += 1 # vertical height 804 if partIndices_cost == 0: 805 # should never get here, but we have to have a cost 806 partIndices_cost = 1 807 cost += partIndices_cost 808 op_list.append( 809 ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost) 810 ) 811 812 return op_list, cost 813 814 @staticmethod 815 @_memoize_inside_bars_diff_lin 816 def _inside_bars_diff_lin(original, compare_to): 817 # original and compare to are two lists of annotatedNote 818 if len(original) == 0 and len(compare_to) == 0: 819 return [], 0 820 821 if len(original) == 0: 822 cost = 0 823 op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:]) 824 op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size())) 825 cost += compare_to[0].notation_size() 826 return op_list, cost 827 828 if len(compare_to) == 0: 829 cost = 0 830 op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to) 831 op_list.append(("notedel", original[0], None, original[0].notation_size())) 832 cost += original[0].notation_size() 833 return op_list, cost 834 835 # compute the cost and the op_list for the many possibilities of recursion 836 cost = {} 837 op_list = {} 838 # notedel 839 
        # ---- tail of Comparison._inside_bars_diff_lin (its def is above this chunk) ----
        # notedel: delete original[0], pay its notation size, recurse on the rest.
        op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin(
            original[1:], compare_to
        )
        cost["notedel"] += original[0].notation_size()
        op_list["notedel"].append(
            ("notedel", original[0], None, original[0].notation_size())
        )
        # noteins: insert compare_to[0], pay its notation size, recurse on the rest.
        op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin(
            original, compare_to[1:]
        )
        cost["noteins"] += compare_to[0].notation_size()
        op_list["noteins"].append(
            ("noteins", None, compare_to[0], compare_to[0].notation_size())
        )
        # notesub: substitute original[0] with compare_to[0].
        op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin(
            original[1:], compare_to[1:]
        )
        if (
            original[0] == compare_to[0]
        ):  # avoid calling another function if they are equal
            notesub_op, notesub_cost = [], 0
        else:
            notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0])
        cost["notesub"] += notesub_cost
        op_list["notesub"].extend(notesub_op)
        # keep whichever of the three possibilities is cheapest
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote):
        """
        Compute the differences between two annotated notes.
        Each annotated note consists of a 5-tuple (pitches, notehead, dots,
        beamings, tuplets), where pitches is a list.

        Arguments:
            annNote1 {AnnNote} -- original AnnNote
            annNote2 {AnnNote} -- compare_to AnnNote

        Returns the operations list and its total cost.
        """
        cost = 0
        op_list = []
        # add for the pitches
        # if they are equal
        if annNote1.pitches == annNote2.pitches:
            op_list_pitch, cost_pitch = [], 0
        else:
            # pitches diff is computed using Levenshtein distances
            # (the pitch lists are already ordered)
            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
            )
        op_list.extend(op_list_pitch)
        cost += cost_pitch
        # add for the notehead
        if annNote1.note_head != annNote2.note_head:
            # delete one note head, add the other (this isn't noteshape, this is
            # just quarter-note note head vs half-note note head, etc)
            cost += 2
            op_list.append(("headedit", annNote1, annNote2, 2))
        # add for the dots
        if annNote1.dots != annNote2.dots:
            # add one for each added (or deleted) dot
            dots_diff = abs(annNote1.dots - annNote2.dots)
            cost += dots_diff
            if annNote1.dots > annNote2.dots:
                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
            else:
                op_list.append(("dotins", annNote1, annNote2, dots_diff))
        if annNote1.graceType != annNote2.graceType:
            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
            cost += 2
            op_list.append(("graceedit", annNote1, annNote2, 2))
        if annNote1.graceSlash != annNote2.graceSlash:
            # add or delete the slash
            cost += 1
            op_list.append(("graceslashedit", annNote1, annNote2, 1))
        # add for the beamings
        if annNote1.beamings != annNote2.beamings:
            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
            )
            op_list.extend(beam_op_list)
            cost += beam_cost
        # add for the tuplet types
        if annNote1.tuplets != annNote2.tuplets:
            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_op_list)
            cost += tuplet_cost
        # add for the tuplet info
        if annNote1.tuplet_info != annNote2.tuplet_info:
            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_info_op_list)
            cost += tuplet_info_cost
        # add for the articulations
        if annNote1.articulations != annNote2.articulations:
            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
                annNote1.articulations,
                annNote2.articulations,
                annNote1,
                annNote2,
                "articulation",
            )
            op_list.extend(artic_op_list)
            cost += artic_cost
        # add for the expressions
        if annNote1.expressions != annNote2.expressions:
            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
                annNote1.expressions,
                annNote2.expressions,
                annNote1,
                annNote2,
                "expression",
            )
            op_list.extend(expr_op_list)
            cost += expr_cost

        # add for gap from previous note, or from start of measure if this is the
        # first note in the measure (i.e. a horizontal position shift)
        # gap change == horizontal shift of the note
        if annNote1.gap_dur != annNote2.gap_dur:
            # in all cases, the edit is a simple horizontal shift of the note
            cost += 1
            if annNote1.gap_dur == 0:
                op_list.append(("insspace", annNote1, annNote2, 1))
            elif annNote2.gap_dur == 0:
                op_list.append(("delspace", annNote1, annNote2, 1))
            else:
                # neither is zero
                op_list.append(("editspace", annNote1, annNote2, 1))

        # add for noteshape
        if annNote1.noteshape != annNote2.noteshape:
            # always delete existing note shape and add the new one
            cost += 2
            op_list.append(("editnoteshape", annNote1, annNote2, 2))
        # add for noteheadFill
        if annNote1.noteheadFill != annNote2.noteheadFill:
            # always delete existing note fill and add the new one
            cost += 2
            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
        # add for noteheadParenthesis (True or False)
        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
            # always either add or delete parentheses
            cost += 1
            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
        # add for stemDirection
        if annNote1.stemDirection != annNote2.stemDirection:
            stemdir_cost: int
            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
                # gonna add (or delete) a stem
                stemdir_cost = 1
            else:
                # gonna change a stem (add then delete)
                stemdir_cost = 2
            cost += stemdir_cost
            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
        # add for the styledict
        if annNote1.styledict != annNote2.styledict:
            cost += 1
            op_list.append(("editstyle", annNote1, annNote2, 1))

        return op_list, cost

    @staticmethod
    @_memoize_beamtuplet_lev_diff
    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
        """
        Compute the Levenshtein distance between two sequences of beamings or tuplets.

        Arguments:
            original {list} -- list of strings (start, stop, continue or partial)
            compare_to {list} -- list of strings (start, stop, continue or partial)
            note1 {AnnNote} -- the note for referencing in the score
            note2 {AnnNote} -- the note for referencing in the score
            which -- a string: "beam" or "tuplet" depending what we are comparing
        """
        if which not in ("beam", "tuplet"):
            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")

        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        if len(original) == 0:
            # only insertions remain
            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
                original, compare_to[1:], note1, note2, which
            )
            op_list.append(("ins" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        if len(compare_to) == 0:
            # only deletions remain
            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
                original[1:], compare_to, note1, note2, which
            )
            op_list.append(("del" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost = {}
        op_list = {}
        # delwhich
        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
            original[1:], compare_to, note1, note2, which
        )
        cost["del" + which] += 1
        op_list["del" + which].append(("del" + which, note1, note2, 1))
        # inswhich
        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
            original, compare_to[1:], note1, note2, which
        )
        cost["ins" + which] += 1
        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
        # editwhich
        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
            original[1:], compare_to[1:], note1, note2, which
        )
        if original[0] == compare_to[0]:
            beam_diff_op_list = []
            beam_diff_cost = 0
        else:
            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
        cost["edit" + which] += beam_diff_cost
        op_list["edit" + which].extend(beam_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    @_memoize_generic_lev_diff
    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
        """
        Compute the Levenshtein distance between two generic sequences of symbols
        (e.g., articulations).

        Arguments:
            original {list} -- list of strings
            compare_to {list} -- list of strings
            note1 {AnnNote} -- the note for referencing in the score
            note2 {AnnNote} -- the note for referencing in the score
            which -- a string: e.g. "articulation" depending what we are comparing
        """
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        if len(original) == 0:
            # only insertions remain
            op_list, cost = Comparison._generic_levenshtein_diff(
                original, compare_to[1:], note1, note2, which
            )
            op_list.append(("ins" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        if len(compare_to) == 0:
            # only deletions remain
            op_list, cost = Comparison._generic_levenshtein_diff(
                original[1:], compare_to, note1, note2, which
            )
            op_list.append(("del" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost = {}
        op_list = {}
        # delwhich
        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
            original[1:], compare_to, note1, note2, which
        )
        cost["del" + which] += 1
        op_list["del" + which].append(("del" + which, note1, note2, 1))
        # inswhich
        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
            original, compare_to[1:], note1, note2, which
        )
        cost["ins" + which] += 1
        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
        # editwhich
        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
            original[1:], compare_to[1:], note1, note2, which
        )
        if original[0] == compare_to[0]:  # to avoid performing the diff
            generic_diff_op_list = []
            generic_diff_cost = 0
        else:
            generic_diff_op_list, generic_diff_cost = (
                [("edit" + which, note1, note2, 1)],
                1,
            )
        cost["edit" + which] += generic_diff_cost
        op_list["edit" + which].extend(generic_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    @_memoize_notes_set_distance
    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
        """
        Gather up pairs of matching notes (using pitch, offset, graceness, and visual
        duration, in that order of importance). If you can't find an exactly matching
        note, try again without visual duration.

        original [list] -- a list of AnnNote (which are never chords)
        compare_to [list] -- a list of AnnNote (which are never chords)
        """
        paired_notes: list[tuple[AnnNote, AnnNote]] = []
        unpaired_orig_notes: list[AnnNote] = []
        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)

        for orig_n in original:
            fallback: AnnNote | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_n in enumerate(unpaired_comp_notes):
                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
                    # this pitch comparison (1) assumes the note is not a chord
                    # (because we don't do chords when Voicing is not set, and
                    # we only call _notes_set_distance when Voicing is not set),
                    # and (2) only compares the visual position of the note (we
                    # are ignoring the accidental here).
                    # This is so that an accidental change will show up as a
                    # pitch edit, not a note remove/insert.
                    continue
                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
                    continue
                if orig_n.note_is_grace != comp_n.note_is_grace:
                    continue
                if fallback is None:
                    # pitch/offset/graceness matched: remember as best-so-far
                    fallback = comp_n
                    fallback_i = i

                if orig_n.note_dur_type != comp_n.note_dur_type:
                    continue
                if orig_n.note_dur_dots != comp_n.note_dur_dots:
                    continue

                # found a perfect match
                paired_notes.append((orig_n, comp_n))

                # remove comp_n from unpaired_comp_notes
                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original note
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_notes.append((orig_n, fallback))
                unpaired_comp_notes.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_notes.append(orig_n)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # notedel
        if unpaired_orig_notes:
            for an in unpaired_orig_notes:
                cost += an.notation_size()
                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))

        # noteins
        if unpaired_comp_notes:
            for an in unpaired_comp_notes:
                cost += an.notation_size()
                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))

        # notesub
        if paired_notes:
            for ano, anc in paired_notes:
                if ano == anc:
                    # if equal, avoid _annotated_note_diff call
                    notesub_op, notesub_cost = [], 0
                else:
                    notesub_op, notesub_cost = (
                        Comparison._annotated_note_diff(ano, anc)
                    )
                cost += notesub_cost
                op_list.extend(notesub_op)

        return op_list, cost

    @staticmethod
    @_memoize_extras_set_distance
    def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]):
        """
        Gather up pairs of matching extras (using kind, offset, and visual duration, in
        that order of importance). If you can't find an exactly matching extra, try again
        without visual duration.

        original [list] -- a list of AnnExtras
        compare_to [list] -- a list of AnnExtras
        """
        paired_extras: list[tuple[AnnExtra, AnnExtra]] = []
        unpaired_orig_extras: list[AnnExtra] = []
        unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to)

        for orig_x in original:
            fallback: AnnExtra | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_x in enumerate(unpaired_comp_extras):
                # kind and offset are required for pairing
                if orig_x.kind != comp_x.kind:
                    continue
                if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset):
                    continue
                if fallback is None:
                    # kind/offset matched: remember as best-so-far
                    fallback = comp_x
                    fallback_i = i

                # duration is preferred for pairing
                if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration):
                    continue

                # there are a few kind-specific elements that are also preferred:
                #   'direction'/'ending': content (visible text)
                #   'keysig'/'timesig'/'clef': symbolic (there are sometimes two
                #       simultaneous keysigs, timesigs, or clefs, and we don't
                #       want to confuse which one is which, producing a diff where
                #       there actually isn't one)
                #   'slur': placements (because there are often two identical slurs
                #       whose only difference is 'above' vs 'below', producing a diff
                #       where there actually isn't one)
                if orig_x.kind in ('direction', 'ending'):
                    if orig_x.content != comp_x.content:
                        continue
                if orig_x.kind == 'clef':
                    if orig_x.symbolic != comp_x.symbolic:
                        continue
                if orig_x.kind in ('keysig', 'timesig'):
                    if orig_x.infodict != comp_x.infodict:
                        continue
                if orig_x.kind == 'slur':
                    orig_placement: str = orig_x.styledict.get('placement', '')
                    comp_placement: str = comp_x.styledict.get('placement', '')
                    if orig_placement != comp_placement:
                        continue

                # found a perfect match
                paired_extras.append((orig_x, comp_x))

                # remove comp_x from unpaired_comp_extras
                unpaired_comp_extras.pop(i)  # remove(comp_x) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original extra
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_extras.append((orig_x, fallback))
                unpaired_comp_extras.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_extras.append(orig_x)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # extradel
        for extra in unpaired_orig_extras:
            cost += extra.notation_size()
            op_list.append(("extradel", extra, None, extra.notation_size()))

        # extrains
        for extra in unpaired_comp_extras:
            cost += extra.notation_size()
            op_list.append(("extrains", None, extra, extra.notation_size()))

        # extrasub
        if paired_extras:
            for extrao, extrac in paired_extras:
                if extrao == extrac:
                    # if equal, avoid _annotated_extra_diff call
                    extrasub_op, extrasub_cost = [], 0
                else:
                    extrasub_op, extrasub_cost = (
                        Comparison._annotated_extra_diff(extrao, extrac)
                    )
                cost += extrasub_cost
                op_list.extend(extrasub_op)

        return op_list, cost

    @staticmethod
    @_memoize_metadata_items_set_distance
    def _metadata_items_set_distance(
        original: list[AnnMetadataItem],
        compare_to: list[AnnMetadataItem]
    ):
        """
        Gather up pairs of matching metadata_items (using key and value). If you can't
        find an exactly matching metadata_item, try again without value.

        original [list] -- a list of AnnMetadataItems
        compare_to [list] -- a list of AnnMetadataItems
        """
        paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = []
        unpaired_orig_metadata_items: list[AnnMetadataItem] = []
        unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to)

        # first look for perfect matches
        for orig_mdi in original:
            found_it: bool = False
            for i, comp_mdi in enumerate(unpaired_comp_metadata_items):
                # key is required for perfect match
                if orig_mdi.key != comp_mdi.key:
                    continue

                # value is required for perfect match
                if orig_mdi.value != comp_mdi.value:
                    continue

                # found a perfect match
                paired_metadata_items.append((orig_mdi, comp_mdi))

                # remove comp_mdi from unpaired_comp_metadata_items
                unpaired_comp_metadata_items.pop(i)

                found_it = True
                break

            if found_it:
                # on to the next original metadata_item
                continue

            # we found no perfect match
            unpaired_orig_metadata_items.append(orig_mdi)

        # now look among the unpaired remainders for key match only
        remove_unpaired_orig_items: list[AnnMetadataItem] = []
        for orig_mdi in unpaired_orig_metadata_items:
            for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items):
                # key is required for key-only match
                if orig_mdi.key != comp_mdi.key:
                    continue

                # found a key-only match
                paired_metadata_items.append((orig_mdi, comp_mdi))

                # remove comp_mdi from unpaired_comp_metadata_items
                unpaired_comp_metadata_items.pop(comp_idx)

                # make a note of unpaired_orig_metadata_item to remove later
                remove_unpaired_orig_items.append(orig_mdi)
                break

        for remove_mdi in remove_unpaired_orig_items:
            unpaired_orig_metadata_items.remove(remove_mdi)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # mditemdel
        for metadata_item in unpaired_orig_metadata_items:
            cost += metadata_item.notation_size()
            op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size()))

        # mditemins
        for metadata_item in unpaired_comp_metadata_items:
            cost += metadata_item.notation_size()
            op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size()))

        # mditemsub
        if paired_metadata_items:
            for metadata_itemo, metadata_itemc in paired_metadata_items:
                if metadata_itemo == metadata_itemc:
                    # if equal, avoid _annotated_metadata_item_diff call
                    metadata_itemsub_op, metadata_itemsub_cost = [], 0
                else:
                    metadata_itemsub_op, metadata_itemsub_cost = (
                        Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc)
                    )
                cost += metadata_itemsub_cost
                op_list.extend(metadata_itemsub_op)

        return op_list, cost

    @staticmethod
    @_memoize_staff_groups_set_distance
    def _staff_groups_set_distance(
        original: list[AnnStaffGroup],
        compare_to: list[AnnStaffGroup]
    ):
        """
        Gather up pairs of matching staffgroups (using start_index and end_index, in
        that order of importance). If you can't find an exactly matching staffgroup,
        try again without end_index.

        original [list] -- a list of AnnStaffGroups
        compare_to [list] -- a list of AnnStaffGroups
        """
        paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = []
        unpaired_orig_staff_groups: list[AnnStaffGroup] = []
        unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to)

        for orig_sg in original:
            fallback: AnnStaffGroup | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_sg in enumerate(unpaired_comp_staff_groups):
                # start index is required for pairing
                if orig_sg.part_indices[0] != comp_sg.part_indices[0]:
                    continue
                if fallback is None:
                    # start index matched: remember as best-so-far
                    fallback = comp_sg
                    fallback_i = i

                # end index and symbol (brace, bracket, etc) are preferred for pairing
                if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]:
                    continue
                if orig_sg.symbol != comp_sg.symbol:
                    continue

                # found a perfect match
                paired_staff_groups.append((orig_sg, comp_sg))

                # remove comp_sg from unpaired_comp_staff_groups
                unpaired_comp_staff_groups.pop(i)

                found_it = True
                break

            if found_it:
                # on to the next original staff_group
                continue

            # did we find a fallback (matched except for end index / symbol)?
            if fallback is not None:
                paired_staff_groups.append((orig_sg, fallback))
                unpaired_comp_staff_groups.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_staff_groups.append(orig_sg)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # staffgrpdel
        for staff_group in unpaired_orig_staff_groups:
            cost += staff_group.notation_size()
            op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size()))

        # staffgrpins
        for staff_group in unpaired_comp_staff_groups:
            cost += staff_group.notation_size()
            op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size()))

        # staffgrpsub
        if paired_staff_groups:
            for staff_groupo, staff_groupc in paired_staff_groups:
                if staff_groupo == staff_groupc:
                    # if equal, avoid _annotated_staff_group_diff call
                    staff_groupsub_op, staff_groupsub_cost = [], 0
                else:
                    staff_groupsub_op, staff_groupsub_cost = (
                        Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc)
                    )
                cost += staff_groupsub_cost
                op_list.extend(staff_groupsub_op)

        return op_list, cost

    @staticmethod
    def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]):
        """
        Compare all the possible voice permutations, considering also deletion and
        insertion.

        original [list] -- a list of Voice
        compare_to [list] -- a list of Voice
        """
        if len(original) == 0 and len(compare_to) == 0:  # stop the recursion
            return [], 0

        if len(original) == 0:
            # insertion
            op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:])
            # add for the inserted voice
            op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size()))
            cost += compare_to[0].notation_size()
            return op_list, cost

        if len(compare_to) == 0:
            # deletion
            op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to)
            # add for the deleted voice
            op_list.append(("voicedel", original[0], None, original[0].notation_size()))
            cost += original[0].notation_size()
            return op_list, cost

        cost = {}
        op_list = {}
        # deletion
        op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive(
            original[1:], compare_to
        )
        op_list["voicedel"].append(
            ("voicedel", original[0], None, original[0].notation_size())
        )
        cost["voicedel"] += original[0].notation_size()
        for i, c in enumerate(compare_to):
            # substitution: pair original[0] with compare_to[i], recurse on the rest
            (
                op_list["voicesub" + str(i)],
                cost["voicesub" + str(i)],
            ) = Comparison._voices_coupling_recursive(
                original[1:], compare_to[:i] + compare_to[i + 1:]
            )
            # NOTE(review): this loop pairs original[0] with compare_to[i] (c), but
            # the guard below tests compare_to[0]; it looks like it should be
            # `c != original[0]` -- confirm against upstream before changing.
            if (
                compare_to[0] != original[0]
            ):  # add the cost of the sub and the operations from inside_bar_diff
                op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin(
                    original[0].annot_notes, c.annot_notes
                )  # compute the distance from original[0] and compare_to[i]
                op_list["voicesub" + str(i)].extend(op_list_inside_bar)
                cost["voicesub" + str(i)] += cost_inside_bar
        # compute the minimum of the possibilities
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
        '''
        Compare two annotated scores, computing an operations list and the cost of applying
        those operations to the first score to generate the second score.

        Args:
            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.

        Returns:
            list[tuple], int: The operations list and the cost
        '''
        # Clear all memoizer caches, in case we are called again with different scores.
        # The cached results are no longer valid.
        Comparison._clear_memoizer_caches()

        op_list_total: list[tuple] = []
        cost_total: int = 0

        if score1.n_of_parts == score2.n_of_parts:
            n_of_parts = score1.n_of_parts
        else:
            # The two scores have differing number of parts. For now, assume that
            # the parts are in the same order in both scores, and that the missing
            # parts are the ones that should have been at the end of the smaller
            # score. In future we could do something like we do with voices, where
            # we try all the combinations and compare the most-similar pairs of
            # parts, and the rest are considered to be the extra parts (deleted
            # from score1, or inserted into score2).
            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
            if score1.n_of_parts > score2.n_of_parts:
                # score1 has more parts that must be deleted
                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
                    deleted_part = score1.part_list[part_idx]
                    op_list_total.append(
                        (
                            "delpart",
                            deleted_part,
                            None,
                            deleted_part.notation_size()
                        )
                    )
                    cost_total += deleted_part.notation_size()
            else:
                # score2 has more parts that must be inserted
                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
                    inserted_part = score2.part_list[part_idx]
                    op_list_total.append(
                        (
                            "inspart",
                            None,
                            inserted_part,
                            inserted_part.notation_size()
                        )
                    )
                    cost_total += inserted_part.notation_size()

        # iterate over parts that exist in both scores
        for p_number in range(n_of_parts):
            # compute non-common-subseq
            ncs = Comparison._non_common_subsequences_of_measures(
                score1.part_list[p_number].bar_list,
                score2.part_list[p_number].bar_list,
            )
            # compute blockdiff
            for subseq in ncs:
                op_list_block, cost_block = Comparison._block_diff_lin(
                    subseq["original"], subseq["compare_to"]
                )
                op_list_total.extend(op_list_block)
                cost_total += cost_block

        # compare the staff groups
        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
            score1.staff_group_list, score2.staff_group_list
        )
        op_list_total.extend(groups_op_list)
        cost_total += groups_cost

        # compare the metadata items
        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
            score1.metadata_items_list, score2.metadata_items_list
        )
        op_list_total.extend(mditems_op_list)
        cost_total += mditems_cost

        # Add the cost of any syntax errors in score1 that were fixed during parsing.
        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
        total_syms: int = score1.notation_size() + score2.notation_size()
        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
        if cost_plus_errors > total_syms:
            adjustment: int = cost_plus_errors - total_syms
            score1.num_syntax_errors_fixed -= adjustment

        cost_total += score1.num_syntax_errors_fixed

        return op_list_total, cost_total
class
EvaluationMetrics:
30class EvaluationMetrics: 31 def __init__( 32 self, 33 gt_path: Path, 34 pred_path: Path, 35 gt_numsyms: int, 36 pred_numsyms: int, 37 omr_edit_distance: int, 38 edit_distances_dict: dict[str, int], 39 omr_ned: float 40 ): 41 self.gt_path: Path = gt_path 42 self.pred_path: Path = pred_path 43 self.gt_numsyms: int = gt_numsyms 44 self.pred_numsyms: int = pred_numsyms 45 self.omr_edit_distance: int = omr_edit_distance 46 self.edit_distances_dict: dict[str, int] = edit_distances_dict 47 self.omr_ned: float = omr_ned
EvaluationMetrics( gt_path: pathlib.Path, pred_path: pathlib.Path, gt_numsyms: int, pred_numsyms: int, omr_edit_distance: int, edit_distances_dict: dict[str, int], omr_ned: float)
31 def __init__( 32 self, 33 gt_path: Path, 34 pred_path: Path, 35 gt_numsyms: int, 36 pred_numsyms: int, 37 omr_edit_distance: int, 38 edit_distances_dict: dict[str, int], 39 omr_ned: float 40 ): 41 self.gt_path: Path = gt_path 42 self.pred_path: Path = pred_path 43 self.gt_numsyms: int = gt_numsyms 44 self.pred_numsyms: int = pred_numsyms 45 self.omr_edit_distance: int = omr_edit_distance 46 self.edit_distances_dict: dict[str, int] = edit_distances_dict 47 self.omr_ned: float = omr_ned
class
Comparison:
class Comparison:
    # single shared cache used by all the _memoize_* decorators above
    _memoizer_mem: dict = {}

    @staticmethod
    def _clear_memoizer_caches():
        # called at the start of annotated_scores_diff(): cached results are
        # only valid for one particular pair of scores
        Comparison._memoizer_mem = {}

    @staticmethod
    def _myers_diff(a_lines, b_lines):
        # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2)
        # This marks the farthest-right point along each diagonal in the edit
        # graph, along with the history that got it there
        Frontier = namedtuple("Frontier", ["x", "history"])
        frontier = {1: Frontier(0, [])}

        a_max = len(a_lines)
        b_max = len(b_lines)
        for d in range(0, a_max + b_max + 1):
            for k in range(-d, d + 1, 2):
                # This determines whether our next search point will be going down
                # in the edit graph, or to the right.
                #
                # The intuition for this is that we should go down if we're on the
                # left edge (k == -d) to make sure that the left edge is fully
                # explored.
                #
                # If we aren't on the top (k != d), then only go down if going down
                # would take us to territory that hasn't sufficiently been explored
                # yet.
                go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x)

                # Figure out the starting point of this iteration. The diagonal
                # offsets come from the geometry of the edit grid - if you're going
                # down, your diagonal is lower, and if you're going right, your
                # diagonal is higher.
                if go_down:
                    old_x, history = frontier[k + 1]
                    x = old_x
                else:
                    old_x, history = frontier[k - 1]
                    x = old_x + 1

                # We want to avoid modifying the old history, since some other step
                # may decide to use it.
                history = history[:]
                y = x - k

                # We start at the invalid point (0, 0) - we should only start building
                # up history when we move off of it.
                if 1 <= y <= b_max and go_down:
                    history.append((1, b_lines[y - 1][1]))  # add comparetostep
                elif 1 <= x <= a_max:
                    history.append((0, a_lines[x - 1][1]))  # add originalstep

                # Chew up as many diagonal moves as we can - these correspond to common lines,
                # and they're considered "free" by the algorithm because we want to maximize
                # the number of these in the output.
                while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]:
                    x += 1
                    y += 1
                    history.append((2, a_lines[x - 1][1]))  # add equal step

                if x >= a_max and y >= b_max:
                    # If we're here, then we've traversed through the bottom-left corner,
                    # and are done.
                    return np.array(history)

                frontier[k] = Frontier(x, history)

        assert False, "Could not find edit script"

    @staticmethod
    def _non_common_subsequences_myers(original, compare_to):
        # Both original and compare_to are list of lists, or numpy arrays with 2 columns.
        # This is necessary because bars need two representation at the same time.
        # One without the id (for comparison), and one with the id (to retrieve the bar
        # at the end).

        # get the list of operations
        op_list = Comparison._myers_diff(
            np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64)
        )[::-1]
        # retrieve the non common subsequences
        non_common_subsequences = []
        non_common_subsequences.append({"original": [], "compare_to": []})
        ind = 0
        for op in op_list[::-1]:
            if op[0] == 2:  # equal
                non_common_subsequences.append({"original": [], "compare_to": []})
                ind += 1
            elif op[0] == 0:  # original step
                non_common_subsequences[ind]["original"].append(op[1])
            elif op[0] == 1:  # compare to step
                non_common_subsequences[ind]["compare_to"].append(op[1])
        # remove the empty dict from the list
        non_common_subsequences = [
            s for s in non_common_subsequences if s != {"original": [], "compare_to": []}
        ]
        return non_common_subsequences

    @staticmethod
    def _non_common_subsequences_of_measures(original_m, compare_to_m):
        # Take the hash for each measure to run faster comparison
        # We need two hashes: one that is independent of the IDs (precomputed_str, for
        # comparison), and one that contains the IDs (precomputed_repr, to retrieve the
        # correct measure after computation)
        original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m]
        compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m]
        ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int)
        # retrieve the original pointers to measures
        new_out = []
        for e in ncs:
            new_out.append({})
            for k in e.keys():
                new_out[-1][k] = []
                for repr_hash in e[k]:
                    if k == "original":
                        new_out[-1][k].append(
                            next(m for m in original_m if m.precomputed_repr == repr_hash)
                        )
                    else:
                        new_out[-1][k].append(
                            next(m for m in compare_to_m if m.precomputed_repr == repr_hash)
                        )

        return new_out

    @staticmethod
    @_memoize_pitches_lev_diff
    def _pitches_levenshtein_diff(
        original: list[tuple[str, str, bool]],
        compare_to: list[tuple[str, str, bool]],
        noteNode1: AnnNote,
        noteNode2: AnnNote,
        ids: tuple[int, int]
    ):
        """
        Compute the Levenshtein distance between two sequences of pitches.

        Arguments:
            original {list} -- list of pitches
            compare_to {list} -- list of pitches
            noteNode1 {annotatedNote} -- for referencing
            noteNode2 {annotatedNote} -- for referencing
            ids {tuple} -- a tuple of 2 elements with the indices of the notes considered
        """
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        if len(original) == 0:
            # only insertions remain
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
            )
            op_list.append(
                ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
            )
            cost += M21Utils.pitch_size(compare_to[0])
            return op_list, cost

        if len(compare_to) == 0:
            # only deletions remain
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
            )
            op_list.append(
                ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
            )
            cost += M21Utils.pitch_size(original[0])
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost_dict = {}
        op_list_dict = {}
        # del-pitch
        op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
        )
        cost_dict["delpitch"] += M21Utils.pitch_size(original[0])
        op_list_dict["delpitch"].append(
            ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
        )
        # ins-pitch
        op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff(
            original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
        )
        cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0])
        op_list_dict["inspitch"].append(
            ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
        )
        # edit-pitch
        op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1)
        )
        if original[0] == compare_to[0]:  # to avoid performing the pitch_diff
            pitch_diff_op_list = []
            pitch_diff_cost = 0
        else:
            pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff(
                original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1])
            )
        cost_dict["editpitch"] += pitch_diff_cost
        op_list_dict["editpitch"].extend(pitch_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost_dict, key=lambda k: cost_dict[k])
        out = op_list_dict[min_key], cost_dict[min_key]
        return out

    @staticmethod
    def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids):
        """
        Compute the differences between two pitch (definition from the paper).
        a pitch consist of a tuple: pitch name (letter+number), accidental, tie.
        param : pitch1. The music_notation_repr tuple of note1
        param : pitch2. The music_notation_repr tuple of note2
        param : noteNode1. The noteNode where pitch1 belongs
        param : noteNode2. The noteNode where pitch2 belongs
        param : ids.
(id_from_note1,id_from_note2) The indices of the notes in case of a chord 365 Returns: 366 [list] -- the list of differences 367 [int] -- the cost of diff 368 """ 369 cost = 0 370 op_list = [] 371 # add for pitch name differences 372 if pitch1[0] != pitch2[0]: 373 cost += 1 374 # TODO: select the note in a more precise way in case of a chord 375 # rest to note 376 if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"): # xor 377 op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids)) 378 else: # they are two notes 379 op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids)) 380 381 # add for the accidentals 382 if pitch1[1] != pitch2[1]: # if the accidental is different 383 if pitch1[1] == "None": 384 assert pitch2[1] != "None" 385 cost += 1 386 op_list.append(("accidentins", noteNode1, noteNode2, 1, ids)) 387 elif pitch2[1] == "None": 388 assert pitch1[1] != "None" 389 cost += 1 390 op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids)) 391 else: # a different type of alteration is present 392 cost += 2 # delete then add 393 op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids)) 394 # add for the ties 395 if pitch1[2] != pitch2[2]: 396 # exclusive or. Add if one is tied and not the other. 
397 # probably to revise for chords 398 cost += 1 399 if pitch1[2]: 400 assert not pitch2[2] 401 op_list.append(("tiedel", noteNode1, noteNode2, 1, ids)) 402 elif pitch2[2]: 403 assert not pitch1[2] 404 op_list.append(("tieins", noteNode1, noteNode2, 1, ids)) 405 return op_list, cost 406 407 @staticmethod 408 @_memoize_block_diff_lin 409 def _block_diff_lin(original, compare_to): 410 if len(original) == 0 and len(compare_to) == 0: 411 return [], 0 412 413 if len(original) == 0: 414 op_list, cost = Comparison._block_diff_lin(original, compare_to[1:]) 415 cost += compare_to[0].notation_size() 416 op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size())) 417 return op_list, cost 418 419 if len(compare_to) == 0: 420 op_list, cost = Comparison._block_diff_lin(original[1:], compare_to) 421 cost += original[0].notation_size() 422 op_list.append(("delbar", original[0], None, original[0].notation_size())) 423 return op_list, cost 424 425 # compute the cost and the op_list for the many possibilities of recursion 426 cost_dict = {} 427 op_list_dict = {} 428 # del-bar 429 op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin( 430 original[1:], compare_to 431 ) 432 cost_dict["delbar"] += original[0].notation_size() 433 op_list_dict["delbar"].append( 434 ("delbar", original[0], None, original[0].notation_size()) 435 ) 436 # ins-bar 437 op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin( 438 original, compare_to[1:] 439 ) 440 cost_dict["insbar"] += compare_to[0].notation_size() 441 op_list_dict["insbar"].append( 442 ("insbar", None, compare_to[0], compare_to[0].notation_size()) 443 ) 444 # edit-bar 445 op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin( 446 original[1:], compare_to[1:] 447 ) 448 if ( 449 original[0] == compare_to[0] 450 ): # to avoid performing the _voices_coupling_recursive/_notes_set_distance 451 # if it's not needed 452 inside_bar_op_list = [] 453 inside_bar_cost = 0 454 
else: 455 # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras 456 # instead of lists of AnnNotes) 457 extras_op_list, extras_cost = Comparison._extras_set_distance( 458 original[0].extras_list, compare_to[0].extras_list 459 ) 460 461 # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras) 462 lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin( 463 original[0].lyrics_list, compare_to[0].lyrics_list 464 ) 465 466 if original[0].includes_voicing: 467 # run the voice coupling algorithm, and add to inside_bar_op_list 468 # and inside_bar_cost 469 inside_bar_op_list, inside_bar_cost = ( 470 Comparison._voices_coupling_recursive( 471 original[0].voices_list, compare_to[0].voices_list 472 ) 473 ) 474 else: 475 # run the set distance algorithm, and add to inside_bar_op_list 476 # and inside_bar_cost 477 inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance( 478 original[0].annot_notes, compare_to[0].annot_notes 479 ) 480 481 inside_bar_op_list.extend(extras_op_list) 482 inside_bar_cost += extras_cost 483 inside_bar_op_list.extend(lyrics_op_list) 484 inside_bar_cost += lyrics_cost 485 486 cost_dict["editbar"] += inside_bar_cost 487 op_list_dict["editbar"].extend(inside_bar_op_list) 488 # compute the minimum of the possibilities 489 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 490 out = op_list_dict[min_key], cost_dict[min_key] 491 return out 492 493 @staticmethod 494 @_memoize_lyrics_diff_lin 495 def _lyrics_diff_lin(original, compare_to): 496 # original and compare to are two lists of AnnLyric 497 if len(original) == 0 and len(compare_to) == 0: 498 return [], 0 499 500 if len(original) == 0: 501 cost = 0 502 op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:]) 503 op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size())) 504 cost += compare_to[0].notation_size() 505 return op_list, cost 506 507 if len(compare_to) == 0: 508 cost = 0 509 op_list, cost = 
Comparison._lyrics_diff_lin(original[1:], compare_to) 510 op_list.append(("lyricdel", original[0], None, original[0].notation_size())) 511 cost += original[0].notation_size() 512 return op_list, cost 513 514 # compute the cost and the op_list for the many possibilities of recursion 515 cost = {} 516 op_list = {} 517 # lyricdel 518 op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin( 519 original[1:], compare_to 520 ) 521 cost["lyricdel"] += original[0].notation_size() 522 op_list["lyricdel"].append( 523 ("lyricdel", original[0], None, original[0].notation_size()) 524 ) 525 # lyricins 526 op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin( 527 original, compare_to[1:] 528 ) 529 cost["lyricins"] += compare_to[0].notation_size() 530 op_list["lyricins"].append( 531 ("lyricins", None, compare_to[0], compare_to[0].notation_size()) 532 ) 533 # lyricsub 534 op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin( 535 original[1:], compare_to[1:] 536 ) 537 if ( 538 original[0] == compare_to[0] 539 ): # avoid call another function if they are equal 540 lyricsub_op, lyricsub_cost = [], 0 541 else: 542 lyricsub_op, lyricsub_cost = ( 543 Comparison._annotated_lyric_diff(original[0], compare_to[0]) 544 ) 545 cost["lyricsub"] += lyricsub_cost 546 op_list["lyricsub"].extend(lyricsub_op) 547 # compute the minimum of the possibilities 548 min_key = min(cost, key=cost.get) 549 out = op_list[min_key], cost[min_key] 550 return out 551 552 @staticmethod 553 def _strings_levenshtein_distance(str1: str, str2: str): 554 counter: dict = {"+": 0, "-": 0} 555 distance: int = 0 556 for edit_code in ndiff(str1, str2): 557 if edit_code[0] == " ": 558 distance += max(counter.values()) 559 counter = {"+": 0, "-": 0} 560 else: 561 counter[edit_code[0]] += 1 562 distance += max(counter.values()) 563 return distance 564 565 @staticmethod 566 def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool: 567 if off1 == off2: 568 return False 
569 570 # this should never happen, but... 571 if off1 is None or off2 is None: 572 if off1 is None and off2 is not None: 573 return True 574 if off1 is not None and off2 is None: 575 return True 576 return False # both are None, therefore not different at all 577 578 diff: OffsetQL = off1 - off2 579 if diff < 0: 580 diff = -diff 581 582 if diff > 0.0001: 583 return True 584 return False 585 586 @staticmethod 587 def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra): 588 """ 589 Compute the differences between two annotated extras. 590 Each annotated extra consists of three values: content, offset, and duration 591 """ 592 cost = 0 593 op_list = [] 594 595 # add for the content 596 if annExtra1.content != annExtra2.content: 597 content_cost: int = ( 598 Comparison._strings_levenshtein_distance( 599 annExtra1.content or '', 600 annExtra2.content or '' 601 ) 602 ) 603 cost += content_cost 604 op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost)) 605 606 # add for the symbolic (cost 2: delete one symbol, add the other) 607 if annExtra1.symbolic != annExtra2.symbolic: 608 cost += 2 609 op_list.append(("extrasymboledit", annExtra1, annExtra2, 2)) 610 611 # add for the infodict 612 if annExtra1.infodict != annExtra2.infodict: 613 info_cost: int = 0 614 # handle everything in annExtra1 (whether or not it is in annExtra2) 615 for k, v in annExtra1.infodict.items(): 616 if k not in annExtra2.infodict: 617 # not in annExtra2: delete a symbol 618 info_cost += 1 619 elif v != annExtra2.infodict[k]: 620 # different in annExtra2: delete a symbol, add a symbol 621 info_cost += 2 622 # handle everything in annExtra2 that is not in annExtra1 623 for k in annExtra2.infodict: 624 if k not in annExtra1.infodict: 625 # add a symbol 626 info_cost += 1 627 cost += info_cost 628 op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost)) 629 630 # add for the offset 631 # Note: offset here is a float, and some file formats have only four 632 # 
decimal places of precision. So we should not compare exactly here. 633 if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset): 634 cost += 1 635 op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1)) 636 637 # add for the duration 638 # Note: duration here is a float, and some file formats have only four 639 # decimal places of precision. So we should not compare exactly here. 640 if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration): 641 cost += 1 642 op_list.append(("extradurationedit", annExtra1, annExtra2, 1)) 643 644 # add for the style 645 if annExtra1.styledict != annExtra2.styledict: 646 cost += 1 # someday we might count different items in the styledict 647 op_list.append(("extrastyleedit", annExtra1, annExtra2, 1)) 648 649 return op_list, cost 650 651 @staticmethod 652 def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric): 653 """ 654 Compute the differences between two annotated lyrics. 655 Each annotated lyric consists of five values: lyric, verse_id, offset, duration, 656 and styledict. 
657 """ 658 cost = 0 659 op_list = [] 660 661 # add for the content 662 if annLyric1.lyric != annLyric2.lyric: 663 content_cost: int = ( 664 Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric) 665 ) 666 cost += content_cost 667 op_list.append(("lyricedit", annLyric1, annLyric2, content_cost)) 668 669 # add for the number 670 if annLyric1.number != annLyric2.number: 671 number_cost: int 672 if annLyric1.number == 0 or annLyric2.number == 0: 673 # add or delete number 674 number_cost = 1 675 else: 676 # add and delete number 677 number_cost = 2 678 cost += number_cost 679 op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost)) 680 681 # add for the identifier 682 if annLyric1.identifier != annLyric2.identifier: 683 # someday we might do a Levenshtein distance of the two ids 684 id_cost: int 685 if not annLyric1.identifier or not annLyric1.identifier: 686 # add or delete identifier 687 id_cost = 1 688 else: 689 # add and delete identifier 690 id_cost = 2 691 cost += id_cost 692 op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost)) 693 694 # add for the offset 695 # Note: offset here is a float, and some file formats have only four 696 # decimal places of precision. So we should not compare exactly here. 697 if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset): 698 cost += 1 699 op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1)) 700 701 # add for the style 702 if annLyric1.styledict != annLyric2.styledict: 703 cost += 1 # someday we might count different items in the styledict 704 op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1)) 705 706 return op_list, cost 707 708 @staticmethod 709 def _annotated_metadata_item_diff( 710 annMetadataItem1: AnnMetadataItem, 711 annMetadataItem2: AnnMetadataItem 712 ): 713 """ 714 Compute the differences between two annotated metadata items. 
715 Each annotated metadata item has two values: key: str, value: t.Any, 716 """ 717 cost = 0 718 op_list = [] 719 720 # we don't compare items that don't have the same key. 721 if annMetadataItem1.key != annMetadataItem2.key: 722 raise ValueError('unexpected comparison of metadata items with different keys') 723 724 # add for the value 725 if annMetadataItem1.value != annMetadataItem2.value: 726 value_cost: int = ( 727 Comparison._strings_levenshtein_distance( 728 str(annMetadataItem1.value), 729 str(annMetadataItem2.value) 730 ) 731 ) 732 cost += value_cost 733 op_list.append( 734 ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost) 735 ) 736 737 return op_list, cost 738 739 @staticmethod 740 def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup): 741 """ 742 Compute the differences between two annotated staff groups. 743 Each annotated staff group consists of five values: name, abbreviation, 744 symbol, barTogether, part_indices. 745 """ 746 cost = 0 747 op_list = [] 748 749 # add for the name 750 if annStaffGroup1.name != annStaffGroup2.name: 751 name_cost: int = ( 752 Comparison._strings_levenshtein_distance( 753 annStaffGroup1.name, 754 annStaffGroup2.name 755 ) 756 ) 757 cost += name_cost 758 op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost)) 759 760 # add for the abbreviation 761 if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation: 762 abbreviation_cost: int = ( 763 Comparison._strings_levenshtein_distance( 764 annStaffGroup1.abbreviation, 765 annStaffGroup2.abbreviation 766 ) 767 ) 768 cost += abbreviation_cost 769 op_list.append(( 770 "staffgrpabbreviationedit", 771 annStaffGroup1, 772 annStaffGroup2, 773 abbreviation_cost 774 )) 775 776 # add for the symbol 777 if annStaffGroup1.symbol != annStaffGroup2.symbol: 778 symbol_cost: int 779 if not annStaffGroup1.symbol or not annStaffGroup2.symbol: 780 # add or delete symbol 781 symbol_cost = 1 782 else: 783 # add and 
delete symbol 784 symbol_cost = 2 785 cost += symbol_cost 786 op_list.append( 787 ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost) 788 ) 789 790 # add for barTogether 791 if annStaffGroup1.barTogether != annStaffGroup2.barTogether: 792 barTogether_cost: int = 1 793 cost += barTogether_cost 794 op_list.append( 795 ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost) 796 ) 797 798 # add for partIndices (sorted list of int) 799 if annStaffGroup1.part_indices != annStaffGroup2.part_indices: 800 partIndices_cost: int = 0 801 if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]: 802 partIndices_cost += 1 # vertical start 803 if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]: 804 partIndices_cost += 1 # vertical height 805 if partIndices_cost == 0: 806 # should never get here, but we have to have a cost 807 partIndices_cost = 1 808 cost += partIndices_cost 809 op_list.append( 810 ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost) 811 ) 812 813 return op_list, cost 814 815 @staticmethod 816 @_memoize_inside_bars_diff_lin 817 def _inside_bars_diff_lin(original, compare_to): 818 # original and compare to are two lists of annotatedNote 819 if len(original) == 0 and len(compare_to) == 0: 820 return [], 0 821 822 if len(original) == 0: 823 cost = 0 824 op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:]) 825 op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size())) 826 cost += compare_to[0].notation_size() 827 return op_list, cost 828 829 if len(compare_to) == 0: 830 cost = 0 831 op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to) 832 op_list.append(("notedel", original[0], None, original[0].notation_size())) 833 cost += original[0].notation_size() 834 return op_list, cost 835 836 # compute the cost and the op_list for the many possibilities of recursion 837 cost = {} 838 op_list = {} 839 # notedel 840 
op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin( 841 original[1:], compare_to 842 ) 843 cost["notedel"] += original[0].notation_size() 844 op_list["notedel"].append( 845 ("notedel", original[0], None, original[0].notation_size()) 846 ) 847 # noteins 848 op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin( 849 original, compare_to[1:] 850 ) 851 cost["noteins"] += compare_to[0].notation_size() 852 op_list["noteins"].append( 853 ("noteins", None, compare_to[0], compare_to[0].notation_size()) 854 ) 855 # notesub 856 op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin( 857 original[1:], compare_to[1:] 858 ) 859 if ( 860 original[0] == compare_to[0] 861 ): # avoid call another function if they are equal 862 notesub_op, notesub_cost = [], 0 863 else: 864 notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0]) 865 cost["notesub"] += notesub_cost 866 op_list["notesub"].extend(notesub_op) 867 # compute the minimum of the possibilities 868 min_key = min(cost, key=cost.get) 869 out = op_list[min_key], cost[min_key] 870 return out 871 872 @staticmethod 873 def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote): 874 """ 875 Compute the differences between two annotated notes. 876 Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets) 877 where pitches is a list. 
        Arguments:
            noteNode1 {[AnnNote]} -- original AnnNote
            noteNode2 {[AnnNote]} -- compare_to AnnNote
        """
        cost = 0
        op_list = []
        # add for the pitches
        # if they are equal
        if annNote1.pitches == annNote2.pitches:
            op_list_pitch, cost_pitch = [], 0
        else:
            # pitches diff is computed using Levenshtein distances (they are already ordered)
            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
            )
        op_list.extend(op_list_pitch)
        cost += cost_pitch
        # add for the notehead
        if annNote1.note_head != annNote2.note_head:
            # delete one note head, add the other (this isn't noteshape, this is
            # just quarter-note note head vs half-note note head, etc)
            cost += 2
            op_list.append(("headedit", annNote1, annNote2, 2))
        # add for the dots
        if annNote1.dots != annNote2.dots:
            # add one for each added (or deleted) dot
            dots_diff = abs(annNote1.dots - annNote2.dots)
            cost += dots_diff
            if annNote1.dots > annNote2.dots:
                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
            else:
                op_list.append(("dotins", annNote1, annNote2, dots_diff))
        # add for the grace-note type
        if annNote1.graceType != annNote2.graceType:
            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
            cost += 2
            op_list.append(("graceedit", annNote1, annNote2, 2))
        # add for the grace-note slash
        if annNote1.graceSlash != annNote2.graceSlash:
            # add or delete the slash
            cost += 1
            op_list.append(("graceslashedit", annNote1, annNote2, 1))
        # add for the beamings
        if annNote1.beamings != annNote2.beamings:
            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
            )
            op_list.extend(beam_op_list)
            cost += beam_cost
        # add for the tuplet types
        if annNote1.tuplets != annNote2.tuplets:
            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_op_list)
            cost += tuplet_cost
        # add for the tuplet info
        if annNote1.tuplet_info != annNote2.tuplet_info:
            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_info_op_list)
            cost += tuplet_info_cost
        # add for the articulations
        if annNote1.articulations != annNote2.articulations:
            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
                annNote1.articulations,
                annNote2.articulations,
                annNote1,
                annNote2,
                "articulation",
            )
            op_list.extend(artic_op_list)
            cost += artic_cost
        # add for the expressions
        if annNote1.expressions != annNote2.expressions:
            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
                annNote1.expressions,
                annNote2.expressions,
                annNote1,
                annNote2,
                "expression",
            )
            op_list.extend(expr_op_list)
            cost += expr_cost

        # add for gap from previous note or start of measure if first note in measure
        # (i.e. horizontal position shift)
        if annNote1.gap_dur != annNote2.gap_dur:
            # in all cases, the edit is a simple horizontal shift of the note
            cost += 1
            if annNote1.gap_dur == 0:
                op_list.append(("insspace", annNote1, annNote2, 1))
            elif annNote2.gap_dur == 0:
                op_list.append(("delspace", annNote1, annNote2, 1))
            else:
                # neither is zero
                op_list.append(("editspace", annNote1, annNote2, 1))

        # add for noteshape
        if annNote1.noteshape != annNote2.noteshape:
            # always delete existing note shape and add the new one
            cost += 2
            op_list.append(("editnoteshape", annNote1, annNote2, 2))
        # add for noteheadFill
        if annNote1.noteheadFill != annNote2.noteheadFill:
            # always delete existing note fill and add the new one
            cost += 2
            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
        # add for noteheadParenthesis (True or False)
        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
            # always either add or delete parentheses
            cost += 1
            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
        # add for stemDirection
        if annNote1.stemDirection != annNote2.stemDirection:
            stemdir_cost: int
            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
                # gonna add a stem
                stemdir_cost = 1
            else:
                # gonna change a stem (add then delete)
                stemdir_cost = 2
            cost += stemdir_cost
            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
        # add for the styledict
        if annNote1.styledict != annNote2.styledict:
            cost += 1
            op_list.append(("editstyle", annNote1, annNote2, 1))

        return op_list, cost

    @staticmethod
    @_memoize_beamtuplet_lev_diff
    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
        """
        Compute the Levenshtein distance between two sequences of beamings or tuplets.
        Arguments:
            original {list} -- list of strings (start, stop, continue or partial)
            compare_to {list} -- list of strings (start, stop, continue or partial)
            note1 {AnnNote} -- the note for referencing in the score
            note2 {AnnNote} -- the note for referencing in the score
            which -- a string: "beam" or "tuplet" depending what we are comparing
        """
        if which not in ("beam", "tuplet"):
            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")

        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        if len(original) == 0:
            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
                original, compare_to[1:], note1, note2, which
            )
            op_list.append(("ins" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        if len(compare_to) == 0:
            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
                original[1:], compare_to, note1, note2, which
            )
            op_list.append(("del" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost = {}
        op_list = {}
        # delwhich
        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
            original[1:], compare_to, note1, note2, which
        )
        cost["del" + which] += 1
        op_list["del" + which].append(("del" + which, note1, note2, 1))
        # inswhich
        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
            original, compare_to[1:], note1, note2, which
        )
        cost["ins" + which] += 1
        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
        # editwhich
        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
            original[1:], compare_to[1:], note1, note2, which
        )
        if original[0] == compare_to[0]:
            beam_diff_op_list = []
            beam_diff_cost = 0
        else:
            # any difference between two single items costs exactly 1
            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
        cost["edit" + which] += beam_diff_cost
        op_list["edit" + which].extend(beam_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    @_memoize_generic_lev_diff
    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
        """
        Compute the Levenshtein distance between two generic sequences of symbols
        (e.g., articulations).
        Arguments:
            original {list} -- list of strings
            compare_to {list} -- list of strings
            note1 {AnnNote} -- the note for referencing in the score
            note2 {AnnNote} -- the note for referencing in the score
            which -- a string: e.g. "articulation" depending what we are comparing
        """
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        if len(original) == 0:
            op_list, cost = Comparison._generic_levenshtein_diff(
                original, compare_to[1:], note1, note2, which
            )
            op_list.append(("ins" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        if len(compare_to) == 0:
            op_list, cost = Comparison._generic_levenshtein_diff(
                original[1:], compare_to, note1, note2, which
            )
            op_list.append(("del" + which, note1, note2, 1))
            cost += 1
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost = {}
        op_list = {}
        # delwhich
        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
            original[1:], compare_to, note1, note2, which
        )
        cost["del" + which] += 1
        op_list["del" + which].append(("del" + which, note1, note2, 1))
        # inswhich
        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
            original, compare_to[1:], note1, note2, which
        )
        cost["ins" + which] += 1
        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
        # editwhich
        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
            original[1:], compare_to[1:], note1, note2, which
        )
        if original[0] == compare_to[0]:  # to avoid performing the diff
            generic_diff_op_list = []
            generic_diff_cost = 0
        else:
            generic_diff_op_list, generic_diff_cost = (
                [("edit" + which, note1, note2, 1)],
                1,
            )
        cost["edit" + which] += generic_diff_cost
        op_list["edit" + which].extend(generic_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost, key=cost.get)
        out = op_list[min_key], cost[min_key]
        return out

    @staticmethod
    @_memoize_notes_set_distance
    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
        """
        Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in
        that order of importance). If you can't find an exactly matching note, try again without
        visual duration.
        original [list] -- a list of AnnNote (which are never chords)
        compare_to [list] -- a list of AnnNote (which are never chords)
        """
        paired_notes: list[tuple[AnnNote, AnnNote]] = []
        unpaired_orig_notes: list[AnnNote] = []
        # shallow copy: we pop paired notes out of this list as we go
        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)

        for orig_n in original:
            # fallback is the first note that matched on pitch/offset/graceness
            # but (possibly) not on visual duration
            fallback: AnnNote | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_n in enumerate(unpaired_comp_notes):
                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
                    # this pitch comparison (1) assumes the note is not a chord
                    # (because we don't do chords when Voicing is not set, and
                    # we only call _notes_set_distance when Voicing is not set),
                    # and (2) only compares the visual position of the note (we
                    # are ignoring the accidental here).  This is so that an
                    # accidental change will show up as a pitch edit, not a
                    # note remove/insert.
                    continue
                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
                    continue
                if orig_n.note_is_grace != comp_n.note_is_grace:
                    continue
                if fallback is None:
                    fallback = comp_n
                    fallback_i = i

                if orig_n.note_dur_type != comp_n.note_dur_type:
                    continue
                if orig_n.note_dur_dots != comp_n.note_dur_dots:
                    continue

                # found a perfect match
                paired_notes.append((orig_n, comp_n))

                # remove comp_n from unpaired_comp_notes
                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original note
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_notes.append((orig_n, fallback))
                unpaired_comp_notes.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_notes.append(orig_n)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # notedel
        if unpaired_orig_notes:
            for an in unpaired_orig_notes:
                cost += an.notation_size()
                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))

        # noteins
        if unpaired_comp_notes:
            for an in unpaired_comp_notes:
                cost += an.notation_size()
                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))

        # notesub
        if paired_notes:
            for ano, anc in paired_notes:
                if ano == anc:
                    # if equal, avoid _annotated_note_diff call
                    notesub_op, notesub_cost = [], 0
                else:
                    notesub_op, notesub_cost = (
                        Comparison._annotated_note_diff(ano, anc)
                    )
                cost += notesub_cost
                op_list.extend(notesub_op)

        return op_list, cost

    @staticmethod
@_memoize_extras_set_distance 1235 def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]): 1236 """ 1237 Gather up pairs of matching extras (using kind, offset, and visual duration, in 1238 that order of importance). If you can't find an exactly matching extra, try again 1239 without visual duration. 1240 original [list] -- a list of AnnExtras 1241 compare_to [list] -- a list of AnnExtras 1242 """ 1243 paired_extras: list[tuple[AnnExtra, AnnExtra]] = [] 1244 unpaired_orig_extras: list[AnnExtra] = [] 1245 unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to) 1246 1247 for orig_x in original: 1248 fallback: AnnExtra | None = None 1249 fallback_i: int = -1 1250 found_it: bool = False 1251 for i, comp_x in enumerate(unpaired_comp_extras): 1252 # kind and offset are required for pairing 1253 if orig_x.kind != comp_x.kind: 1254 continue 1255 if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset): 1256 continue 1257 if fallback is None: 1258 fallback = comp_x 1259 fallback_i = i 1260 1261 # duration is preferred for pairing 1262 if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration): 1263 continue 1264 1265 # there are a few kind-specific elements that are also preferred: 1266 # 'direction'/'ending': content (visible text) 1267 # 'keysig'/'timesig'/'clef': symbolic (there are sometimes two 1268 # simultaneous keysigs, timesigs, or clefs, and we don't 1269 # want to confuse which one is which, producing a diff where 1270 # there actually isn't one) 1271 # 'slur': placements (because there are often two identical slurs 1272 # whose only difference is 'above' vs 'below', producing a diff 1273 # where there actually isn't one) 1274 if orig_x.kind in ('direction', 'ending'): 1275 if orig_x.content != comp_x.content: 1276 continue 1277 if orig_x.kind == 'clef': 1278 if orig_x.symbolic != comp_x.symbolic: 1279 continue 1280 if orig_x.kind in ('keysig', 'timesig'): 1281 if orig_x.infodict != comp_x.infodict: 1282 continue 1283 
if orig_x.kind == 'slur': 1284 orig_placement: str = orig_x.styledict.get('placement', '') 1285 comp_placement: str = comp_x.styledict.get('placement', '') 1286 if orig_placement != comp_placement: 1287 continue 1288 1289 # found a perfect match 1290 paired_extras.append((orig_x, comp_x)) 1291 1292 # remove comp_n from unpaired_comp_extras 1293 unpaired_comp_extras.pop(i) # remove(comp_n) would sometimes get the wrong one 1294 1295 found_it = True 1296 break 1297 1298 if found_it: 1299 # on to the next original extra 1300 continue 1301 1302 # did we find a fallback (matched except for duration)? 1303 if fallback is not None: 1304 paired_extras.append((orig_x, fallback)) 1305 unpaired_comp_extras.pop(fallback_i) 1306 continue 1307 1308 # we found nothing 1309 unpaired_orig_extras.append(orig_x) 1310 1311 # compute the cost and the op_list 1312 cost: int = 0 1313 op_list: list = [] 1314 1315 # extradel 1316 for extra in unpaired_orig_extras: 1317 cost += extra.notation_size() 1318 op_list.append(("extradel", extra, None, extra.notation_size())) 1319 1320 # extrains 1321 for extra in unpaired_comp_extras: 1322 cost += extra.notation_size() 1323 op_list.append(("extrains", None, extra, extra.notation_size())) 1324 1325 # extrasub 1326 if paired_extras: 1327 for extrao, extrac in paired_extras: 1328 if extrao == extrac: 1329 # if equal, avoid _annotated_extra_diff call 1330 extrasub_op, extrasub_cost = [], 0 1331 else: 1332 extrasub_op, extrasub_cost = ( 1333 Comparison._annotated_extra_diff(extrao, extrac) 1334 ) 1335 cost += extrasub_cost 1336 op_list.extend(extrasub_op) 1337 1338 return op_list, cost 1339 1340 @staticmethod 1341 @_memoize_metadata_items_set_distance 1342 def _metadata_items_set_distance( 1343 original: list[AnnMetadataItem], 1344 compare_to: list[AnnMetadataItem] 1345 ): 1346 """ 1347 Gather up pairs of matching metadata_items (using key and value). If you can't find 1348 an exactly matching metadata_item, try again without value. 
1349 original [list] -- a list of AnnMetadataItems 1350 compare_to [list] -- a list of AnnMetadataItems 1351 """ 1352 paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = [] 1353 unpaired_orig_metadata_items: list[AnnMetadataItem] = [] 1354 unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to) 1355 1356 # first look for perfect matches 1357 for orig_mdi in original: 1358 found_it: bool = False 1359 for i, comp_mdi in enumerate(unpaired_comp_metadata_items): 1360 # key is required for perfect match 1361 if orig_mdi.key != comp_mdi.key: 1362 continue 1363 1364 # value is required for perfect match 1365 if orig_mdi.value != comp_mdi.value: 1366 continue 1367 1368 # found a perfect match 1369 paired_metadata_items.append((orig_mdi, comp_mdi)) 1370 1371 # remove comp_mdi from unpaired_comp_metadata_items 1372 unpaired_comp_metadata_items.pop(i) 1373 1374 found_it = True 1375 break 1376 1377 if found_it: 1378 # on to the next original metadata_item 1379 continue 1380 1381 # we found no perfect match 1382 unpaired_orig_metadata_items.append(orig_mdi) 1383 1384 # now look among the unpaired remainders for key match only 1385 remove_unpaired_orig_items: list[AnnMetadataItem] = [] 1386 for orig_mdi in unpaired_orig_metadata_items: 1387 for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items): 1388 # key is required for key-only match 1389 if orig_mdi.key != comp_mdi.key: 1390 continue 1391 1392 # found a key-only match 1393 paired_metadata_items.append((orig_mdi, comp_mdi)) 1394 1395 # remove comp_mdi from unpaired_comp_metadata_items 1396 unpaired_comp_metadata_items.pop(comp_idx) 1397 1398 # make a note of unpaired_orig_metadata_item to remove later 1399 remove_unpaired_orig_items.append(orig_mdi) 1400 break 1401 1402 for remove_mdi in remove_unpaired_orig_items: 1403 unpaired_orig_metadata_items.remove(remove_mdi) 1404 1405 # compute the cost and the op_list 1406 cost: int = 0 1407 op_list: list = [] 1408 1409 # mditemdel 
1410 for metadata_item in unpaired_orig_metadata_items: 1411 cost += metadata_item.notation_size() 1412 op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size())) 1413 1414 # mditemins 1415 for metadata_item in unpaired_comp_metadata_items: 1416 cost += metadata_item.notation_size() 1417 op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size())) 1418 1419 # mditemsub 1420 if paired_metadata_items: 1421 for metadata_itemo, metadata_itemc in paired_metadata_items: 1422 if metadata_itemo == metadata_itemc: 1423 # if equal, avoid _annotated_metadata_item_diff call 1424 metadata_itemsub_op, metadata_itemsub_cost = [], 0 1425 else: 1426 metadata_itemsub_op, metadata_itemsub_cost = ( 1427 Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc) 1428 ) 1429 cost += metadata_itemsub_cost 1430 op_list.extend(metadata_itemsub_op) 1431 1432 return op_list, cost 1433 1434 @staticmethod 1435 @_memoize_staff_groups_set_distance 1436 def _staff_groups_set_distance( 1437 original: list[AnnStaffGroup], 1438 compare_to: list[AnnStaffGroup] 1439 ): 1440 """ 1441 Gather up pairs of matching staffgroups (using start_index and end_index, in 1442 that order of importance). If you can't find an exactly matching staffgroup, 1443 try again without end_index. 
1444 original [list] -- a list of AnnStaffGroups 1445 compare_to [list] -- a list of AnnStaffGroups 1446 """ 1447 paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = [] 1448 unpaired_orig_staff_groups: list[AnnStaffGroup] = [] 1449 unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to) 1450 1451 for orig_sg in original: 1452 fallback: AnnStaffGroup | None = None 1453 fallback_i: int = -1 1454 found_it: bool = False 1455 for i, comp_sg in enumerate(unpaired_comp_staff_groups): 1456 # start index is required for pairing 1457 if orig_sg.part_indices[0] != comp_sg.part_indices[0]: 1458 continue 1459 if fallback is None: 1460 fallback = comp_sg 1461 fallback_i = i 1462 1463 # end index and symbol (brace, bracket, etc) is preferred for pairing 1464 if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]: 1465 continue 1466 if orig_sg.symbol != comp_sg.symbol: 1467 continue 1468 1469 # found a perfect match 1470 paired_staff_groups.append((orig_sg, comp_sg)) 1471 1472 # remove comp_n from unpaired_comp_staff_groups 1473 unpaired_comp_staff_groups.pop(i) 1474 1475 found_it = True 1476 break 1477 1478 if found_it: 1479 # on to the next original staff_group 1480 continue 1481 1482 # did we find a fallback (matched except for duration)? 
1483 if fallback is not None: 1484 paired_staff_groups.append((orig_sg, fallback)) 1485 unpaired_comp_staff_groups.pop(fallback_i) 1486 continue 1487 1488 # we found nothing 1489 unpaired_orig_staff_groups.append(orig_sg) 1490 1491 # compute the cost and the op_list 1492 cost: int = 0 1493 op_list: list = [] 1494 1495 # staffgrpdel 1496 for staff_group in unpaired_orig_staff_groups: 1497 cost += staff_group.notation_size() 1498 op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size())) 1499 1500 # staffgrpins 1501 for staff_group in unpaired_comp_staff_groups: 1502 cost += staff_group.notation_size() 1503 op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size())) 1504 1505 # staffgrpsub 1506 if paired_staff_groups: 1507 for staff_groupo, staff_groupc in paired_staff_groups: 1508 if staff_groupo == staff_groupc: 1509 # if equal, avoid _annotated_staff_group_diff call 1510 staff_groupsub_op, staff_groupsub_cost = [], 0 1511 else: 1512 staff_groupsub_op, staff_groupsub_cost = ( 1513 Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc) 1514 ) 1515 cost += staff_groupsub_cost 1516 op_list.extend(staff_groupsub_op) 1517 1518 return op_list, cost 1519 1520 @staticmethod 1521 def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]): 1522 """ 1523 Compare all the possible voices permutations, considering also deletion and 1524 insertion (equation on office lens). 
1525 original [list] -- a list of Voice 1526 compare_to [list] -- a list of Voice 1527 """ 1528 if len(original) == 0 and len(compare_to) == 0: # stop the recursion 1529 return [], 0 1530 1531 if len(original) == 0: 1532 # insertion 1533 op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:]) 1534 # add for the inserted voice 1535 op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size())) 1536 cost += compare_to[0].notation_size() 1537 return op_list, cost 1538 1539 if len(compare_to) == 0: 1540 # deletion 1541 op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to) 1542 # add for the deleted voice 1543 op_list.append(("voicedel", original[0], None, original[0].notation_size())) 1544 cost += original[0].notation_size() 1545 return op_list, cost 1546 1547 cost = {} 1548 op_list = {} 1549 # deletion 1550 op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive( 1551 original[1:], compare_to 1552 ) 1553 op_list["voicedel"].append( 1554 ("voicedel", original[0], None, original[0].notation_size()) 1555 ) 1556 cost["voicedel"] += original[0].notation_size() 1557 for i, c in enumerate(compare_to): 1558 # substitution 1559 ( 1560 op_list["voicesub" + str(i)], 1561 cost["voicesub" + str(i)], 1562 ) = Comparison._voices_coupling_recursive( 1563 original[1:], compare_to[:i] + compare_to[i + 1:] 1564 ) 1565 if ( 1566 compare_to[0] != original[0] 1567 ): # add the cost of the sub and the operations from inside_bar_diff 1568 op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin( 1569 original[0].annot_notes, c.annot_notes 1570 ) # compute the distance from original[0] and compare_to[i] 1571 op_list["voicesub" + str(i)].extend(op_list_inside_bar) 1572 cost["voicesub" + str(i)] += cost_inside_bar 1573 # compute the minimum of the possibilities 1574 min_key = min(cost, key=cost.get) 1575 out = op_list[min_key], cost[min_key] 1576 return out 1577 1578 @staticmethod 1579 def 
annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]: 1580 ''' 1581 Compare two annotated scores, computing an operations list and the cost of applying those 1582 operations to the first score to generate the second score. 1583 1584 Args: 1585 score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare. 1586 score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare. 1587 1588 Returns: 1589 list[tuple], int: The operations list and the cost 1590 ''' 1591 # Clear all memoizer caches, in case we are called again with different scores. 1592 # The cached results are no longer valid. 1593 Comparison._clear_memoizer_caches() 1594 1595 op_list_total: list[tuple] = [] 1596 cost_total: int = 0 1597 1598 if score1.n_of_parts == score2.n_of_parts: 1599 n_of_parts = score1.n_of_parts 1600 else: 1601 # The two scores have differing number of parts. For now, assume that 1602 # the parts are in the same order in both scores, and that the missing 1603 # parts are the ones that should have been at the end of the smaller 1604 # score. In future we could do something like we do with voices, where 1605 # we try all the combinations and compare the most-similar pairs of 1606 # parts, and the rest are considered to be the extra parts (deleted 1607 # from score1, or inserted into score2). 
1608 n_of_parts = min(score1.n_of_parts, score2.n_of_parts) 1609 if score1.n_of_parts > score2.n_of_parts: 1610 # score1 has more parts that must be deleted 1611 for part_idx in range(score2.n_of_parts, score1.n_of_parts): 1612 deleted_part = score1.part_list[part_idx] 1613 op_list_total.append( 1614 ( 1615 "delpart", 1616 deleted_part, 1617 None, 1618 deleted_part.notation_size() 1619 ) 1620 ) 1621 cost_total += deleted_part.notation_size() 1622 else: 1623 # score2 has more parts that must be inserted 1624 for part_idx in range(score1.n_of_parts, score2.n_of_parts): 1625 inserted_part = score2.part_list[part_idx] 1626 op_list_total.append( 1627 ( 1628 "inspart", 1629 None, 1630 inserted_part, 1631 inserted_part.notation_size() 1632 ) 1633 ) 1634 cost_total += inserted_part.notation_size() 1635 1636 # iterate over parts that exist in both scores 1637 for p_number in range(n_of_parts): 1638 # compute non-common-subseq 1639 ncs = Comparison._non_common_subsequences_of_measures( 1640 score1.part_list[p_number].bar_list, 1641 score2.part_list[p_number].bar_list, 1642 ) 1643 # compute blockdiff 1644 for subseq in ncs: 1645 op_list_block, cost_block = Comparison._block_diff_lin( 1646 subseq["original"], subseq["compare_to"] 1647 ) 1648 op_list_total.extend(op_list_block) 1649 cost_total += cost_block 1650 1651 # compare the staff groups 1652 groups_op_list, groups_cost = Comparison._staff_groups_set_distance( 1653 score1.staff_group_list, score2.staff_group_list 1654 ) 1655 op_list_total.extend(groups_op_list) 1656 cost_total += groups_cost 1657 1658 # compare the metadata items 1659 mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance( 1660 score1.metadata_items_list, score2.metadata_items_list 1661 ) 1662 op_list_total.extend(mditems_op_list) 1663 cost_total += mditems_cost 1664 1665 # Add the cost of any syntax errors in score1 that were fixed during parsing. 1666 # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency. 
1667 total_syms: int = score1.notation_size() + score2.notation_size() 1668 cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed 1669 if cost_plus_errors > total_syms: 1670 adjustment: int = cost_plus_errors - total_syms 1671 score1.num_syntax_errors_fixed -= adjustment 1672 1673 cost_total += score1.num_syntax_errors_fixed 1674 1675 return op_list_total, cost_total
@staticmethod
def annotated_scores_diff(score1: musicdiff.annotation.AnnScore, score2: musicdiff.annotation.AnnScore) -> tuple[list[tuple], int]:
    # NOTE(review): this block is a verbatim duplicate of the
    # annotated_scores_diff definition that appears earlier (it looks like a
    # documentation-extraction artifact: signature, source listing, then
    # rendered docstring). Python silently lets this later definition shadow
    # the earlier identical one; consider removing one copy.
    @staticmethod
    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
        '''
        Compare two annotated scores, computing an operations list and the cost of applying those
        operations to the first score to generate the second score.

        Args:
            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.

        Returns:
            list[tuple], int: The operations list and the cost
        '''
        # Clear all memoizer caches, in case we are called again with different scores.
        # The cached results are no longer valid.
        Comparison._clear_memoizer_caches()

        op_list_total: list[tuple] = []
        cost_total: int = 0

        if score1.n_of_parts == score2.n_of_parts:
            n_of_parts = score1.n_of_parts
        else:
            # The two scores have differing number of parts. For now, assume that
            # the parts are in the same order in both scores, and that the missing
            # parts are the ones that should have been at the end of the smaller
            # score. In future we could do something like we do with voices, where
            # we try all the combinations and compare the most-similar pairs of
            # parts, and the rest are considered to be the extra parts (deleted
            # from score1, or inserted into score2).
            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
            if score1.n_of_parts > score2.n_of_parts:
                # score1 has more parts that must be deleted
                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
                    deleted_part = score1.part_list[part_idx]
                    op_list_total.append(
                        (
                            "delpart",
                            deleted_part,
                            None,
                            deleted_part.notation_size()
                        )
                    )
                    cost_total += deleted_part.notation_size()
            else:
                # score2 has more parts that must be inserted
                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
                    inserted_part = score2.part_list[part_idx]
                    op_list_total.append(
                        (
                            "inspart",
                            None,
                            inserted_part,
                            inserted_part.notation_size()
                        )
                    )
                    cost_total += inserted_part.notation_size()

        # iterate over parts that exist in both scores
        for p_number in range(n_of_parts):
            # compute non-common-subseq
            ncs = Comparison._non_common_subsequences_of_measures(
                score1.part_list[p_number].bar_list,
                score2.part_list[p_number].bar_list,
            )
            # compute blockdiff
            for subseq in ncs:
                op_list_block, cost_block = Comparison._block_diff_lin(
                    subseq["original"], subseq["compare_to"]
                )
                op_list_total.extend(op_list_block)
                cost_total += cost_block

        # compare the staff groups
        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
            score1.staff_group_list, score2.staff_group_list
        )
        op_list_total.extend(groups_op_list)
        cost_total += groups_cost

        # compare the metadata items
        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
            score1.metadata_items_list, score2.metadata_items_list
        )
        op_list_total.extend(mditems_op_list)
        cost_total += mditems_cost

        # Add the cost of any syntax errors in score1 that were fixed during parsing.
        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
        total_syms: int = score1.notation_size() + score2.notation_size()
        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
        if cost_plus_errors > total_syms:
            adjustment: int = cost_plus_errors - total_syms
            score1.num_syntax_errors_fixed -= adjustment

        cost_total += score1.num_syntax_errors_fixed

        return op_list_total, cost_total
Compare two annotated scores, computing an operations list and the cost of applying those operations to the first score to generate the second score.
Arguments:
- score1 (musicdiff.annotation.AnnScore): The first annotated score to compare.
- score2 (musicdiff.annotation.AnnScore): The second annotated score to compare.
Returns:
list[tuple], int: The operations list and the cost