musicdiff.comparison
1# ------------------------------------------------------------------------------ 2# Purpose: comparison is a music comparison package for use by musicdiff. 3# musicdiff is a package for comparing music scores using music21. 4# 5# Authors: Greg Chapman <gregc@mac.com> 6# musicdiff is derived from: 7# https://github.com/fosfrancesco/music-score-diff.git 8# by Francesco Foscarin <foscarin.francesco@gmail.com> 9# 10# Copyright: (c) 2022-2025 Francesco Foscarin, Greg Chapman 11# License: MIT, see LICENSE 12# ------------------------------------------------------------------------------ 13 14__docformat__ = "google" 15 16import copy 17from collections import namedtuple 18from difflib import ndiff 19from pathlib import Path 20 21# import typing as t 22import numpy as np 23 24from music21.common import OffsetQL 25from musicdiff.annotation import AnnScore, AnnNote, AnnVoice, AnnExtra, AnnLyric 26from musicdiff.annotation import AnnStaffGroup, AnnMetadataItem 27from musicdiff import M21Utils 28 29class EvaluationMetrics: 30 def __init__( 31 self, 32 gt_path: Path, 33 pred_path: Path, 34 gt_numsyms: int, 35 pred_numsyms: int, 36 omr_edit_distance: int, 37 edit_distances_dict: dict[str, int], 38 omr_ned: float 39 ): 40 self.gt_path: Path = gt_path 41 self.pred_path: Path = pred_path 42 self.gt_numsyms: int = gt_numsyms 43 self.pred_numsyms: int = pred_numsyms 44 self.omr_edit_distance: int = omr_edit_distance 45 self.edit_distances_dict: dict[str, int] = edit_distances_dict 46 self.omr_ned: float = omr_ned 47 48# memoizers to speed up the recursive computation 49def _memoize_notes_set_distance(func): 50 def memoizer(original, compare_to): 51 key = repr(original) + repr(compare_to) 52 if key not in Comparison._memoizer_mem: 53 Comparison._memoizer_mem[key] = func(original, compare_to) 54 return copy.deepcopy(Comparison._memoizer_mem[key]) 55 56 return memoizer 57 58def _memoize_extras_set_distance(func): 59 def memoizer(original, compare_to): 60 key = repr(original) + repr(compare_to) 61 if key not in Comparison._memoizer_mem: 62 Comparison._memoizer_mem[key] = func(original, compare_to) 63 return copy.deepcopy(Comparison._memoizer_mem[key]) 64 65 return memoizer 66 67def _memoize_staff_groups_set_distance(func): 68 def memoizer(original, compare_to): 69 key = repr(original) + repr(compare_to) 70 if key not in Comparison._memoizer_mem: 71 Comparison._memoizer_mem[key] = func(original, compare_to) 72 return copy.deepcopy(Comparison._memoizer_mem[key]) 73 74 return memoizer 75 76def _memoize_metadata_items_set_distance(func): 77 def memoizer(original, compare_to): 78 key = repr(original) + repr(compare_to) 79 if key not in Comparison._memoizer_mem: 80 Comparison._memoizer_mem[key] = func(original, compare_to) 81 return copy.deepcopy(Comparison._memoizer_mem[key]) 82 83 return memoizer 84 85def _memoize_inside_bars_diff_lin(func): 86 def memoizer(original, compare_to): 87 key = repr(original) + repr(compare_to) 88 if key not in Comparison._memoizer_mem: 89 Comparison._memoizer_mem[key] = func(original, compare_to) 90 return copy.deepcopy(Comparison._memoizer_mem[key]) 91 92 return memoizer 93 94def _memoize_lyrics_diff_lin(func): 95 def memoizer(original, compare_to): 96 key = repr(original) + repr(compare_to) 97 if key not in Comparison._memoizer_mem: 98 Comparison._memoizer_mem[key] = func(original, compare_to) 99 return copy.deepcopy(Comparison._memoizer_mem[key]) 100 101 return memoizer 102 103def _memoize_block_diff_lin(func): 104 def memoizer(original, compare_to): 105 key = repr(original) + repr(compare_to) 106 if key not in Comparison._memoizer_mem: 107 Comparison._memoizer_mem[key] = func(original, compare_to) 108 return copy.deepcopy(Comparison._memoizer_mem[key]) 109 110 return memoizer 111 112def _memoize_pitches_lev_diff(func): 113 def memoizer(original, compare_to, noteNode1, noteNode2, ids): 114 key = ( 115 repr(original) 116 + repr(compare_to) 117 + repr(noteNode1) 118 + repr(noteNode2) 119 + repr(ids) 120 ) 121 if key not in Comparison._memoizer_mem: 122 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, ids) 123 return copy.deepcopy(Comparison._memoizer_mem[key]) 124 125 return memoizer 126 127def _memoize_beamtuplet_lev_diff(func): 128 def memoizer(original, compare_to, noteNode1, noteNode2, which): 129 key = ( 130 repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which 131 ) 132 if key not in Comparison._memoizer_mem: 133 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which) 134 return copy.deepcopy(Comparison._memoizer_mem[key]) 135 136 return memoizer 137 138def _memoize_generic_lev_diff(func): 139 def memoizer(original, compare_to, noteNode1, noteNode2, which): 140 key = ( 141 repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which 142 ) 143 if key not in Comparison._memoizer_mem: 144 Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which) 145 return copy.deepcopy(Comparison._memoizer_mem[key]) 146 147 return memoizer 148 149class Comparison: 150 _memoizer_mem: dict = {} 151 152 @staticmethod 153 def _clear_memoizer_caches(): 154 Comparison._memoizer_mem = {} 155 156 @staticmethod 157 def _myers_diff(a_lines, b_lines): 158 # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2) 159 # This marks the farthest-right point along each diagonal in the edit 160 # graph, along with the history that got it there 161 Frontier = namedtuple("Frontier", ["x", "history"]) 162 frontier = {1: Frontier(0, [])} 163 164 a_max = len(a_lines) 165 b_max = len(b_lines) 166 for d in range(0, a_max + b_max + 1): 167 for k in range(-d, d + 1, 2): 168 # This determines whether our next search point will be going down 169 # in the edit graph, or to the right. 170 # 171 # The intuition for this is that we should go down if we're on the 172 # left edge (k == -d) to make sure that the left edge is fully 173 # explored. 174 # 175 # If we aren't on the top (k != d), then only go down if going down 176 # would take us to territory that hasn't sufficiently been explored 177 # yet. 178 go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x) 179 180 # Figure out the starting point of this iteration. The diagonal 181 # offsets come from the geometry of the edit grid - if you're going 182 # down, your diagonal is lower, and if you're going right, your 183 # diagonal is higher. 184 if go_down: 185 old_x, history = frontier[k + 1] 186 x = old_x 187 else: 188 old_x, history = frontier[k - 1] 189 x = old_x + 1 190 191 # We want to avoid modifying the old history, since some other step 192 # may decide to use it. 193 history = history[:] 194 y = x - k 195 196 # We start at the invalid point (0, 0) - we should only start building 197 # up history when we move off of it. 198 if 1 <= y <= b_max and go_down: 199 history.append((1, b_lines[y - 1][1])) # add comparetostep 200 elif 1 <= x <= a_max: 201 history.append((0, a_lines[x - 1][1])) # add originalstep 202 203 # Chew up as many diagonal moves as we can - these correspond to common lines, 204 # and they're considered "free" by the algorithm because we want to maximize 205 # the number of these in the output. 206 while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]: 207 x += 1 208 y += 1 209 history.append((2, a_lines[x - 1][1])) # add equal step 210 211 if x >= a_max and y >= b_max: 212 # If we're here, then we've traversed through the bottom-left corner, 213 # and are done. 214 return np.array(history) 215 216 frontier[k] = Frontier(x, history) 217 218 assert False, "Could not find edit script" 219 220 @staticmethod 221 def _non_common_subsequences_myers(original, compare_to): 222 # Both original and compare_to are list of lists, or numpy arrays with 2 columns. 223 # This is necessary because bars need two representation at the same time. 224 # One without the id (for comparison), and one with the id (to retrieve the bar 225 # at the end). 226 227 # get the list of operations 228 op_list = Comparison._myers_diff( 229 np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64) 230 )[::-1] 231 # retrieve the non common subsequences 232 non_common_subsequences = [] 233 non_common_subsequences.append({"original": [], "compare_to": []}) 234 ind = 0 235 for op in op_list[::-1]: 236 if op[0] == 2: # equal 237 non_common_subsequences.append({"original": [], "compare_to": []}) 238 ind += 1 239 elif op[0] == 0: # original step 240 non_common_subsequences[ind]["original"].append(op[1]) 241 elif op[0] == 1: # compare to step 242 non_common_subsequences[ind]["compare_to"].append(op[1]) 243 # remove the empty dict from the list 244 non_common_subsequences = [ 245 s for s in non_common_subsequences if s != {"original": [], "compare_to": []} 246 ] 247 return non_common_subsequences 248 249 @staticmethod 250 def _non_common_subsequences_of_measures(original_m, compare_to_m): 251 # Take the hash for each measure to run faster comparison 252 # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison), 253 # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after 254 # computation) 255 original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m] 256 compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m] 257 ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int) 258 # retrieve the original pointers to measures 259 new_out = [] 260 for e in ncs: 261 new_out.append({}) 262 for k in e.keys(): 263 new_out[-1][k] = [] 264 for repr_hash in e[k]: 265 if k == "original": 266 new_out[-1][k].append( 267 next(m for m in original_m if m.precomputed_repr == repr_hash) 268 ) 269 else: 270 new_out[-1][k].append( 271 next(m for m in compare_to_m if m.precomputed_repr == repr_hash) 272 ) 273 274 return new_out 275 276 @staticmethod 277 @_memoize_pitches_lev_diff 278 def _pitches_levenshtein_diff( 279 original: list[tuple[str, str, bool]], 280 compare_to: list[tuple[str, str, bool]], 281 noteNode1: AnnNote, 282 noteNode2: AnnNote, 283 ids: tuple[int, int] 284 ): 285 """ 286 Compute the levenshtein distance between two sequences of pitches. 287 Arguments: 288 original {list} -- list of pitches 289 compare_to {list} -- list of pitches 290 noteNode1 {annotatedNote} --for referencing 291 noteNode2 {annotatedNote} --for referencing 292 ids {tuple} -- a tuple of 2 elements with the indices of the notes considered 293 """ 294 if len(original) == 0 and len(compare_to) == 0: 295 return [], 0 296 297 if len(original) == 0: 298 op_list, cost = Comparison._pitches_levenshtein_diff( 299 original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 300 ) 301 op_list.append( 302 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 303 ) 304 cost += M21Utils.pitch_size(compare_to[0]) 305 return op_list, cost 306 307 if len(compare_to) == 0: 308 op_list, cost = Comparison._pitches_levenshtein_diff( 309 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 310 ) 311 op_list.append( 312 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 313 ) 314 cost += M21Utils.pitch_size(original[0]) 315 return op_list, cost 316 317 # compute the cost and the op_list for the many possibilities of recursion 318 cost_dict = {} 319 op_list_dict = {} 320 # del-pitch 321 op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff( 322 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 323 ) 324 cost_dict["delpitch"] += M21Utils.pitch_size(original[0]) 325 op_list_dict["delpitch"].append( 326 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 327 ) 328 # ins-pitch 329 op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff( 330 original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 331 ) 332 cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0]) 333 op_list_dict["inspitch"].append( 334 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 335 ) 336 # edit-pitch 337 op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff( 338 original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1) 339 ) 340 if original[0] == compare_to[0]: # to avoid perform the pitch_diff 341 pitch_diff_op_list = [] 342 pitch_diff_cost = 0 343 else: 344 pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff( 345 original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1]) 346 ) 347 cost_dict["editpitch"] += pitch_diff_cost 348 op_list_dict["editpitch"].extend(pitch_diff_op_list) 349 # compute the minimum of the possibilities 350 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 351 out = op_list_dict[min_key], cost_dict[min_key] 352 return out 353 354 @staticmethod 355 def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids): 356 """ 357 Compute the differences between two pitch (definition from the paper). 358 a pitch consist of a tuple: pitch name (letter+number), accidental, tie. 359 param : pitch1. The music_notation_repr tuple of note1 360 param : pitch2. The music_notation_repr tuple of note2 361 param : noteNode1. The noteNode where pitch1 belongs 362 param : noteNode2. The noteNode where pitch2 belongs 363 param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord 364 Returns: 365 [list] -- the list of differences 366 [int] -- the cost of diff 367 """ 368 cost = 0 369 op_list = [] 370 # add for pitch name differences 371 if pitch1[0] != pitch2[0]: 372 cost += 1 373 # TODO: select the note in a more precise way in case of a chord 374 # rest to note 375 if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"): # xor 376 op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids)) 377 else: # they are two notes 378 op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids)) 379 380 # add for the accidentals 381 if pitch1[1] != pitch2[1]: # if the accidental is different 382 if pitch1[1] == "None": 383 assert pitch2[1] != "None" 384 cost += 1 385 op_list.append(("accidentins", noteNode1, noteNode2, 1, ids)) 386 elif pitch2[1] == "None": 387 assert pitch1[1] != "None" 388 cost += 1 389 op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids)) 390 else: # a different type of alteration is present 391 cost += 2 # delete then add 392 op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids)) 393 # add for the ties 394 if pitch1[2] != pitch2[2]: 395 # exclusive or. Add if one is tied and not the other. 396 # probably to revise for chords 397 cost += 1 398 if pitch1[2]: 399 assert not pitch2[2] 400 op_list.append(("tiedel", noteNode1, noteNode2, 1, ids)) 401 elif pitch2[2]: 402 assert not pitch1[2] 403 op_list.append(("tieins", noteNode1, noteNode2, 1, ids)) 404 return op_list, cost 405 406 @staticmethod 407 @_memoize_block_diff_lin 408 def _block_diff_lin(original, compare_to): 409 if len(original) == 0 and len(compare_to) == 0: 410 return [], 0 411 412 if len(original) == 0: 413 op_list, cost = Comparison._block_diff_lin(original, compare_to[1:]) 414 cost += compare_to[0].notation_size() 415 op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size())) 416 return op_list, cost 417 418 if len(compare_to) == 0: 419 op_list, cost = Comparison._block_diff_lin(original[1:], compare_to) 420 cost += original[0].notation_size() 421 op_list.append(("delbar", original[0], None, original[0].notation_size())) 422 return op_list, cost 423 424 # compute the cost and the op_list for the many possibilities of recursion 425 cost_dict = {} 426 op_list_dict = {} 427 # del-bar 428 op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin( 429 original[1:], compare_to 430 ) 431 cost_dict["delbar"] += original[0].notation_size() 432 op_list_dict["delbar"].append( 433 ("delbar", original[0], None, original[0].notation_size()) 434 ) 435 # ins-bar 436 op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin( 437 original, compare_to[1:] 438 ) 439 cost_dict["insbar"] += compare_to[0].notation_size() 440 op_list_dict["insbar"].append( 441 ("insbar", None, compare_to[0], compare_to[0].notation_size()) 442 ) 443 # edit-bar 444 op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin( 445 original[1:], compare_to[1:] 446 ) 447 if ( 448 original[0] == compare_to[0] 449 ): # to avoid performing the _voices_coupling_recursive/_notes_set_distance 450 # if it's not needed 451 inside_bar_op_list = [] 452 inside_bar_cost = 0 453 else: 454 # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras 455 # instead of lists of AnnNotes) 456 extras_op_list, extras_cost = Comparison._extras_set_distance( 457 original[0].extras_list, compare_to[0].extras_list 458 ) 459 460 # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras) 461 lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin( 462 original[0].lyrics_list, compare_to[0].lyrics_list 463 ) 464 465 if original[0].includes_voicing: 466 # run the voice coupling algorithm, and add to inside_bar_op_list 467 # and inside_bar_cost 468 inside_bar_op_list, inside_bar_cost = ( 469 Comparison._voices_coupling_recursive( 470 original[0].voices_list, compare_to[0].voices_list 471 ) 472 ) 473 else: 474 # run the set distance algorithm, and add to inside_bar_op_list 475 # and inside_bar_cost 476 inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance( 477 original[0].annot_notes, compare_to[0].annot_notes 478 ) 479 480 inside_bar_op_list.extend(extras_op_list) 481 inside_bar_cost += extras_cost 482 inside_bar_op_list.extend(lyrics_op_list) 483 inside_bar_cost += lyrics_cost 484 485 cost_dict["editbar"] += inside_bar_cost 486 op_list_dict["editbar"].extend(inside_bar_op_list) 487 # compute the minimum of the possibilities 488 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 489 out = op_list_dict[min_key], cost_dict[min_key] 490 return out 491 492 @staticmethod 493 @_memoize_lyrics_diff_lin 494 def _lyrics_diff_lin(original, compare_to): 495 # original and compare to are two lists of AnnLyric 496 if len(original) == 0 and len(compare_to) == 0: 497 return [], 0 498 499 if len(original) == 0: 500 cost = 0 501 op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:]) 502 op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size())) 503 cost += compare_to[0].notation_size() 504 return op_list, cost 505 506 if len(compare_to) == 0: 507 cost = 0 508 op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to) 509 op_list.append(("lyricdel", original[0], None, original[0].notation_size())) 510 cost += original[0].notation_size() 511 return op_list, cost 512 513 # compute the cost and the op_list for the many possibilities of recursion 514 cost = {} 515 op_list = {} 516 # lyricdel 517 op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin( 518 original[1:], compare_to 519 ) 520 cost["lyricdel"] += original[0].notation_size() 521 op_list["lyricdel"].append( 522 ("lyricdel", original[0], None, original[0].notation_size()) 523 ) 524 # lyricins 525 op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin( 526 original, compare_to[1:] 527 ) 528 cost["lyricins"] += compare_to[0].notation_size() 529 op_list["lyricins"].append( 530 ("lyricins", None, compare_to[0], compare_to[0].notation_size()) 531 ) 532 # lyricsub 533 op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin( 534 original[1:], compare_to[1:] 535 ) 536 if ( 537 original[0] == compare_to[0] 538 ): # avoid call another function if they are equal 539 lyricsub_op, lyricsub_cost = [], 0 540 else: 541 lyricsub_op, lyricsub_cost = ( 542 Comparison._annotated_lyric_diff(original[0], compare_to[0]) 543 ) 544 cost["lyricsub"] += lyricsub_cost 545 op_list["lyricsub"].extend(lyricsub_op) 546 # compute the minimum of the possibilities 547 min_key = min(cost, key=cost.get) 548 out = op_list[min_key], cost[min_key] 549 return out 550 551 @staticmethod 552 def _strings_levenshtein_distance(str1: str, str2: str): 553 counter: dict = {"+": 0, "-": 0} 554 distance: int = 0 555 for edit_code in ndiff(str1, str2): 556 if edit_code[0] == " ": 557 distance += max(counter.values()) 558 counter = {"+": 0, "-": 0} 559 else: 560 counter[edit_code[0]] += 1 561 distance += max(counter.values()) 562 return distance 563 564 @staticmethod 565 def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool: 566 if off1 == off2: 567 return False 568 569 # this should never happen, but... 570 if off1 is None or off2 is None: 571 if off1 is None and off2 is not None: 572 return True 573 if off1 is not None and off2 is None: 574 return True 575 return False # both are None, therefore not different at all 576 577 diff: OffsetQL = off1 - off2 578 if diff < 0: 579 diff = -diff 580 581 if diff > 0.0001: 582 return True 583 return False 584 585 @staticmethod 586 def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra): 587 """ 588 Compute the differences between two annotated extras. 589 Each annotated extra consists of three values: content, offset, and duration 590 """ 591 cost = 0 592 op_list = [] 593 594 # add for the content 595 if annExtra1.content != annExtra2.content: 596 content_cost: int = ( 597 Comparison._strings_levenshtein_distance( 598 annExtra1.content or '', 599 annExtra2.content or '' 600 ) 601 ) 602 cost += content_cost 603 op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost)) 604 605 # add for the symbolic (cost 2: delete one symbol, add the other) 606 if annExtra1.symbolic != annExtra2.symbolic: 607 cost += 2 608 op_list.append(("extrasymboledit", annExtra1, annExtra2, 2)) 609 610 # add for the infodict 611 if annExtra1.infodict != annExtra2.infodict: 612 info_cost: int = 0 613 # handle everything in annExtra1 (whether or not it is in annExtra2) 614 for k, v in annExtra1.infodict.items(): 615 if k not in annExtra2.infodict: 616 # not in annExtra2: delete a symbol 617 info_cost += 1 618 elif v != annExtra2.infodict[k]: 619 # different in annExtra2: delete a symbol, add a symbol 620 info_cost += 2 621 # handle everything in annExtra2 that is not in annExtra1 622 for k in annExtra2.infodict: 623 if k not in annExtra1.infodict: 624 # add a symbol 625 info_cost += 1 626 cost += info_cost 627 op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost)) 628 629 # add for the offset 630 # Note: offset here is a float, and some file formats have only four 631 # decimal places of precision. So we should not compare exactly here. 632 if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset): 633 cost += 1 634 op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1)) 635 636 # add for the duration 637 # Note: duration here is a float, and some file formats have only four 638 # decimal places of precision. So we should not compare exactly here. 639 if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration): 640 cost += 1 641 op_list.append(("extradurationedit", annExtra1, annExtra2, 1)) 642 643 # add for the style 644 if annExtra1.styledict != annExtra2.styledict: 645 cost += 1 # someday we might count different items in the styledict 646 op_list.append(("extrastyleedit", annExtra1, annExtra2, 1)) 647 648 return op_list, cost 649 650 @staticmethod 651 def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric): 652 """ 653 Compute the differences between two annotated lyrics. 654 Each annotated lyric consists of five values: lyric, verse_id, offset, duration, 655 and styledict. 656 """ 657 cost = 0 658 op_list = [] 659 660 # add for the content 661 if annLyric1.lyric != annLyric2.lyric: 662 content_cost: int = ( 663 Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric) 664 ) 665 cost += content_cost 666 op_list.append(("lyricedit", annLyric1, annLyric2, content_cost)) 667 668 # add for the number 669 if annLyric1.number != annLyric2.number: 670 number_cost: int 671 if annLyric1.number == 0 or annLyric2.number == 0: 672 # add or delete number 673 number_cost = 1 674 else: 675 # add and delete number 676 number_cost = 2 677 cost += number_cost 678 op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost)) 679 680 # add for the identifier 681 if annLyric1.identifier != annLyric2.identifier: 682 # someday we might do a Levenshtein distance of the two ids 683 id_cost: int 684 if not annLyric1.identifier or not annLyric1.identifier: 685 # add or delete identifier 686 id_cost = 1 687 else: 688 # add and delete identifier 689 id_cost = 2 690 cost += id_cost 691 op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost)) 692 693 # add for the offset 694 # Note: offset here is a float, and some file formats have only four 695 # decimal places of precision. So we should not compare exactly here. 696 if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset): 697 cost += 1 698 op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1)) 699 700 # add for the style 701 if annLyric1.styledict != annLyric2.styledict: 702 cost += 1 # someday we might count different items in the styledict 703 op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1)) 704 705 return op_list, cost 706 707 @staticmethod 708 def _annotated_metadata_item_diff( 709 annMetadataItem1: AnnMetadataItem, 710 annMetadataItem2: AnnMetadataItem 711 ): 712 """ 713 Compute the differences between two annotated metadata items. 714 Each annotated metadata item has two values: key: str, value: t.Any, 715 """ 716 cost = 0 717 op_list = [] 718 719 # add for the key (this never adds anything because we don't compare 720 # items that don't have the same key). 721 if annMetadataItem1.key != annMetadataItem2.key: 722 key_cost: int = ( 723 Comparison._strings_levenshtein_distance( 724 annMetadataItem1.key, 725 annMetadataItem2.key 726 ) 727 ) 728 cost += key_cost 729 op_list.append(("mditemkeyedit", annMetadataItem1, annMetadataItem2, key_cost)) 730 731 # add for the value 732 if annMetadataItem1.value != annMetadataItem2.value: 733 value_cost: int = ( 734 Comparison._strings_levenshtein_distance( 735 str(annMetadataItem1.value), 736 str(annMetadataItem2.value) 737 ) 738 ) 739 cost += value_cost 740 op_list.append( 741 ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost) 742 ) 743 744 return op_list, cost 745 746 @staticmethod 747 def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup): 748 """ 749 Compute the differences between two annotated staff groups. 750 Each annotated staff group consists of five values: name, abbreviation, 751 symbol, barTogether, part_indices. 752 """ 753 cost = 0 754 op_list = [] 755 756 # add for the name 757 if annStaffGroup1.name != annStaffGroup2.name: 758 name_cost: int = ( 759 Comparison._strings_levenshtein_distance( 760 annStaffGroup1.name, 761 annStaffGroup2.name 762 ) 763 ) 764 cost += name_cost 765 op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost)) 766 767 # add for the abbreviation 768 if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation: 769 abbreviation_cost: int = ( 770 Comparison._strings_levenshtein_distance( 771 annStaffGroup1.abbreviation, 772 annStaffGroup2.abbreviation 773 ) 774 ) 775 cost += abbreviation_cost 776 op_list.append(( 777 "staffgrpabbreviationedit", 778 annStaffGroup1, 779 annStaffGroup2, 780 abbreviation_cost 781 )) 782 783 # add for the symbol 784 if annStaffGroup1.symbol != annStaffGroup2.symbol: 785 symbol_cost: int 786 if not annStaffGroup1.symbol or not annStaffGroup2.symbol: 787 # add or delete symbol 788 symbol_cost = 1 789 else: 790 # add and delete symbol 791 symbol_cost = 2 792 cost += symbol_cost 793 op_list.append( 794 ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost) 795 ) 796 797 # add for barTogether 798 if annStaffGroup1.barTogether != annStaffGroup2.barTogether: 799 barTogether_cost: int = 1 800 cost += barTogether_cost 801 op_list.append( 802 ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost) 803 ) 804 805 # add for partIndices (sorted list of int) 806 if annStaffGroup1.part_indices != annStaffGroup2.part_indices: 807 partIndices_cost: int = 0 808 if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]: 809 partIndices_cost += 1 # vertical start 810 if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]: 811 partIndices_cost += 1 # vertical height 812 if partIndices_cost == 0: 813 # should never get here, but we have to have a cost 814 partIndices_cost = 1 815 cost += partIndices_cost 816 op_list.append( 817 ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost) 818 ) 819 820 return op_list, cost 821 822 @staticmethod 823 @_memoize_inside_bars_diff_lin 824 def _inside_bars_diff_lin(original, compare_to): 825 # original and compare to are two lists of annotatedNote 826 if len(original) == 0 and len(compare_to) == 0: 827 return [], 0 828 829 if len(original) == 0: 830 cost = 0 831 op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:]) 832 op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size())) 833 cost += compare_to[0].notation_size() 834 return op_list, cost 835 836 if len(compare_to) == 0: 837 cost = 0 838 op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to) 839 op_list.append(("notedel", original[0], None, original[0].notation_size())) 840 cost += original[0].notation_size() 841 return op_list, cost 842 843 # compute the cost and the op_list for the many possibilities of recursion 844 cost = {} 845 op_list = {} 846 # notedel 847 op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin( 848 original[1:], compare_to 849 ) 850 cost["notedel"] += original[0].notation_size() 851 op_list["notedel"].append( 852 ("notedel", original[0], None, original[0].notation_size()) 853 ) 854 # noteins 855 op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin( 856 original, compare_to[1:] 857 ) 858 cost["noteins"] += compare_to[0].notation_size() 859 op_list["noteins"].append( 860 ("noteins", None, compare_to[0], compare_to[0].notation_size()) 861 ) 862 # notesub 863 op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin( 864 original[1:], compare_to[1:] 865 ) 866 if ( 867 original[0] == compare_to[0] 868 ): # avoid call another function if they are equal 869 notesub_op, notesub_cost = [], 0 870 else: 871 notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0]) 872 cost["notesub"] += notesub_cost 873 op_list["notesub"].extend(notesub_op) 874 # compute the minimum of the possibilities 875 min_key = min(cost, key=cost.get) 876 out = op_list[min_key], cost[min_key] 877 return out 878 879 @staticmethod 880 def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote): 881 """ 882 Compute the differences between two annotated notes. 883 Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets) 884 where pitches is a list. 885 Arguments: 886 noteNode1 {[AnnNote]} -- original AnnNote 887 noteNode2 {[AnnNote]} -- compare_to AnnNote 888 """ 889 cost = 0 890 op_list = [] 891 # add for the pitches 892 # if they are equal 893 if annNote1.pitches == annNote2.pitches: 894 op_list_pitch, cost_pitch = [], 0 895 else: 896 # pitches diff is computed using Levenshtein distances (they are already ordered) 897 op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff( 898 annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0) 899 ) 900 op_list.extend(op_list_pitch) 901 cost += cost_pitch 902 # add for the notehead 903 if annNote1.note_head != annNote2.note_head: 904 # delete one note head, add the other (this isn't noteshape, this is 905 # just quarter-note note head vs half-note note head, etc) 906 cost += 2 907 op_list.append(("headedit", annNote1, annNote2, 2)) 908 # add for the dots 909 if annNote1.dots != annNote2.dots: 910 # add one for each added (or deleted) dot 911 dots_diff = abs(annNote1.dots - annNote2.dots) 912 cost += dots_diff 913 if annNote1.dots > annNote2.dots: 914 op_list.append(("dotdel", annNote1, annNote2, dots_diff)) 915 else: 916 op_list.append(("dotins", annNote1, annNote2, dots_diff)) 917 if annNote1.graceType != annNote2.graceType: 918 # accented vs unaccented vs not a grace note (delete the wrong, add the right) 919 cost += 2 920 op_list.append(("graceedit", annNote1, annNote2, 2)) 921 if annNote1.graceSlash != annNote2.graceSlash: 922 # add or delete the slash 923 cost += 1 924 op_list.append(("graceslashedit", annNote1, annNote2, 1)) 925 # add for the beamings 926 if annNote1.beamings != annNote2.beamings: 927 beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff( 928 annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam" 929 ) 930 op_list.extend(beam_op_list) 931 cost += beam_cost 932 # add for the tuplet types 933 if annNote1.tuplets != annNote2.tuplets: 934 tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff( 935 annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet" 936 ) 937 op_list.extend(tuplet_op_list) 938 cost += tuplet_cost 939 # add for the tuplet info 940 if annNote1.tuplet_info != annNote2.tuplet_info: 941 tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff( 942 annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet" 943 ) 944 op_list.extend(tuplet_info_op_list) 945 cost += tuplet_info_cost 946 # add for the articulations 947 if annNote1.articulations != annNote2.articulations: 948 artic_op_list, artic_cost = Comparison._generic_levenshtein_diff( 949 annNote1.articulations, 950 annNote2.articulations, 951 annNote1, 952 annNote2, 953 "articulation", 954 ) 955 op_list.extend(artic_op_list) 956 cost += artic_cost 957 # add for the expressions 958 if annNote1.expressions != annNote2.expressions: 959 expr_op_list, expr_cost = Comparison._generic_levenshtein_diff( 960 annNote1.expressions, 961 annNote2.expressions, 962 annNote1, 963 annNote2, 964 "expression", 965 ) 966 op_list.extend(expr_op_list) 967 cost += expr_cost 968 969 # add for gap from previous note or start of measure if first note in measure 970 # (i.e. horizontal position shift) 971 if annNote1.gap_dur != annNote2.gap_dur: 972 # in all cases, the edit is a simple horizontal shift of the note 973 cost += 1 974 if annNote1.gap_dur == 0: 975 op_list.append(("insspace", annNote1, annNote2, 1)) 976 elif annNote2.gap_dur == 0: 977 op_list.append(("delspace", annNote1, annNote2, 1)) 978 else: 979 # neither is zero 980 op_list.append(("editspace", annNote1, annNote2, 1)) 981 982 # add for noteshape 983 if annNote1.noteshape != annNote2.noteshape: 984 # always delete existing note shape and add the new one 985 cost += 2 986 op_list.append(("editnoteshape", annNote1, annNote2, 2)) 987 # add for noteheadFill 988 if annNote1.noteheadFill != annNote2.noteheadFill: 989 # always delete existing note fill and add the new one 990 cost += 2 991 op_list.append(("editnoteheadfill", annNote1, annNote2, 2)) 992 # add for noteheadParenthesis (True or False) 993 if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis: 994 # always either add or delete parentheses 995 cost += 1 996 op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1)) 997 # add for stemDirection 998 if annNote1.stemDirection != annNote2.stemDirection: 999 stemdir_cost: int 1000 if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem': 1001 # gonna add a stem 1002 stemdir_cost = 1 1003 else: 1004 # gonna change a stem (add then delete) 1005 stemdir_cost = 2 1006 cost += stemdir_cost 1007 op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost)) 1008 # add for the styledict 1009 if annNote1.styledict != annNote2.styledict: 1010 cost += 1 1011 op_list.append(("editstyle", annNote1, annNote2, 1)) 1012 1013 return op_list, cost 1014 1015 @staticmethod 1016 @_memoize_beamtuplet_lev_diff 1017 def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which): 1018 """ 1019 Compute the levenshtein distance between two sequences of beaming or tuples. 1020 Arguments: 1021 original {list} -- list of strings (start, stop, continue or partial) 1022 compare_to {list} -- list of strings (start, stop, continue or partial) 1023 note1 {AnnNote} -- the note for referencing in the score 1024 note2 {AnnNote} -- the note for referencing in the score 1025 which -- a string: "beam" or "tuplet" depending what we are comparing 1026 """ 1027 if which not in ("beam", "tuplet"): 1028 raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'") 1029 1030 if len(original) == 0 and len(compare_to) == 0: 1031 return [], 0 1032 1033 if len(original) == 0: 1034 op_list, cost = Comparison._beamtuplet_levenshtein_diff( 1035 original, compare_to[1:], note1, note2, which 1036 ) 1037 op_list.append(("ins" + which, note1, note2, 1)) 1038 cost += 1 1039 return op_list, cost 1040 1041 if len(compare_to) == 0: 1042 op_list, cost = Comparison._beamtuplet_levenshtein_diff( 1043 original[1:], compare_to, note1, note2, which 1044 ) 1045 op_list.append(("del" + which, note1, note2, 1)) 1046 cost += 1 1047 return op_list, cost 1048 1049 # compute the cost and the op_list for the many possibilities of recursion 1050 cost = {} 1051 op_list = {} 1052 # delwhich 1053 op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff( 1054 original[1:], compare_to, note1, note2, which 1055 ) 1056 cost["del" + which] += 1 1057 op_list["del" + which].append(("del" + which, note1, note2, 1)) 1058 # inswhich 1059 op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff( 1060 original, compare_to[1:], note1, note2, which 1061 ) 1062 cost["ins" + which] += 1 1063 op_list["ins" + which].append(("ins" + which, note1, note2, 1)) 1064 # editwhich 1065 op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff( 1066 original[1:], compare_to[1:], note1, note2, which 1067 ) 1068 if original[0] == compare_to[0]: 1069 beam_diff_op_list = [] 1070 beam_diff_cost = 0 1071 else: 1072 beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1 1073 cost["edit" + which] += beam_diff_cost 1074 op_list["edit" + which].extend(beam_diff_op_list) 1075 # compute the minimum of the possibilities 1076 min_key = min(cost, key=cost.get) 1077 out = op_list[min_key], cost[min_key] 1078 return out 1079 1080 @staticmethod 1081 @_memoize_generic_lev_diff 1082 def _generic_levenshtein_diff(original, compare_to, note1, note2, which): 1083 """ 1084 Compute the Levenshtein distance between two generic sequences of symbols 1085 (e.g., articulations). 1086 Arguments: 1087 original {list} -- list of strings 1088 compare_to {list} -- list of strings 1089 note1 {AnnNote} -- the note for referencing in the score 1090 note2 {AnnNote} -- the note for referencing in the score 1091 which -- a string: e.g. "articulation" depending what we are comparing 1092 """ 1093 if len(original) == 0 and len(compare_to) == 0: 1094 return [], 0 1095 1096 if len(original) == 0: 1097 op_list, cost = Comparison._generic_levenshtein_diff( 1098 original, compare_to[1:], note1, note2, which 1099 ) 1100 op_list.append(("ins" + which, note1, note2, 1)) 1101 cost += 1 1102 return op_list, cost 1103 1104 if len(compare_to) == 0: 1105 op_list, cost = Comparison._generic_levenshtein_diff( 1106 original[1:], compare_to, note1, note2, which 1107 ) 1108 op_list.append(("del" + which, note1, note2, 1)) 1109 cost += 1 1110 return op_list, cost 1111 1112 # compute the cost and the op_list for the many possibilities of recursion 1113 cost = {} 1114 op_list = {} 1115 # delwhich 1116 op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff( 1117 original[1:], compare_to, note1, note2, which 1118 ) 1119 cost["del" + which] += 1 1120 op_list["del" + which].append(("del" + which, note1, note2, 1)) 1121 # inswhich 1122 op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff( 1123 original, compare_to[1:], note1, note2, which 1124 ) 1125 cost["ins" + which] += 1 1126 op_list["ins" + which].append(("ins" + which, note1, note2, 1)) 1127 # editwhich 1128 op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff( 1129 original[1:], compare_to[1:], note1, note2, which 1130 ) 1131 if original[0] == compare_to[0]: # to avoid perform the diff 1132 generic_diff_op_list = [] 1133 generic_diff_cost = 0 1134 else: 1135 generic_diff_op_list, generic_diff_cost = ( 1136 [("edit" + which, note1, note2, 1)], 1137 1, 1138 ) 1139 cost["edit" + which] += generic_diff_cost 1140 op_list["edit" + which].extend(generic_diff_op_list) 1141 # compute the minimum of the possibilities 1142 min_key = min(cost, key=cost.get) 1143 out = op_list[min_key], cost[min_key] 1144 return out 1145 1146 @staticmethod 1147 @_memoize_notes_set_distance 1148 def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]): 1149 """ 1150 Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in 1151 that order of importance). If you can't find an exactly matching note, try again without 1152 visual duration. 1153 original [list] -- a list of AnnNote (which are never chords) 1154 compare_to [list] -- a list of AnnNote (which are never chords) 1155 """ 1156 paired_notes: list[tuple[AnnNote, AnnNote]] = [] 1157 unpaired_orig_notes: list[AnnNote] = [] 1158 unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to) 1159 1160 for orig_n in original: 1161 fallback: AnnNote | None = None 1162 fallback_i: int = -1 1163 found_it: bool = False 1164 for i, comp_n in enumerate(unpaired_comp_notes): 1165 if orig_n.pitches[0][0] != comp_n.pitches[0][0]: 1166 # this pitch comparison (1) assumes the note is not a chord 1167 # (because we don't do chords when Voicing is not set, and 1168 # we only call _notes_set_distance when Voicing is not set), 1169 # and (2) only compares the visual position of the note (we 1170 # are ignoring the accidental here). This is so that an 1171 # accidental change will show up as a pitch edit, not a 1172 # note remove/insert. 1173 continue 1174 if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset): 1175 continue 1176 if orig_n.note_is_grace != comp_n.note_is_grace: 1177 continue 1178 if fallback is None: 1179 fallback = comp_n 1180 fallback_i = i 1181 1182 if orig_n.note_dur_type != comp_n.note_dur_type: 1183 continue 1184 if orig_n.note_dur_dots != comp_n.note_dur_dots: 1185 continue 1186 1187 # found a perfect match 1188 paired_notes.append((orig_n, comp_n)) 1189 1190 # remove comp_n from unpaired_comp_notes 1191 unpaired_comp_notes.pop(i) # remove(comp_n) would sometimes get the wrong one 1192 1193 found_it = True 1194 break 1195 1196 if found_it: 1197 # on to the next original note 1198 continue 1199 1200 # did we find a fallback (matched except for duration)? 1201 if fallback is not None: 1202 paired_notes.append((orig_n, fallback)) 1203 unpaired_comp_notes.pop(fallback_i) 1204 continue 1205 1206 # we found nothing 1207 unpaired_orig_notes.append(orig_n) 1208 1209 # compute the cost and the op_list 1210 cost: int = 0 1211 op_list: list = [] 1212 1213 # notedel 1214 if unpaired_orig_notes: 1215 for an in unpaired_orig_notes: 1216 cost += an.notation_size() 1217 op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord)) 1218 1219 # noteins 1220 if unpaired_comp_notes: 1221 for an in unpaired_comp_notes: 1222 cost += an.notation_size() 1223 op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord)) 1224 1225 # notesub 1226 if paired_notes: 1227 for ano, anc in paired_notes: 1228 if ano == anc: 1229 # if equal, avoid _annotated_note_diff call 1230 notesub_op, notesub_cost = [], 0 1231 else: 1232 notesub_op, notesub_cost = ( 1233 Comparison._annotated_note_diff(ano, anc) 1234 ) 1235 cost += notesub_cost 1236 op_list.extend(notesub_op) 1237 1238 return op_list, cost 1239 1240 @staticmethod 1241 @_memoize_extras_set_distance 1242 def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]): 1243 """ 1244 Gather up pairs of matching extras (using kind, offset, and visual duration, in 1245 that order of importance). If you can't find an exactly matching extra, try again 1246 without visual duration. 1247 original [list] -- a list of AnnExtras 1248 compare_to [list] -- a list of AnnExtras 1249 """ 1250 paired_extras: list[tuple[AnnExtra, AnnExtra]] = [] 1251 unpaired_orig_extras: list[AnnExtra] = [] 1252 unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to) 1253 1254 for orig_x in original: 1255 fallback: AnnExtra | None = None 1256 fallback_i: int = -1 1257 found_it: bool = False 1258 for i, comp_x in enumerate(unpaired_comp_extras): 1259 # kind and offset are required for pairing 1260 if orig_x.kind != comp_x.kind: 1261 continue 1262 if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset): 1263 continue 1264 if fallback is None: 1265 fallback = comp_x 1266 fallback_i = i 1267 1268 # duration is preferred for pairing 1269 if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration): 1270 continue 1271 1272 # there are a few kind-specific elements that are also preferred: 1273 # 'direction'/'ending': content (visible text) 1274 # 'keysig'/'timesig'/'clef': symbolic (there are sometimes two 1275 # simultaneous keysigs, timesigs, or clefs, and we don't 1276 # want to confuse which one is which, producing a diff where 1277 # there actually isn't one) 1278 # 'slur': placements (because there are often two identical slurs 1279 # whose only difference is 'above' vs 'below', producing a diff 1280 # where there actually isn't one) 1281 if orig_x.kind in ('direction', 'ending'): 1282 if orig_x.content != comp_x.content: 1283 continue 1284 if orig_x.kind == 'clef': 1285 if orig_x.symbolic != comp_x.symbolic: 1286 continue 1287 if orig_x.kind in ('keysig', 'timesig'): 1288 if orig_x.infodict != comp_x.infodict: 1289 continue 1290 if orig_x.kind == 'slur': 1291 orig_placement: str = orig_x.styledict.get('placement', '') 1292 comp_placement: str = comp_x.styledict.get('placement', '') 1293 if orig_placement != comp_placement: 1294 continue 1295 1296 # found a perfect match 1297 paired_extras.append((orig_x, comp_x)) 1298 1299 # remove comp_n from unpaired_comp_extras 1300 unpaired_comp_extras.pop(i) # remove(comp_n) would sometimes get the wrong one 1301 1302 found_it = True 1303 break 1304 1305 if found_it: 1306 # on to the next original extra 1307 continue 1308 1309 # did we find a fallback (matched except for duration)? 1310 if fallback is not None: 1311 paired_extras.append((orig_x, fallback)) 1312 unpaired_comp_extras.pop(fallback_i) 1313 continue 1314 1315 # we found nothing 1316 unpaired_orig_extras.append(orig_x) 1317 1318 # compute the cost and the op_list 1319 cost: int = 0 1320 op_list: list = [] 1321 1322 # extradel 1323 for extra in unpaired_orig_extras: 1324 cost += extra.notation_size() 1325 op_list.append(("extradel", extra, None, extra.notation_size())) 1326 1327 # extrains 1328 for extra in unpaired_comp_extras: 1329 cost += extra.notation_size() 1330 op_list.append(("extrains", None, extra, extra.notation_size())) 1331 1332 # extrasub 1333 if paired_extras: 1334 for extrao, extrac in paired_extras: 1335 if extrao == extrac: 1336 # if equal, avoid _annotated_extra_diff call 1337 extrasub_op, extrasub_cost = [], 0 1338 else: 1339 extrasub_op, extrasub_cost = ( 1340 Comparison._annotated_extra_diff(extrao, extrac) 1341 ) 1342 cost += extrasub_cost 1343 op_list.extend(extrasub_op) 1344 1345 return op_list, cost 1346 1347 @staticmethod 1348 @_memoize_metadata_items_set_distance 1349 def _metadata_items_set_distance( 1350 original: list[AnnMetadataItem], 1351 compare_to: list[AnnMetadataItem] 1352 ): 1353 """ 1354 Gather up pairs of matching metadata_items (using key and value). If you can't find 1355 an exactly matching metadata_item, try again without value. 1356 original [list] -- a list of AnnMetadataItems 1357 compare_to [list] -- a list of AnnMetadataItems 1358 """ 1359 paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = [] 1360 unpaired_orig_metadata_items: list[AnnMetadataItem] = [] 1361 unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to) 1362 1363 # first look for perfect matches 1364 for orig_mdi in original: 1365 found_it: bool = False 1366 for i, comp_mdi in enumerate(unpaired_comp_metadata_items): 1367 # key is required for perfect match 1368 if orig_mdi.key != comp_mdi.key: 1369 continue 1370 1371 # value is required for perfect match 1372 if orig_mdi.value != comp_mdi.value: 1373 continue 1374 1375 # found a perfect match 1376 paired_metadata_items.append((orig_mdi, comp_mdi)) 1377 1378 # remove comp_mdi from unpaired_comp_metadata_items 1379 unpaired_comp_metadata_items.pop(i) 1380 1381 found_it = True 1382 break 1383 1384 if found_it: 1385 # on to the next original metadata_item 1386 continue 1387 1388 # we found no perfect match 1389 unpaired_orig_metadata_items.append(orig_mdi) 1390 1391 # now look among the unpaired remainders for key match only 1392 remove_unpaired_orig_items: list[AnnMetadataItem] = [] 1393 for orig_mdi in unpaired_orig_metadata_items: 1394 for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items): 1395 # key is required for key-only match 1396 if orig_mdi.key != comp_mdi.key: 1397 continue 1398 1399 # found a key-only match 1400 paired_metadata_items.append((orig_mdi, comp_mdi)) 1401 1402 # remove comp_mdi from unpaired_comp_metadata_items 1403 unpaired_comp_metadata_items.pop(comp_idx) 1404 1405 # make a note of unpaired_orig_metadata_item to remove later 1406 remove_unpaired_orig_items.append(orig_mdi) 1407 break 1408 1409 for remove_mdi in remove_unpaired_orig_items: 1410 unpaired_orig_metadata_items.remove(remove_mdi) 1411 1412 # compute the cost and the op_list 1413 cost: int = 0 1414 op_list: list = [] 1415 1416 # mditemdel 1417 for metadata_item in unpaired_orig_metadata_items: 1418 cost += metadata_item.notation_size() 1419 op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size())) 1420 1421 # mditemins 1422 for metadata_item in unpaired_comp_metadata_items: 1423 cost += metadata_item.notation_size() 1424 op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size())) 1425 1426 # mditemsub 1427 if paired_metadata_items: 1428 for metadata_itemo, metadata_itemc in paired_metadata_items: 1429 if metadata_itemo == metadata_itemc: 1430 # if equal, avoid _annotated_metadata_item_diff call 1431 metadata_itemsub_op, metadata_itemsub_cost = [], 0 1432 else: 1433 metadata_itemsub_op, metadata_itemsub_cost = ( 1434 Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc) 1435 ) 1436 cost += metadata_itemsub_cost 1437 op_list.extend(metadata_itemsub_op) 1438 1439 return op_list, cost 1440 1441 @staticmethod 1442 @_memoize_staff_groups_set_distance 1443 def _staff_groups_set_distance( 1444 original: list[AnnStaffGroup], 1445 compare_to: list[AnnStaffGroup] 1446 ): 1447 """ 1448 Gather up pairs of matching staffgroups (using start_index and end_index, in 1449 that order of importance). If you can't find an exactly matching staffgroup, 1450 try again without end_index. 1451 original [list] -- a list of AnnStaffGroups 1452 compare_to [list] -- a list of AnnStaffGroups 1453 """ 1454 paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = [] 1455 unpaired_orig_staff_groups: list[AnnStaffGroup] = [] 1456 unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to) 1457 1458 for orig_sg in original: 1459 fallback: AnnStaffGroup | None = None 1460 fallback_i: int = -1 1461 found_it: bool = False 1462 for i, comp_sg in enumerate(unpaired_comp_staff_groups): 1463 # start index is required for pairing 1464 if orig_sg.part_indices[0] != comp_sg.part_indices[0]: 1465 continue 1466 if fallback is None: 1467 fallback = comp_sg 1468 fallback_i = i 1469 1470 # end index and symbol (brace, bracket, etc) is preferred for pairing 1471 if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]: 1472 continue 1473 if orig_sg.symbol != comp_sg.symbol: 1474 continue 1475 1476 # found a perfect match 1477 paired_staff_groups.append((orig_sg, comp_sg)) 1478 1479 # remove comp_n from unpaired_comp_staff_groups 1480 unpaired_comp_staff_groups.pop(i) 1481 1482 found_it = True 1483 break 1484 1485 if found_it: 1486 # on to the next original staff_group 1487 continue 1488 1489 # did we find a fallback (matched except for duration)? 1490 if fallback is not None: 1491 paired_staff_groups.append((orig_sg, fallback)) 1492 unpaired_comp_staff_groups.pop(fallback_i) 1493 continue 1494 1495 # we found nothing 1496 unpaired_orig_staff_groups.append(orig_sg) 1497 1498 # compute the cost and the op_list 1499 cost: int = 0 1500 op_list: list = [] 1501 1502 # staffgrpdel 1503 for staff_group in unpaired_orig_staff_groups: 1504 cost += staff_group.notation_size() 1505 op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size())) 1506 1507 # staffgrpins 1508 for staff_group in unpaired_comp_staff_groups: 1509 cost += staff_group.notation_size() 1510 op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size())) 1511 1512 # staffgrpsub 1513 if paired_staff_groups: 1514 for staff_groupo, staff_groupc in paired_staff_groups: 1515 if staff_groupo == staff_groupc: 1516 # if equal, avoid _annotated_staff_group_diff call 1517 staff_groupsub_op, staff_groupsub_cost = [], 0 1518 else: 1519 staff_groupsub_op, staff_groupsub_cost = ( 1520 Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc) 1521 ) 1522 cost += staff_groupsub_cost 1523 op_list.extend(staff_groupsub_op) 1524 1525 return op_list, cost 1526 1527 @staticmethod 1528 def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]): 1529 """ 1530 Compare all the possible voices permutations, considering also deletion and 1531 insertion (equation on office lens). 1532 original [list] -- a list of Voice 1533 compare_to [list] -- a list of Voice 1534 """ 1535 if len(original) == 0 and len(compare_to) == 0: # stop the recursion 1536 return [], 0 1537 1538 if len(original) == 0: 1539 # insertion 1540 op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:]) 1541 # add for the inserted voice 1542 op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size())) 1543 cost += compare_to[0].notation_size() 1544 return op_list, cost 1545 1546 if len(compare_to) == 0: 1547 # deletion 1548 op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to) 1549 # add for the deleted voice 1550 op_list.append(("voicedel", original[0], None, original[0].notation_size())) 1551 cost += original[0].notation_size() 1552 return op_list, cost 1553 1554 cost = {} 1555 op_list = {} 1556 # deletion 1557 op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive( 1558 original[1:], compare_to 1559 ) 1560 op_list["voicedel"].append( 1561 ("voicedel", original[0], None, original[0].notation_size()) 1562 ) 1563 cost["voicedel"] += original[0].notation_size() 1564 for i, c in enumerate(compare_to): 1565 # substitution 1566 ( 1567 op_list["voicesub" + str(i)], 1568 cost["voicesub" + str(i)], 1569 ) = Comparison._voices_coupling_recursive( 1570 original[1:], compare_to[:i] + compare_to[i + 1:] 1571 ) 1572 if ( 1573 compare_to[0] != original[0] 1574 ): # add the cost of the sub and the operations from inside_bar_diff 1575 op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin( 1576 original[0].annot_notes, c.annot_notes 1577 ) # compute the distance from original[0] and compare_to[i] 1578 op_list["voicesub" + str(i)].extend(op_list_inside_bar) 1579 cost["voicesub" + str(i)] += cost_inside_bar 1580 # compute the minimum of the possibilities 1581 min_key = min(cost, key=cost.get) 1582 out = op_list[min_key], cost[min_key] 1583 return out 1584 1585 @staticmethod 1586 def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]: 1587 ''' 1588 Compare two annotated scores, computing an operations list and the cost of applying those 1589 operations to the first score to generate the second score. 1590 1591 Args: 1592 score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare. 1593 score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare. 1594 1595 Returns: 1596 list[tuple], int: The operations list and the cost 1597 ''' 1598 # Clear all memoizer caches, in case we are called again with different scores. 1599 # The cached results are no longer valid. 1600 Comparison._clear_memoizer_caches() 1601 1602 op_list_total: list[tuple] = [] 1603 cost_total: int = 0 1604 1605 if score1.n_of_parts == score2.n_of_parts: 1606 n_of_parts = score1.n_of_parts 1607 else: 1608 # The two scores have differing number of parts. For now, assume that 1609 # the parts are in the same order in both scores, and that the missing 1610 # parts are the ones that should have been at the end of the smaller 1611 # score. In future we could do something like we do with voices, where 1612 # we try all the combinations and compare the most-similar pairs of 1613 # parts, and the rest are considered to be the extra parts (deleted 1614 # from score1, or inserted into score2). 1615 n_of_parts = min(score1.n_of_parts, score2.n_of_parts) 1616 if score1.n_of_parts > score2.n_of_parts: 1617 # score1 has more parts that must be deleted 1618 for part_idx in range(score2.n_of_parts, score1.n_of_parts): 1619 deleted_part = score1.part_list[part_idx] 1620 op_list_total.append( 1621 ( 1622 "delpart", 1623 deleted_part, 1624 None, 1625 deleted_part.notation_size() 1626 ) 1627 ) 1628 cost_total += deleted_part.notation_size() 1629 else: 1630 # score2 has more parts that must be inserted 1631 for part_idx in range(score1.n_of_parts, score2.n_of_parts): 1632 inserted_part = score2.part_list[part_idx] 1633 op_list_total.append( 1634 ( 1635 "inspart", 1636 None, 1637 inserted_part, 1638 inserted_part.notation_size() 1639 ) 1640 ) 1641 cost_total += inserted_part.notation_size() 1642 1643 # iterate over parts that exist in both scores 1644 for p_number in range(n_of_parts): 1645 # compute non-common-subseq 1646 ncs = Comparison._non_common_subsequences_of_measures( 1647 score1.part_list[p_number].bar_list, 1648 score2.part_list[p_number].bar_list, 1649 ) 1650 # compute blockdiff 1651 for subseq in ncs: 1652 op_list_block, cost_block = Comparison._block_diff_lin( 1653 subseq["original"], subseq["compare_to"] 1654 ) 1655 op_list_total.extend(op_list_block) 1656 cost_total += cost_block 1657 1658 # compare the staff groups 1659 groups_op_list, groups_cost = Comparison._staff_groups_set_distance( 1660 score1.staff_group_list, score2.staff_group_list 1661 ) 1662 op_list_total.extend(groups_op_list) 1663 cost_total += groups_cost 1664 1665 # compare the metadata items 1666 mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance( 1667 score1.metadata_items_list, score2.metadata_items_list 1668 ) 1669 op_list_total.extend(mditems_op_list) 1670 cost_total += mditems_cost 1671 1672 # Add the cost of any syntax errors in score1 that were fixed during parsing. 1673 # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency. 1674 total_syms: int = score1.notation_size() + score2.notation_size() 1675 cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed 1676 if cost_plus_errors > total_syms: 1677 adjustment: int = cost_plus_errors - total_syms 1678 score1.num_syntax_errors_fixed -= adjustment 1679 1680 cost_total += score1.num_syntax_errors_fixed 1681 1682 return op_list_total, cost_total
class
EvaluationMetrics:
30class EvaluationMetrics: 31 def __init__( 32 self, 33 gt_path: Path, 34 pred_path: Path, 35 gt_numsyms: int, 36 pred_numsyms: int, 37 omr_edit_distance: int, 38 edit_distances_dict: dict[str, int], 39 omr_ned: float 40 ): 41 self.gt_path: Path = gt_path 42 self.pred_path: Path = pred_path 43 self.gt_numsyms: int = gt_numsyms 44 self.pred_numsyms: int = pred_numsyms 45 self.omr_edit_distance: int = omr_edit_distance 46 self.edit_distances_dict: dict[str, int] = edit_distances_dict 47 self.omr_ned: float = omr_ned
EvaluationMetrics( gt_path: pathlib.Path, pred_path: pathlib.Path, gt_numsyms: int, pred_numsyms: int, omr_edit_distance: int, edit_distances_dict: dict[str, int], omr_ned: float)
31 def __init__( 32 self, 33 gt_path: Path, 34 pred_path: Path, 35 gt_numsyms: int, 36 pred_numsyms: int, 37 omr_edit_distance: int, 38 edit_distances_dict: dict[str, int], 39 omr_ned: float 40 ): 41 self.gt_path: Path = gt_path 42 self.pred_path: Path = pred_path 43 self.gt_numsyms: int = gt_numsyms 44 self.pred_numsyms: int = pred_numsyms 45 self.omr_edit_distance: int = omr_edit_distance 46 self.edit_distances_dict: dict[str, int] = edit_distances_dict 47 self.omr_ned: float = omr_ned
class
Comparison:
150class Comparison: 151 _memoizer_mem: dict = {} 152 153 @staticmethod 154 def _clear_memoizer_caches(): 155 Comparison._memoizer_mem = {} 156 157 @staticmethod 158 def _myers_diff(a_lines, b_lines): 159 # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2) 160 # This marks the farthest-right point along each diagonal in the edit 161 # graph, along with the history that got it there 162 Frontier = namedtuple("Frontier", ["x", "history"]) 163 frontier = {1: Frontier(0, [])} 164 165 a_max = len(a_lines) 166 b_max = len(b_lines) 167 for d in range(0, a_max + b_max + 1): 168 for k in range(-d, d + 1, 2): 169 # This determines whether our next search point will be going down 170 # in the edit graph, or to the right. 171 # 172 # The intuition for this is that we should go down if we're on the 173 # left edge (k == -d) to make sure that the left edge is fully 174 # explored. 175 # 176 # If we aren't on the top (k != d), then only go down if going down 177 # would take us to territory that hasn't sufficiently been explored 178 # yet. 179 go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x) 180 181 # Figure out the starting point of this iteration. The diagonal 182 # offsets come from the geometry of the edit grid - if you're going 183 # down, your diagonal is lower, and if you're going right, your 184 # diagonal is higher. 185 if go_down: 186 old_x, history = frontier[k + 1] 187 x = old_x 188 else: 189 old_x, history = frontier[k - 1] 190 x = old_x + 1 191 192 # We want to avoid modifying the old history, since some other step 193 # may decide to use it. 194 history = history[:] 195 y = x - k 196 197 # We start at the invalid point (0, 0) - we should only start building 198 # up history when we move off of it. 199 if 1 <= y <= b_max and go_down: 200 history.append((1, b_lines[y - 1][1])) # add comparetostep 201 elif 1 <= x <= a_max: 202 history.append((0, a_lines[x - 1][1])) # add originalstep 203 204 # Chew up as many diagonal moves as we can - these correspond to common lines, 205 # and they're considered "free" by the algorithm because we want to maximize 206 # the number of these in the output. 207 while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]: 208 x += 1 209 y += 1 210 history.append((2, a_lines[x - 1][1])) # add equal step 211 212 if x >= a_max and y >= b_max: 213 # If we're here, then we've traversed through the bottom-left corner, 214 # and are done. 215 return np.array(history) 216 217 frontier[k] = Frontier(x, history) 218 219 assert False, "Could not find edit script" 220 221 @staticmethod 222 def _non_common_subsequences_myers(original, compare_to): 223 # Both original and compare_to are list of lists, or numpy arrays with 2 columns. 224 # This is necessary because bars need two representation at the same time. 225 # One without the id (for comparison), and one with the id (to retrieve the bar 226 # at the end). 227 228 # get the list of operations 229 op_list = Comparison._myers_diff( 230 np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64) 231 )[::-1] 232 # retrieve the non common subsequences 233 non_common_subsequences = [] 234 non_common_subsequences.append({"original": [], "compare_to": []}) 235 ind = 0 236 for op in op_list[::-1]: 237 if op[0] == 2: # equal 238 non_common_subsequences.append({"original": [], "compare_to": []}) 239 ind += 1 240 elif op[0] == 0: # original step 241 non_common_subsequences[ind]["original"].append(op[1]) 242 elif op[0] == 1: # compare to step 243 non_common_subsequences[ind]["compare_to"].append(op[1]) 244 # remove the empty dict from the list 245 non_common_subsequences = [ 246 s for s in non_common_subsequences if s != {"original": [], "compare_to": []} 247 ] 248 return non_common_subsequences 249 250 @staticmethod 251 def _non_common_subsequences_of_measures(original_m, compare_to_m): 252 # Take the hash for each measure to run faster comparison 253 # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison), 254 # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after 255 # computation) 256 original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m] 257 compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m] 258 ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int) 259 # retrieve the original pointers to measures 260 new_out = [] 261 for e in ncs: 262 new_out.append({}) 263 for k in e.keys(): 264 new_out[-1][k] = [] 265 for repr_hash in e[k]: 266 if k == "original": 267 new_out[-1][k].append( 268 next(m for m in original_m if m.precomputed_repr == repr_hash) 269 ) 270 else: 271 new_out[-1][k].append( 272 next(m for m in compare_to_m if m.precomputed_repr == repr_hash) 273 ) 274 275 return new_out 276 277 @staticmethod 278 @_memoize_pitches_lev_diff 279 def _pitches_levenshtein_diff( 280 original: list[tuple[str, str, bool]], 281 compare_to: list[tuple[str, str, bool]], 282 noteNode1: AnnNote, 283 noteNode2: AnnNote, 284 ids: tuple[int, int] 285 ): 286 """ 287 Compute the levenshtein distance between two sequences of pitches. 288 Arguments: 289 original {list} -- list of pitches 290 compare_to {list} -- list of pitches 291 noteNode1 {annotatedNote} --for referencing 292 noteNode2 {annotatedNote} --for referencing 293 ids {tuple} -- a tuple of 2 elements with the indices of the notes considered 294 """ 295 if len(original) == 0 and len(compare_to) == 0: 296 return [], 0 297 298 if len(original) == 0: 299 op_list, cost = Comparison._pitches_levenshtein_diff( 300 original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 301 ) 302 op_list.append( 303 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 304 ) 305 cost += M21Utils.pitch_size(compare_to[0]) 306 return op_list, cost 307 308 if len(compare_to) == 0: 309 op_list, cost = Comparison._pitches_levenshtein_diff( 310 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 311 ) 312 op_list.append( 313 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 314 ) 315 cost += M21Utils.pitch_size(original[0]) 316 return op_list, cost 317 318 # compute the cost and the op_list for the many possibilities of recursion 319 cost_dict = {} 320 op_list_dict = {} 321 # del-pitch 322 op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff( 323 original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1]) 324 ) 325 cost_dict["delpitch"] += M21Utils.pitch_size(original[0]) 326 op_list_dict["delpitch"].append( 327 ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids) 328 ) 329 # ins-pitch 330 op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff( 331 original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1) 332 ) 333 cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0]) 334 op_list_dict["inspitch"].append( 335 ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids) 336 ) 337 # edit-pitch 338 op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff( 339 original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1) 340 ) 341 if original[0] == compare_to[0]: # to avoid perform the pitch_diff 342 pitch_diff_op_list = [] 343 pitch_diff_cost = 0 344 else: 345 pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff( 346 original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1]) 347 ) 348 cost_dict["editpitch"] += pitch_diff_cost 349 op_list_dict["editpitch"].extend(pitch_diff_op_list) 350 # compute the minimum of the possibilities 351 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 352 out = op_list_dict[min_key], cost_dict[min_key] 353 return out 354 355 @staticmethod 356 def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids): 357 """ 358 Compute the differences between two pitch (definition from the paper). 359 a pitch consist of a tuple: pitch name (letter+number), accidental, tie. 360 param : pitch1. The music_notation_repr tuple of note1 361 param : pitch2. The music_notation_repr tuple of note2 362 param : noteNode1. The noteNode where pitch1 belongs 363 param : noteNode2. The noteNode where pitch2 belongs 364 param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord 365 Returns: 366 [list] -- the list of differences 367 [int] -- the cost of diff 368 """ 369 cost = 0 370 op_list = [] 371 # add for pitch name differences 372 if pitch1[0] != pitch2[0]: 373 cost += 1 374 # TODO: select the note in a more precise way in case of a chord 375 # rest to note 376 if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"): # xor 377 op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids)) 378 else: # they are two notes 379 op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids)) 380 381 # add for the accidentals 382 if pitch1[1] != pitch2[1]: # if the accidental is different 383 if pitch1[1] == "None": 384 assert pitch2[1] != "None" 385 cost += 1 386 op_list.append(("accidentins", noteNode1, noteNode2, 1, ids)) 387 elif pitch2[1] == "None": 388 assert pitch1[1] != "None" 389 cost += 1 390 op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids)) 391 else: # a different type of alteration is present 392 cost += 2 # delete then add 393 op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids)) 394 # add for the ties 395 if pitch1[2] != pitch2[2]: 396 # exclusive or. Add if one is tied and not the other. 397 # probably to revise for chords 398 cost += 1 399 if pitch1[2]: 400 assert not pitch2[2] 401 op_list.append(("tiedel", noteNode1, noteNode2, 1, ids)) 402 elif pitch2[2]: 403 assert not pitch1[2] 404 op_list.append(("tieins", noteNode1, noteNode2, 1, ids)) 405 return op_list, cost 406 407 @staticmethod 408 @_memoize_block_diff_lin 409 def _block_diff_lin(original, compare_to): 410 if len(original) == 0 and len(compare_to) == 0: 411 return [], 0 412 413 if len(original) == 0: 414 op_list, cost = Comparison._block_diff_lin(original, compare_to[1:]) 415 cost += compare_to[0].notation_size() 416 op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size())) 417 return op_list, cost 418 419 if len(compare_to) == 0: 420 op_list, cost = Comparison._block_diff_lin(original[1:], compare_to) 421 cost += original[0].notation_size() 422 op_list.append(("delbar", original[0], None, original[0].notation_size())) 423 return op_list, cost 424 425 # compute the cost and the op_list for the many possibilities of recursion 426 cost_dict = {} 427 op_list_dict = {} 428 # del-bar 429 op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin( 430 original[1:], compare_to 431 ) 432 cost_dict["delbar"] += original[0].notation_size() 433 op_list_dict["delbar"].append( 434 ("delbar", original[0], None, original[0].notation_size()) 435 ) 436 # ins-bar 437 op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin( 438 original, compare_to[1:] 439 ) 440 cost_dict["insbar"] += compare_to[0].notation_size() 441 op_list_dict["insbar"].append( 442 ("insbar", None, compare_to[0], compare_to[0].notation_size()) 443 ) 444 # edit-bar 445 op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin( 446 original[1:], compare_to[1:] 447 ) 448 if ( 449 original[0] == compare_to[0] 450 ): # to avoid performing the _voices_coupling_recursive/_notes_set_distance 451 # if it's not needed 452 inside_bar_op_list = [] 453 inside_bar_cost = 0 454 else: 455 # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras 456 # instead of lists of AnnNotes) 457 extras_op_list, extras_cost = Comparison._extras_set_distance( 458 original[0].extras_list, compare_to[0].extras_list 459 ) 460 461 # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras) 462 lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin( 463 original[0].lyrics_list, compare_to[0].lyrics_list 464 ) 465 466 if original[0].includes_voicing: 467 # run the voice coupling algorithm, and add to inside_bar_op_list 468 # and inside_bar_cost 469 inside_bar_op_list, inside_bar_cost = ( 470 Comparison._voices_coupling_recursive( 471 original[0].voices_list, compare_to[0].voices_list 472 ) 473 ) 474 else: 475 # run the set distance algorithm, and add to inside_bar_op_list 476 # and inside_bar_cost 477 inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance( 478 original[0].annot_notes, compare_to[0].annot_notes 479 ) 480 481 inside_bar_op_list.extend(extras_op_list) 482 inside_bar_cost += extras_cost 483 inside_bar_op_list.extend(lyrics_op_list) 484 inside_bar_cost += lyrics_cost 485 486 cost_dict["editbar"] += inside_bar_cost 487 op_list_dict["editbar"].extend(inside_bar_op_list) 488 # compute the minimum of the possibilities 489 min_key = min(cost_dict, key=lambda k: cost_dict[k]) 490 out = op_list_dict[min_key], cost_dict[min_key] 491 return out 492 493 @staticmethod 494 @_memoize_lyrics_diff_lin 495 def _lyrics_diff_lin(original, compare_to): 496 # original and compare to are two lists of AnnLyric 497 if len(original) == 0 and len(compare_to) == 0: 498 return [], 0 499 500 if len(original) == 0: 501 cost = 0 502 op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:]) 503 op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size())) 504 cost += compare_to[0].notation_size() 505 return op_list, cost 506 507 if len(compare_to) == 0: 508 cost = 0 509 op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to) 510 op_list.append(("lyricdel", original[0], None, original[0].notation_size())) 511 cost += original[0].notation_size() 512 return op_list, cost 513 514 # compute the cost and the op_list for the many possibilities of recursion 515 cost = {} 516 op_list = {} 517 # lyricdel 518 op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin( 519 original[1:], compare_to 520 ) 521 cost["lyricdel"] += original[0].notation_size() 522 op_list["lyricdel"].append( 523 ("lyricdel", original[0], None, original[0].notation_size()) 524 ) 525 # lyricins 526 op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin( 527 original, compare_to[1:] 528 ) 529 cost["lyricins"] += compare_to[0].notation_size() 530 op_list["lyricins"].append( 531 ("lyricins", None, compare_to[0], compare_to[0].notation_size()) 532 ) 533 # lyricsub 534 op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin( 535 original[1:], compare_to[1:] 536 ) 537 if ( 538 original[0] == compare_to[0] 539 ): # avoid call another function if they are equal 540 lyricsub_op, lyricsub_cost = [], 0 541 else: 542 lyricsub_op, lyricsub_cost = ( 543 Comparison._annotated_lyric_diff(original[0], compare_to[0]) 544 ) 545 cost["lyricsub"] += lyricsub_cost 546 op_list["lyricsub"].extend(lyricsub_op) 547 # compute the minimum of the possibilities 548 min_key = min(cost, key=cost.get) 549 out = op_list[min_key], cost[min_key] 550 return out 551 552 @staticmethod 553 def _strings_levenshtein_distance(str1: str, str2: str): 554 counter: dict = {"+": 0, "-": 0} 555 distance: int = 0 556 for edit_code in ndiff(str1, str2): 557 if edit_code[0] == " ": 558 distance += max(counter.values()) 559 counter = {"+": 0, "-": 0} 560 else: 561 counter[edit_code[0]] += 1 562 distance += max(counter.values()) 563 return distance 564 565 @staticmethod 566 def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool: 567 if off1 == off2: 568 return False 569 570 # this should never happen, but... 571 if off1 is None or off2 is None: 572 if off1 is None and off2 is not None: 573 return True 574 if off1 is not None and off2 is None: 575 return True 576 return False # both are None, therefore not different at all 577 578 diff: OffsetQL = off1 - off2 579 if diff < 0: 580 diff = -diff 581 582 if diff > 0.0001: 583 return True 584 return False 585 586 @staticmethod 587 def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra): 588 """ 589 Compute the differences between two annotated extras. 590 Each annotated extra consists of three values: content, offset, and duration 591 """ 592 cost = 0 593 op_list = [] 594 595 # add for the content 596 if annExtra1.content != annExtra2.content: 597 content_cost: int = ( 598 Comparison._strings_levenshtein_distance( 599 annExtra1.content or '', 600 annExtra2.content or '' 601 ) 602 ) 603 cost += content_cost 604 op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost)) 605 606 # add for the symbolic (cost 2: delete one symbol, add the other) 607 if annExtra1.symbolic != annExtra2.symbolic: 608 cost += 2 609 op_list.append(("extrasymboledit", annExtra1, annExtra2, 2)) 610 611 # add for the infodict 612 if annExtra1.infodict != annExtra2.infodict: 613 info_cost: int = 0 614 # handle everything in annExtra1 (whether or not it is in annExtra2) 615 for k, v in annExtra1.infodict.items(): 616 if k not in annExtra2.infodict: 617 # not in annExtra2: delete a symbol 618 info_cost += 1 619 elif v != annExtra2.infodict[k]: 620 # different in annExtra2: delete a symbol, add a symbol 621 info_cost += 2 622 # handle everything in annExtra2 that is not in annExtra1 623 for k in annExtra2.infodict: 624 if k not in annExtra1.infodict: 625 # add a symbol 626 info_cost += 1 627 cost += info_cost 628 op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost)) 629 630 # add for the offset 631 # Note: offset here is a float, and some file formats have only four 632 # decimal places of precision. So we should not compare exactly here. 633 if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset): 634 cost += 1 635 op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1)) 636 637 # add for the duration 638 # Note: duration here is a float, and some file formats have only four 639 # decimal places of precision. So we should not compare exactly here. 640 if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration): 641 cost += 1 642 op_list.append(("extradurationedit", annExtra1, annExtra2, 1)) 643 644 # add for the style 645 if annExtra1.styledict != annExtra2.styledict: 646 cost += 1 # someday we might count different items in the styledict 647 op_list.append(("extrastyleedit", annExtra1, annExtra2, 1)) 648 649 return op_list, cost 650 651 @staticmethod 652 def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric): 653 """ 654 Compute the differences between two annotated lyrics. 655 Each annotated lyric consists of five values: lyric, verse_id, offset, duration, 656 and styledict. 657 """ 658 cost = 0 659 op_list = [] 660 661 # add for the content 662 if annLyric1.lyric != annLyric2.lyric: 663 content_cost: int = ( 664 Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric) 665 ) 666 cost += content_cost 667 op_list.append(("lyricedit", annLyric1, annLyric2, content_cost)) 668 669 # add for the number 670 if annLyric1.number != annLyric2.number: 671 number_cost: int 672 if annLyric1.number == 0 or annLyric2.number == 0: 673 # add or delete number 674 number_cost = 1 675 else: 676 # add and delete number 677 number_cost = 2 678 cost += number_cost 679 op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost)) 680 681 # add for the identifier 682 if annLyric1.identifier != annLyric2.identifier: 683 # someday we might do a Levenshtein distance of the two ids 684 id_cost: int 685 if not annLyric1.identifier or not annLyric1.identifier: 686 # add or delete identifier 687 id_cost = 1 688 else: 689 # add and delete identifier 690 id_cost = 2 691 cost += id_cost 692 op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost)) 693 694 # add for the offset 695 # Note: offset here is a float, and some file formats have only four 696 # decimal places of precision. So we should not compare exactly here. 697 if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset): 698 cost += 1 699 op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1)) 700 701 # add for the style 702 if annLyric1.styledict != annLyric2.styledict: 703 cost += 1 # someday we might count different items in the styledict 704 op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1)) 705 706 return op_list, cost 707 708 @staticmethod 709 def _annotated_metadata_item_diff( 710 annMetadataItem1: AnnMetadataItem, 711 annMetadataItem2: AnnMetadataItem 712 ): 713 """ 714 Compute the differences between two annotated metadata items. 715 Each annotated metadata item has two values: key: str, value: t.Any, 716 """ 717 cost = 0 718 op_list = [] 719 720 # add for the key (this never adds anything because we don't compare 721 # items that don't have the same key). 722 if annMetadataItem1.key != annMetadataItem2.key: 723 key_cost: int = ( 724 Comparison._strings_levenshtein_distance( 725 annMetadataItem1.key, 726 annMetadataItem2.key 727 ) 728 ) 729 cost += key_cost 730 op_list.append(("mditemkeyedit", annMetadataItem1, annMetadataItem2, key_cost)) 731 732 # add for the value 733 if annMetadataItem1.value != annMetadataItem2.value: 734 value_cost: int = ( 735 Comparison._strings_levenshtein_distance( 736 str(annMetadataItem1.value), 737 str(annMetadataItem2.value) 738 ) 739 ) 740 cost += value_cost 741 op_list.append( 742 ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost) 743 ) 744 745 return op_list, cost 746 747 @staticmethod 748 def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup): 749 """ 750 Compute the differences between two annotated staff groups. 751 Each annotated staff group consists of five values: name, abbreviation, 752 symbol, barTogether, part_indices. 753 """ 754 cost = 0 755 op_list = [] 756 757 # add for the name 758 if annStaffGroup1.name != annStaffGroup2.name: 759 name_cost: int = ( 760 Comparison._strings_levenshtein_distance( 761 annStaffGroup1.name, 762 annStaffGroup2.name 763 ) 764 ) 765 cost += name_cost 766 op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost)) 767 768 # add for the abbreviation 769 if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation: 770 abbreviation_cost: int = ( 771 Comparison._strings_levenshtein_distance( 772 annStaffGroup1.abbreviation, 773 annStaffGroup2.abbreviation 774 ) 775 ) 776 cost += abbreviation_cost 777 op_list.append(( 778 "staffgrpabbreviationedit", 779 annStaffGroup1, 780 annStaffGroup2, 781 abbreviation_cost 782 )) 783 784 # add for the symbol 785 if annStaffGroup1.symbol != annStaffGroup2.symbol: 786 symbol_cost: int 787 if not annStaffGroup1.symbol or not annStaffGroup2.symbol: 788 # add or delete symbol 789 symbol_cost = 1 790 else: 791 # add and delete symbol 792 symbol_cost = 2 793 cost += symbol_cost 794 op_list.append( 795 ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost) 796 ) 797 798 # add for barTogether 799 if annStaffGroup1.barTogether != annStaffGroup2.barTogether: 800 barTogether_cost: int = 1 801 cost += barTogether_cost 802 op_list.append( 803 ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost) 804 ) 805 806 # add for partIndices (sorted list of int) 807 if annStaffGroup1.part_indices != annStaffGroup2.part_indices: 808 partIndices_cost: int = 0 809 if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]: 810 partIndices_cost += 1 # vertical start 811 if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]: 812 partIndices_cost += 1 # vertical height 813 if partIndices_cost == 0: 814 # should never get here, but we have to have a cost 815 partIndices_cost = 1 816 cost += partIndices_cost 817 op_list.append( 818 ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost) 819 ) 820 821 return op_list, cost 822 823 @staticmethod 824 @_memoize_inside_bars_diff_lin 825 def _inside_bars_diff_lin(original, compare_to): 826 # original and compare to are two lists of annotatedNote 827 if len(original) == 0 and len(compare_to) == 0: 828 return [], 0 829 830 if len(original) == 0: 831 cost = 0 832 op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:]) 833 op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size())) 834 cost += compare_to[0].notation_size() 835 return op_list, cost 836 837 if len(compare_to) == 0: 838 cost = 0 839 op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to) 840 op_list.append(("notedel", original[0], None, original[0].notation_size())) 841 cost += original[0].notation_size() 842 return op_list, cost 843 844 # compute the cost and the op_list for the many possibilities of recursion 845 cost = {} 846 op_list = {} 847 # notedel 848 op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin( 849 original[1:], compare_to 850 ) 851 cost["notedel"] += original[0].notation_size() 852 op_list["notedel"].append( 853 ("notedel", original[0], None, original[0].notation_size()) 854 ) 855 # noteins 856 op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin( 857 original, compare_to[1:] 858 ) 859 cost["noteins"] += compare_to[0].notation_size() 860 op_list["noteins"].append( 861 ("noteins", None, compare_to[0], compare_to[0].notation_size()) 862 ) 863 # notesub 864 op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin( 865 original[1:], compare_to[1:] 866 ) 867 if ( 868 original[0] == compare_to[0] 869 ): # avoid call another function if they are equal 870 notesub_op, notesub_cost = [], 0 871 else: 872 notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0]) 873 cost["notesub"] += notesub_cost 874 op_list["notesub"].extend(notesub_op) 875 # compute the minimum of the possibilities 876 min_key = min(cost, key=cost.get) 877 out = op_list[min_key], cost[min_key] 878 return out 879 880 @staticmethod 881 def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote): 882 """ 883 Compute the differences between two annotated notes. 884 Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets) 885 where pitches is a list. 886 Arguments: 887 noteNode1 {[AnnNote]} -- original AnnNote 888 noteNode2 {[AnnNote]} -- compare_to AnnNote 889 """ 890 cost = 0 891 op_list = [] 892 # add for the pitches 893 # if they are equal 894 if annNote1.pitches == annNote2.pitches: 895 op_list_pitch, cost_pitch = [], 0 896 else: 897 # pitches diff is computed using Levenshtein distances (they are already ordered) 898 op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff( 899 annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0) 900 ) 901 op_list.extend(op_list_pitch) 902 cost += cost_pitch 903 # add for the notehead 904 if annNote1.note_head != annNote2.note_head: 905 # delete one note head, add the other (this isn't noteshape, this is 906 # just quarter-note note head vs half-note note head, etc) 907 cost += 2 908 op_list.append(("headedit", annNote1, annNote2, 2)) 909 # add for the dots 910 if annNote1.dots != annNote2.dots: 911 # add one for each added (or deleted) dot 912 dots_diff = abs(annNote1.dots - annNote2.dots) 913 cost += dots_diff 914 if annNote1.dots > annNote2.dots: 915 op_list.append(("dotdel", annNote1, annNote2, dots_diff)) 916 else: 917 op_list.append(("dotins", annNote1, annNote2, dots_diff)) 918 if annNote1.graceType != annNote2.graceType: 919 # accented vs unaccented vs not a grace note (delete the wrong, add the right) 920 cost += 2 921 op_list.append(("graceedit", annNote1, annNote2, 2)) 922 if annNote1.graceSlash != annNote2.graceSlash: 923 # add or delete the slash 924 cost += 1 925 op_list.append(("graceslashedit", annNote1, annNote2, 1)) 926 # add for the beamings 927 if annNote1.beamings != annNote2.beamings: 928 beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff( 929 annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam" 930 ) 931 op_list.extend(beam_op_list) 932 cost += beam_cost 933 # add for the tuplet types 934 if annNote1.tuplets != annNote2.tuplets: 935 tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff( 936 annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet" 937 ) 938 op_list.extend(tuplet_op_list) 939 cost += tuplet_cost 940 # add for the tuplet info 941 if annNote1.tuplet_info != annNote2.tuplet_info: 942 tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff( 943 annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet" 944 ) 945 op_list.extend(tuplet_info_op_list) 946 cost += tuplet_info_cost 947 # add for the articulations 948 if annNote1.articulations != annNote2.articulations: 949 artic_op_list, artic_cost = Comparison._generic_levenshtein_diff( 950 annNote1.articulations, 951 annNote2.articulations, 952 annNote1, 953 annNote2, 954 "articulation", 955 ) 956 op_list.extend(artic_op_list) 957 cost += artic_cost 958 # add for the expressions 959 if annNote1.expressions != annNote2.expressions: 960 expr_op_list, expr_cost = Comparison._generic_levenshtein_diff( 961 annNote1.expressions, 962 annNote2.expressions, 963 annNote1, 964 annNote2, 965 "expression", 966 ) 967 op_list.extend(expr_op_list) 968 cost += expr_cost 969 970 # add for gap from previous note or start of measure if first note in measure 971 # (i.e. horizontal position shift) 972 if annNote1.gap_dur != annNote2.gap_dur: 973 # in all cases, the edit is a simple horizontal shift of the note 974 cost += 1 975 if annNote1.gap_dur == 0: 976 op_list.append(("insspace", annNote1, annNote2, 1)) 977 elif annNote2.gap_dur == 0: 978 op_list.append(("delspace", annNote1, annNote2, 1)) 979 else: 980 # neither is zero 981 op_list.append(("editspace", annNote1, annNote2, 1)) 982 983 # add for noteshape 984 if annNote1.noteshape != annNote2.noteshape: 985 # always delete existing note shape and add the new one 986 cost += 2 987 op_list.append(("editnoteshape", annNote1, annNote2, 2)) 988 # add for noteheadFill 989 if annNote1.noteheadFill != annNote2.noteheadFill: 990 # always delete existing note fill and add the new one 991 cost += 2 992 op_list.append(("editnoteheadfill", annNote1, annNote2, 2)) 993 # add for noteheadParenthesis (True or False) 994 if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis: 995 # always either add or delete parentheses 996 cost += 1 997 op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1)) 998 # add for stemDirection 999 if annNote1.stemDirection != annNote2.stemDirection: 1000 stemdir_cost: int 1001 if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem': 1002 # gonna add a stem 1003 stemdir_cost = 1 1004 else: 1005 # gonna change a stem (add then delete) 1006 stemdir_cost = 2 1007 cost += stemdir_cost 1008 op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost)) 1009 # add for the styledict 1010 if annNote1.styledict != annNote2.styledict: 1011 cost += 1 1012 op_list.append(("editstyle", annNote1, annNote2, 1)) 1013 1014 return op_list, cost 1015 1016 @staticmethod 1017 @_memoize_beamtuplet_lev_diff 1018 def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which): 1019 """ 1020 Compute the levenshtein distance between two sequences of beaming or tuples. 1021 Arguments: 1022 original {list} -- list of strings (start, stop, continue or partial) 1023 compare_to {list} -- list of strings (start, stop, continue or partial) 1024 note1 {AnnNote} -- the note for referencing in the score 1025 note2 {AnnNote} -- the note for referencing in the score 1026 which -- a string: "beam" or "tuplet" depending what we are comparing 1027 """ 1028 if which not in ("beam", "tuplet"): 1029 raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'") 1030 1031 if len(original) == 0 and len(compare_to) == 0: 1032 return [], 0 1033 1034 if len(original) == 0: 1035 op_list, cost = Comparison._beamtuplet_levenshtein_diff( 1036 original, compare_to[1:], note1, note2, which 1037 ) 1038 op_list.append(("ins" + which, note1, note2, 1)) 1039 cost += 1 1040 return op_list, cost 1041 1042 if len(compare_to) == 0: 1043 op_list, cost = Comparison._beamtuplet_levenshtein_diff( 1044 original[1:], compare_to, note1, note2, which 1045 ) 1046 op_list.append(("del" + which, note1, note2, 1)) 1047 cost += 1 1048 return op_list, cost 1049 1050 # compute the cost and the op_list for the many possibilities of recursion 1051 cost = {} 1052 op_list = {} 1053 # delwhich 1054 op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff( 1055 original[1:], compare_to, note1, note2, which 1056 ) 1057 cost["del" + which] += 1 1058 op_list["del" + which].append(("del" + which, note1, note2, 1)) 1059 # inswhich 1060 op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff( 1061 original, compare_to[1:], note1, note2, which 1062 ) 1063 cost["ins" + which] += 1 1064 op_list["ins" + which].append(("ins" + which, note1, note2, 1)) 1065 # editwhich 1066 op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff( 1067 original[1:], compare_to[1:], note1, note2, which 1068 ) 1069 if original[0] == compare_to[0]: 1070 beam_diff_op_list = [] 1071 beam_diff_cost = 0 1072 else: 1073 beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1 1074 cost["edit" + which] += beam_diff_cost 1075 op_list["edit" + which].extend(beam_diff_op_list) 1076 # compute the minimum of the possibilities 1077 min_key = min(cost, key=cost.get) 1078 out = op_list[min_key], cost[min_key] 1079 return out 1080 1081 @staticmethod 1082 @_memoize_generic_lev_diff 1083 def _generic_levenshtein_diff(original, compare_to, note1, note2, which): 1084 """ 1085 Compute the Levenshtein distance between two generic sequences of symbols 1086 (e.g., articulations). 1087 Arguments: 1088 original {list} -- list of strings 1089 compare_to {list} -- list of strings 1090 note1 {AnnNote} -- the note for referencing in the score 1091 note2 {AnnNote} -- the note for referencing in the score 1092 which -- a string: e.g. "articulation" depending what we are comparing 1093 """ 1094 if len(original) == 0 and len(compare_to) == 0: 1095 return [], 0 1096 1097 if len(original) == 0: 1098 op_list, cost = Comparison._generic_levenshtein_diff( 1099 original, compare_to[1:], note1, note2, which 1100 ) 1101 op_list.append(("ins" + which, note1, note2, 1)) 1102 cost += 1 1103 return op_list, cost 1104 1105 if len(compare_to) == 0: 1106 op_list, cost = Comparison._generic_levenshtein_diff( 1107 original[1:], compare_to, note1, note2, which 1108 ) 1109 op_list.append(("del" + which, note1, note2, 1)) 1110 cost += 1 1111 return op_list, cost 1112 1113 # compute the cost and the op_list for the many possibilities of recursion 1114 cost = {} 1115 op_list = {} 1116 # delwhich 1117 op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff( 1118 original[1:], compare_to, note1, note2, which 1119 ) 1120 cost["del" + which] += 1 1121 op_list["del" + which].append(("del" + which, note1, note2, 1)) 1122 # inswhich 1123 op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff( 1124 original, compare_to[1:], note1, note2, which 1125 ) 1126 cost["ins" + which] += 1 1127 op_list["ins" + which].append(("ins" + which, note1, note2, 1)) 1128 # editwhich 1129 op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff( 1130 original[1:], compare_to[1:], note1, note2, which 1131 ) 1132 if original[0] == compare_to[0]: # to avoid perform the diff 1133 generic_diff_op_list = [] 1134 generic_diff_cost = 0 1135 else: 1136 generic_diff_op_list, generic_diff_cost = ( 1137 [("edit" + which, note1, note2, 1)], 1138 1, 1139 ) 1140 cost["edit" + which] += generic_diff_cost 1141 op_list["edit" + which].extend(generic_diff_op_list) 1142 # compute the minimum of the possibilities 1143 min_key = min(cost, key=cost.get) 1144 out = op_list[min_key], cost[min_key] 1145 return out 1146 1147 @staticmethod 1148 @_memoize_notes_set_distance 1149 def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]): 1150 """ 1151 Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in 1152 that order of importance). If you can't find an exactly matching note, try again without 1153 visual duration. 1154 original [list] -- a list of AnnNote (which are never chords) 1155 compare_to [list] -- a list of AnnNote (which are never chords) 1156 """ 1157 paired_notes: list[tuple[AnnNote, AnnNote]] = [] 1158 unpaired_orig_notes: list[AnnNote] = [] 1159 unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to) 1160 1161 for orig_n in original: 1162 fallback: AnnNote | None = None 1163 fallback_i: int = -1 1164 found_it: bool = False 1165 for i, comp_n in enumerate(unpaired_comp_notes): 1166 if orig_n.pitches[0][0] != comp_n.pitches[0][0]: 1167 # this pitch comparison (1) assumes the note is not a chord 1168 # (because we don't do chords when Voicing is not set, and 1169 # we only call _notes_set_distance when Voicing is not set), 1170 # and (2) only compares the visual position of the note (we 1171 # are ignoring the accidental here). This is so that an 1172 # accidental change will show up as a pitch edit, not a 1173 # note remove/insert. 1174 continue 1175 if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset): 1176 continue 1177 if orig_n.note_is_grace != comp_n.note_is_grace: 1178 continue 1179 if fallback is None: 1180 fallback = comp_n 1181 fallback_i = i 1182 1183 if orig_n.note_dur_type != comp_n.note_dur_type: 1184 continue 1185 if orig_n.note_dur_dots != comp_n.note_dur_dots: 1186 continue 1187 1188 # found a perfect match 1189 paired_notes.append((orig_n, comp_n)) 1190 1191 # remove comp_n from unpaired_comp_notes 1192 unpaired_comp_notes.pop(i) # remove(comp_n) would sometimes get the wrong one 1193 1194 found_it = True 1195 break 1196 1197 if found_it: 1198 # on to the next original note 1199 continue 1200 1201 # did we find a fallback (matched except for duration)? 1202 if fallback is not None: 1203 paired_notes.append((orig_n, fallback)) 1204 unpaired_comp_notes.pop(fallback_i) 1205 continue 1206 1207 # we found nothing 1208 unpaired_orig_notes.append(orig_n) 1209 1210 # compute the cost and the op_list 1211 cost: int = 0 1212 op_list: list = [] 1213 1214 # notedel 1215 if unpaired_orig_notes: 1216 for an in unpaired_orig_notes: 1217 cost += an.notation_size() 1218 op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord)) 1219 1220 # noteins 1221 if unpaired_comp_notes: 1222 for an in unpaired_comp_notes: 1223 cost += an.notation_size() 1224 op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord)) 1225 1226 # notesub 1227 if paired_notes: 1228 for ano, anc in paired_notes: 1229 if ano == anc: 1230 # if equal, avoid _annotated_note_diff call 1231 notesub_op, notesub_cost = [], 0 1232 else: 1233 notesub_op, notesub_cost = ( 1234 Comparison._annotated_note_diff(ano, anc) 1235 ) 1236 cost += notesub_cost 1237 op_list.extend(notesub_op) 1238 1239 return op_list, cost 1240 1241 @staticmethod 1242 @_memoize_extras_set_distance 1243 def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]): 1244 """ 1245 Gather up pairs of matching extras (using kind, offset, and visual duration, in 1246 that order of importance). If you can't find an exactly matching extra, try again 1247 without visual duration. 1248 original [list] -- a list of AnnExtras 1249 compare_to [list] -- a list of AnnExtras 1250 """ 1251 paired_extras: list[tuple[AnnExtra, AnnExtra]] = [] 1252 unpaired_orig_extras: list[AnnExtra] = [] 1253 unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to) 1254 1255 for orig_x in original: 1256 fallback: AnnExtra | None = None 1257 fallback_i: int = -1 1258 found_it: bool = False 1259 for i, comp_x in enumerate(unpaired_comp_extras): 1260 # kind and offset are required for pairing 1261 if orig_x.kind != comp_x.kind: 1262 continue 1263 if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset): 1264 continue 1265 if fallback is None: 1266 fallback = comp_x 1267 fallback_i = i 1268 1269 # duration is preferred for pairing 1270 if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration): 1271 continue 1272 1273 # there are a few kind-specific elements that are also preferred: 1274 # 'direction'/'ending': content (visible text) 1275 # 'keysig'/'timesig'/'clef': symbolic (there are sometimes two 1276 # simultaneous keysigs, timesigs, or clefs, and we don't 1277 # want to confuse which one is which, producing a diff where 1278 # there actually isn't one) 1279 # 'slur': placements (because there are often two identical slurs 1280 # whose only difference is 'above' vs 'below', producing a diff 1281 # where there actually isn't one) 1282 if orig_x.kind in ('direction', 'ending'): 1283 if orig_x.content != comp_x.content: 1284 continue 1285 if orig_x.kind == 'clef': 1286 if orig_x.symbolic != comp_x.symbolic: 1287 continue 1288 if orig_x.kind in ('keysig', 'timesig'): 1289 if orig_x.infodict != comp_x.infodict: 1290 continue 1291 if orig_x.kind == 'slur': 1292 orig_placement: str = orig_x.styledict.get('placement', '') 1293 comp_placement: str = comp_x.styledict.get('placement', '') 1294 if orig_placement != comp_placement: 1295 continue 1296 1297 # found a perfect match 1298 paired_extras.append((orig_x, comp_x)) 1299 1300 # remove comp_n from unpaired_comp_extras 1301 unpaired_comp_extras.pop(i) # remove(comp_n) would sometimes get the wrong one 1302 1303 found_it = True 1304 break 1305 1306 if found_it: 1307 # on to the next original extra 1308 continue 1309 1310 # did we find a fallback (matched except for duration)? 1311 if fallback is not None: 1312 paired_extras.append((orig_x, fallback)) 1313 unpaired_comp_extras.pop(fallback_i) 1314 continue 1315 1316 # we found nothing 1317 unpaired_orig_extras.append(orig_x) 1318 1319 # compute the cost and the op_list 1320 cost: int = 0 1321 op_list: list = [] 1322 1323 # extradel 1324 for extra in unpaired_orig_extras: 1325 cost += extra.notation_size() 1326 op_list.append(("extradel", extra, None, extra.notation_size())) 1327 1328 # extrains 1329 for extra in unpaired_comp_extras: 1330 cost += extra.notation_size() 1331 op_list.append(("extrains", None, extra, extra.notation_size())) 1332 1333 # extrasub 1334 if paired_extras: 1335 for extrao, extrac in paired_extras: 1336 if extrao == extrac: 1337 # if equal, avoid _annotated_extra_diff call 1338 extrasub_op, extrasub_cost = [], 0 1339 else: 1340 extrasub_op, extrasub_cost = ( 1341 Comparison._annotated_extra_diff(extrao, extrac) 1342 ) 1343 cost += extrasub_cost 1344 op_list.extend(extrasub_op) 1345 1346 return op_list, cost 1347 1348 @staticmethod 1349 @_memoize_metadata_items_set_distance 1350 def _metadata_items_set_distance( 1351 original: list[AnnMetadataItem], 1352 compare_to: list[AnnMetadataItem] 1353 ): 1354 """ 1355 Gather up pairs of matching metadata_items (using key and value). If you can't find 1356 an exactly matching metadata_item, try again without value. 1357 original [list] -- a list of AnnMetadataItems 1358 compare_to [list] -- a list of AnnMetadataItems 1359 """ 1360 paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = [] 1361 unpaired_orig_metadata_items: list[AnnMetadataItem] = [] 1362 unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to) 1363 1364 # first look for perfect matches 1365 for orig_mdi in original: 1366 found_it: bool = False 1367 for i, comp_mdi in enumerate(unpaired_comp_metadata_items): 1368 # key is required for perfect match 1369 if orig_mdi.key != comp_mdi.key: 1370 continue 1371 1372 # value is required for perfect match 1373 if orig_mdi.value != comp_mdi.value: 1374 continue 1375 1376 # found a perfect match 1377 paired_metadata_items.append((orig_mdi, comp_mdi)) 1378 1379 # remove comp_mdi from unpaired_comp_metadata_items 1380 unpaired_comp_metadata_items.pop(i) 1381 1382 found_it = True 1383 break 1384 1385 if found_it: 1386 # on to the next original metadata_item 1387 continue 1388 1389 # we found no perfect match 1390 unpaired_orig_metadata_items.append(orig_mdi) 1391 1392 # now look among the unpaired remainders for key match only 1393 remove_unpaired_orig_items: list[AnnMetadataItem] = [] 1394 for orig_mdi in unpaired_orig_metadata_items: 1395 for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items): 1396 # key is required for key-only match 1397 if orig_mdi.key != comp_mdi.key: 1398 continue 1399 1400 # found a key-only match 1401 paired_metadata_items.append((orig_mdi, comp_mdi)) 1402 1403 # remove comp_mdi from unpaired_comp_metadata_items 1404 unpaired_comp_metadata_items.pop(comp_idx) 1405 1406 # make a note of unpaired_orig_metadata_item to remove later 1407 remove_unpaired_orig_items.append(orig_mdi) 1408 break 1409 1410 for remove_mdi in remove_unpaired_orig_items: 1411 unpaired_orig_metadata_items.remove(remove_mdi) 1412 1413 # compute the cost and the op_list 1414 cost: int = 0 1415 op_list: list = [] 1416 1417 # mditemdel 1418 for metadata_item in unpaired_orig_metadata_items: 1419 cost += metadata_item.notation_size() 1420 op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size())) 1421 1422 # mditemins 1423 for metadata_item in unpaired_comp_metadata_items: 1424 cost += metadata_item.notation_size() 1425 op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size())) 1426 1427 # mditemsub 1428 if paired_metadata_items: 1429 for metadata_itemo, metadata_itemc in paired_metadata_items: 1430 if metadata_itemo == metadata_itemc: 1431 # if equal, avoid _annotated_metadata_item_diff call 1432 metadata_itemsub_op, metadata_itemsub_cost = [], 0 1433 else: 1434 metadata_itemsub_op, metadata_itemsub_cost = ( 1435 Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc) 1436 ) 1437 cost += metadata_itemsub_cost 1438 op_list.extend(metadata_itemsub_op) 1439 1440 return op_list, cost 1441 1442 @staticmethod 1443 @_memoize_staff_groups_set_distance 1444 def _staff_groups_set_distance( 1445 original: list[AnnStaffGroup], 1446 compare_to: list[AnnStaffGroup] 1447 ): 1448 """ 1449 Gather up pairs of matching staffgroups (using start_index and end_index, in 1450 that order of importance). If you can't find an exactly matching staffgroup, 1451 try again without end_index. 1452 original [list] -- a list of AnnStaffGroups 1453 compare_to [list] -- a list of AnnStaffGroups 1454 """ 1455 paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = [] 1456 unpaired_orig_staff_groups: list[AnnStaffGroup] = [] 1457 unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to) 1458 1459 for orig_sg in original: 1460 fallback: AnnStaffGroup | None = None 1461 fallback_i: int = -1 1462 found_it: bool = False 1463 for i, comp_sg in enumerate(unpaired_comp_staff_groups): 1464 # start index is required for pairing 1465 if orig_sg.part_indices[0] != comp_sg.part_indices[0]: 1466 continue 1467 if fallback is None: 1468 fallback = comp_sg 1469 fallback_i = i 1470 1471 # end index and symbol (brace, bracket, etc) is preferred for pairing 1472 if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]: 1473 continue 1474 if orig_sg.symbol != comp_sg.symbol: 1475 continue 1476 1477 # found a perfect match 1478 paired_staff_groups.append((orig_sg, comp_sg)) 1479 1480 # remove comp_n from unpaired_comp_staff_groups 1481 unpaired_comp_staff_groups.pop(i) 1482 1483 found_it = True 1484 break 1485 1486 if found_it: 1487 # on to the next original staff_group 1488 continue 1489 1490 # did we find a fallback (matched except for duration)? 1491 if fallback is not None: 1492 paired_staff_groups.append((orig_sg, fallback)) 1493 unpaired_comp_staff_groups.pop(fallback_i) 1494 continue 1495 1496 # we found nothing 1497 unpaired_orig_staff_groups.append(orig_sg) 1498 1499 # compute the cost and the op_list 1500 cost: int = 0 1501 op_list: list = [] 1502 1503 # staffgrpdel 1504 for staff_group in unpaired_orig_staff_groups: 1505 cost += staff_group.notation_size() 1506 op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size())) 1507 1508 # staffgrpins 1509 for staff_group in unpaired_comp_staff_groups: 1510 cost += staff_group.notation_size() 1511 op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size())) 1512 1513 # staffgrpsub 1514 if paired_staff_groups: 1515 for staff_groupo, staff_groupc in paired_staff_groups: 1516 if staff_groupo == staff_groupc: 1517 # if equal, avoid _annotated_staff_group_diff call 1518 staff_groupsub_op, staff_groupsub_cost = [], 0 1519 else: 1520 staff_groupsub_op, staff_groupsub_cost = ( 1521 Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc) 1522 ) 1523 cost += staff_groupsub_cost 1524 op_list.extend(staff_groupsub_op) 1525 1526 return op_list, cost 1527 1528 @staticmethod 1529 def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]): 1530 """ 1531 Compare all the possible voices permutations, considering also deletion and 1532 insertion (equation on office lens). 1533 original [list] -- a list of Voice 1534 compare_to [list] -- a list of Voice 1535 """ 1536 if len(original) == 0 and len(compare_to) == 0: # stop the recursion 1537 return [], 0 1538 1539 if len(original) == 0: 1540 # insertion 1541 op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:]) 1542 # add for the inserted voice 1543 op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size())) 1544 cost += compare_to[0].notation_size() 1545 return op_list, cost 1546 1547 if len(compare_to) == 0: 1548 # deletion 1549 op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to) 1550 # add for the deleted voice 1551 op_list.append(("voicedel", original[0], None, original[0].notation_size())) 1552 cost += original[0].notation_size() 1553 return op_list, cost 1554 1555 cost = {} 1556 op_list = {} 1557 # deletion 1558 op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive( 1559 original[1:], compare_to 1560 ) 1561 op_list["voicedel"].append( 1562 ("voicedel", original[0], None, original[0].notation_size()) 1563 ) 1564 cost["voicedel"] += original[0].notation_size() 1565 for i, c in enumerate(compare_to): 1566 # substitution 1567 ( 1568 op_list["voicesub" + str(i)], 1569 cost["voicesub" + str(i)], 1570 ) = Comparison._voices_coupling_recursive( 1571 original[1:], compare_to[:i] + compare_to[i + 1:] 1572 ) 1573 if ( 1574 compare_to[0] != original[0] 1575 ): # add the cost of the sub and the operations from inside_bar_diff 1576 op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin( 1577 original[0].annot_notes, c.annot_notes 1578 ) # compute the distance from original[0] and compare_to[i] 1579 op_list["voicesub" + str(i)].extend(op_list_inside_bar) 1580 cost["voicesub" + str(i)] += cost_inside_bar 1581 # compute the minimum of the possibilities 1582 min_key = min(cost, key=cost.get) 1583 out = op_list[min_key], cost[min_key] 1584 return out 1585 1586 @staticmethod 1587 def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]: 1588 ''' 1589 Compare two annotated scores, computing an operations list and the cost of applying those 1590 operations to the first score to generate the second score. 1591 1592 Args: 1593 score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare. 1594 score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare. 1595 1596 Returns: 1597 list[tuple], int: The operations list and the cost 1598 ''' 1599 # Clear all memoizer caches, in case we are called again with different scores. 1600 # The cached results are no longer valid. 1601 Comparison._clear_memoizer_caches() 1602 1603 op_list_total: list[tuple] = [] 1604 cost_total: int = 0 1605 1606 if score1.n_of_parts == score2.n_of_parts: 1607 n_of_parts = score1.n_of_parts 1608 else: 1609 # The two scores have differing number of parts. For now, assume that 1610 # the parts are in the same order in both scores, and that the missing 1611 # parts are the ones that should have been at the end of the smaller 1612 # score. In future we could do something like we do with voices, where 1613 # we try all the combinations and compare the most-similar pairs of 1614 # parts, and the rest are considered to be the extra parts (deleted 1615 # from score1, or inserted into score2). 1616 n_of_parts = min(score1.n_of_parts, score2.n_of_parts) 1617 if score1.n_of_parts > score2.n_of_parts: 1618 # score1 has more parts that must be deleted 1619 for part_idx in range(score2.n_of_parts, score1.n_of_parts): 1620 deleted_part = score1.part_list[part_idx] 1621 op_list_total.append( 1622 ( 1623 "delpart", 1624 deleted_part, 1625 None, 1626 deleted_part.notation_size() 1627 ) 1628 ) 1629 cost_total += deleted_part.notation_size() 1630 else: 1631 # score2 has more parts that must be inserted 1632 for part_idx in range(score1.n_of_parts, score2.n_of_parts): 1633 inserted_part = score2.part_list[part_idx] 1634 op_list_total.append( 1635 ( 1636 "inspart", 1637 None, 1638 inserted_part, 1639 inserted_part.notation_size() 1640 ) 1641 ) 1642 cost_total += inserted_part.notation_size() 1643 1644 # iterate over parts that exist in both scores 1645 for p_number in range(n_of_parts): 1646 # compute non-common-subseq 1647 ncs = Comparison._non_common_subsequences_of_measures( 1648 score1.part_list[p_number].bar_list, 1649 score2.part_list[p_number].bar_list, 1650 ) 1651 # compute blockdiff 1652 for subseq in ncs: 1653 op_list_block, cost_block = Comparison._block_diff_lin( 1654 subseq["original"], subseq["compare_to"] 1655 ) 1656 op_list_total.extend(op_list_block) 1657 cost_total += cost_block 1658 1659 # compare the staff groups 1660 groups_op_list, groups_cost = Comparison._staff_groups_set_distance( 1661 score1.staff_group_list, score2.staff_group_list 1662 ) 1663 op_list_total.extend(groups_op_list) 1664 cost_total += groups_cost 1665 1666 # compare the metadata items 1667 mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance( 1668 score1.metadata_items_list, score2.metadata_items_list 1669 ) 1670 op_list_total.extend(mditems_op_list) 1671 cost_total += mditems_cost 1672 1673 # Add the cost of any syntax errors in score1 that were fixed during parsing. 1674 # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency. 1675 total_syms: int = score1.notation_size() + score2.notation_size() 1676 cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed 1677 if cost_plus_errors > total_syms: 1678 adjustment: int = cost_plus_errors - total_syms 1679 score1.num_syntax_errors_fixed -= adjustment 1680 1681 cost_total += score1.num_syntax_errors_fixed 1682 1683 return op_list_total, cost_total
@staticmethod
def
annotated_scores_diff( score1: musicdiff.annotation.AnnScore, score2: musicdiff.annotation.AnnScore) -> tuple[list[tuple], int]:
1586 @staticmethod 1587 def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]: 1588 ''' 1589 Compare two annotated scores, computing an operations list and the cost of applying those 1590 operations to the first score to generate the second score. 1591 1592 Args: 1593 score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare. 1594 score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare. 1595 1596 Returns: 1597 list[tuple], int: The operations list and the cost 1598 ''' 1599 # Clear all memoizer caches, in case we are called again with different scores. 1600 # The cached results are no longer valid. 1601 Comparison._clear_memoizer_caches() 1602 1603 op_list_total: list[tuple] = [] 1604 cost_total: int = 0 1605 1606 if score1.n_of_parts == score2.n_of_parts: 1607 n_of_parts = score1.n_of_parts 1608 else: 1609 # The two scores have differing number of parts. For now, assume that 1610 # the parts are in the same order in both scores, and that the missing 1611 # parts are the ones that should have been at the end of the smaller 1612 # score. In future we could do something like we do with voices, where 1613 # we try all the combinations and compare the most-similar pairs of 1614 # parts, and the rest are considered to be the extra parts (deleted 1615 # from score1, or inserted into score2). 1616 n_of_parts = min(score1.n_of_parts, score2.n_of_parts) 1617 if score1.n_of_parts > score2.n_of_parts: 1618 # score1 has more parts that must be deleted 1619 for part_idx in range(score2.n_of_parts, score1.n_of_parts): 1620 deleted_part = score1.part_list[part_idx] 1621 op_list_total.append( 1622 ( 1623 "delpart", 1624 deleted_part, 1625 None, 1626 deleted_part.notation_size() 1627 ) 1628 ) 1629 cost_total += deleted_part.notation_size() 1630 else: 1631 # score2 has more parts that must be inserted 1632 for part_idx in range(score1.n_of_parts, score2.n_of_parts): 1633 inserted_part = score2.part_list[part_idx] 1634 op_list_total.append( 1635 ( 1636 "inspart", 1637 None, 1638 inserted_part, 1639 inserted_part.notation_size() 1640 ) 1641 ) 1642 cost_total += inserted_part.notation_size() 1643 1644 # iterate over parts that exist in both scores 1645 for p_number in range(n_of_parts): 1646 # compute non-common-subseq 1647 ncs = Comparison._non_common_subsequences_of_measures( 1648 score1.part_list[p_number].bar_list, 1649 score2.part_list[p_number].bar_list, 1650 ) 1651 # compute blockdiff 1652 for subseq in ncs: 1653 op_list_block, cost_block = Comparison._block_diff_lin( 1654 subseq["original"], subseq["compare_to"] 1655 ) 1656 op_list_total.extend(op_list_block) 1657 cost_total += cost_block 1658 1659 # compare the staff groups 1660 groups_op_list, groups_cost = Comparison._staff_groups_set_distance( 1661 score1.staff_group_list, score2.staff_group_list 1662 ) 1663 op_list_total.extend(groups_op_list) 1664 cost_total += groups_cost 1665 1666 # compare the metadata items 1667 mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance( 1668 score1.metadata_items_list, score2.metadata_items_list 1669 ) 1670 op_list_total.extend(mditems_op_list) 1671 cost_total += mditems_cost 1672 1673 # Add the cost of any syntax errors in score1 that were fixed during parsing. 1674 # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency. 1675 total_syms: int = score1.notation_size() + score2.notation_size() 1676 cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed 1677 if cost_plus_errors > total_syms: 1678 adjustment: int = cost_plus_errors - total_syms 1679 score1.num_syntax_errors_fixed -= adjustment 1680 1681 cost_total += score1.num_syntax_errors_fixed 1682 1683 return op_list_total, cost_total
Compare two annotated scores, computing an operations list and the cost of applying those operations to the first score to generate the second score.
Arguments:
- score1 (
musicdiff.annotation.AnnScore
): The first annotated score to compare. - score2 (
musicdiff.annotation.AnnScore
): The second annotated score to compare.
Returns:
list[tuple], int: The operations list and the cost