musicdiff.comparison

   1# ------------------------------------------------------------------------------
   2# Purpose:       comparison is a music comparison package for use by musicdiff.
   3#                musicdiff is a package for comparing music scores using music21.
   4#
   5# Authors:       Greg Chapman <gregc@mac.com>
   6#                musicdiff is derived from:
   7#                   https://github.com/fosfrancesco/music-score-diff.git
   8#                   by Francesco Foscarin <foscarin.francesco@gmail.com>
   9#
  10# Copyright:     (c) 2022-2025 Francesco Foscarin, Greg Chapman
  11# License:       MIT, see LICENSE
  12# ------------------------------------------------------------------------------
  13
  14__docformat__ = "google"
  15
import copy
import functools
from collections import namedtuple
from difflib import ndiff
from pathlib import Path

# import typing as t
import numpy as np

from music21.common import OffsetQL
from musicdiff.annotation import AnnScore, AnnNote, AnnVoice, AnnExtra, AnnLyric
from musicdiff.annotation import AnnStaffGroup, AnnMetadataItem
from musicdiff import M21Utils
  28
  29class EvaluationMetrics:
  30    def __init__(
  31        self,
  32        gt_path: Path,
  33        pred_path: Path,
  34        gt_numsyms: int,
  35        pred_numsyms: int,
  36        omr_edit_distance: int,
  37        edit_distances_dict: dict[str, int],
  38        omr_ned: float
  39    ):
  40        self.gt_path: Path = gt_path
  41        self.pred_path: Path = pred_path
  42        self.gt_numsyms: int = gt_numsyms
  43        self.pred_numsyms: int = pred_numsyms
  44        self.omr_edit_distance: int = omr_edit_distance
  45        self.edit_distances_dict: dict[str, int] = edit_distances_dict
  46        self.omr_ned: float = omr_ned
  47
  48# memoizers to speed up the recursive computation
  49def _memoize_notes_set_distance(func):
  50    def memoizer(original, compare_to):
  51        key = repr(original) + repr(compare_to)
  52        if key not in Comparison._memoizer_mem:
  53            Comparison._memoizer_mem[key] = func(original, compare_to)
  54        return copy.deepcopy(Comparison._memoizer_mem[key])
  55
  56    return memoizer
  57
  58def _memoize_extras_set_distance(func):
  59    def memoizer(original, compare_to):
  60        key = repr(original) + repr(compare_to)
  61        if key not in Comparison._memoizer_mem:
  62            Comparison._memoizer_mem[key] = func(original, compare_to)
  63        return copy.deepcopy(Comparison._memoizer_mem[key])
  64
  65    return memoizer
  66
  67def _memoize_staff_groups_set_distance(func):
  68    def memoizer(original, compare_to):
  69        key = repr(original) + repr(compare_to)
  70        if key not in Comparison._memoizer_mem:
  71            Comparison._memoizer_mem[key] = func(original, compare_to)
  72        return copy.deepcopy(Comparison._memoizer_mem[key])
  73
  74    return memoizer
  75
  76def _memoize_metadata_items_set_distance(func):
  77    def memoizer(original, compare_to):
  78        key = repr(original) + repr(compare_to)
  79        if key not in Comparison._memoizer_mem:
  80            Comparison._memoizer_mem[key] = func(original, compare_to)
  81        return copy.deepcopy(Comparison._memoizer_mem[key])
  82
  83    return memoizer
  84
  85def _memoize_inside_bars_diff_lin(func):
  86    def memoizer(original, compare_to):
  87        key = repr(original) + repr(compare_to)
  88        if key not in Comparison._memoizer_mem:
  89            Comparison._memoizer_mem[key] = func(original, compare_to)
  90        return copy.deepcopy(Comparison._memoizer_mem[key])
  91
  92    return memoizer
  93
  94def _memoize_lyrics_diff_lin(func):
  95    def memoizer(original, compare_to):
  96        key = repr(original) + repr(compare_to)
  97        if key not in Comparison._memoizer_mem:
  98            Comparison._memoizer_mem[key] = func(original, compare_to)
  99        return copy.deepcopy(Comparison._memoizer_mem[key])
 100
 101    return memoizer
 102
 103def _memoize_block_diff_lin(func):
 104    def memoizer(original, compare_to):
 105        key = repr(original) + repr(compare_to)
 106        if key not in Comparison._memoizer_mem:
 107            Comparison._memoizer_mem[key] = func(original, compare_to)
 108        return copy.deepcopy(Comparison._memoizer_mem[key])
 109
 110    return memoizer
 111
 112def _memoize_pitches_lev_diff(func):
 113    def memoizer(original, compare_to, noteNode1, noteNode2, ids):
 114        key = (
 115            repr(original)
 116            + repr(compare_to)
 117            + repr(noteNode1)
 118            + repr(noteNode2)
 119            + repr(ids)
 120        )
 121        if key not in Comparison._memoizer_mem:
 122            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, ids)
 123        return copy.deepcopy(Comparison._memoizer_mem[key])
 124
 125    return memoizer
 126
 127def _memoize_beamtuplet_lev_diff(func):
 128    def memoizer(original, compare_to, noteNode1, noteNode2, which):
 129        key = (
 130            repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which
 131        )
 132        if key not in Comparison._memoizer_mem:
 133            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which)
 134        return copy.deepcopy(Comparison._memoizer_mem[key])
 135
 136    return memoizer
 137
 138def _memoize_generic_lev_diff(func):
 139    def memoizer(original, compare_to, noteNode1, noteNode2, which):
 140        key = (
 141            repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which
 142        )
 143        if key not in Comparison._memoizer_mem:
 144            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which)
 145        return copy.deepcopy(Comparison._memoizer_mem[key])
 146
 147    return memoizer
 148
class Comparison:
    """
    Namespace of static methods that compute diffs (lists of edit operations,
    plus their costs) between annotated score objects.
    """
    # Cache shared by all of this module's _memoize_* decorators; keys are
    # built from the repr of the compared objects.
    _memoizer_mem: dict = {}

    @staticmethod
    def _clear_memoizer_caches():
        # Rebind (rather than mutate) the shared dict so that no stale entries
        # can leak into a subsequent comparison.
        Comparison._memoizer_mem = {}
 155
    @staticmethod
    def _myers_diff(a_lines, b_lines):
        """
        Compute a shortest edit script between a_lines and b_lines using
        Myers' diff algorithm (used for the LCS of bars instead of the
        recursive algorithm in section 3.2 of the paper).

        Each element of a_lines/b_lines is a pair: element [0] is the value
        compared for equality, element [1] is the id recorded in the output.

        Returns:
            np.ndarray -- the edit script as rows of (step, id), where step is
            0 (original step), 1 (compare_to step), or 2 (equal step).
        """
        # This marks the farthest-right point along each diagonal in the edit
        # graph, along with the history that got it there
        Frontier = namedtuple("Frontier", ["x", "history"])
        frontier = {1: Frontier(0, [])}

        a_max = len(a_lines)
        b_max = len(b_lines)
        for d in range(0, a_max + b_max + 1):
            for k in range(-d, d + 1, 2):
                # This determines whether our next search point will be going down
                # in the edit graph, or to the right.
                #
                # The intuition for this is that we should go down if we're on the
                # left edge (k == -d) to make sure that the left edge is fully
                # explored.
                #
                # If we aren't on the top (k != d), then only go down if going down
                # would take us to territory that hasn't sufficiently been explored
                # yet.
                go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x)

                # Figure out the starting point of this iteration. The diagonal
                # offsets come from the geometry of the edit grid - if you're going
                # down, your diagonal is lower, and if you're going right, your
                # diagonal is higher.
                if go_down:
                    old_x, history = frontier[k + 1]
                    x = old_x
                else:
                    old_x, history = frontier[k - 1]
                    x = old_x + 1

                # We want to avoid modifying the old history, since some other step
                # may decide to use it.
                history = history[:]
                y = x - k

                # We start at the invalid point (0, 0) - we should only start building
                # up history when we move off of it.
                if 1 <= y <= b_max and go_down:
                    history.append((1, b_lines[y - 1][1]))  # add comparetostep
                elif 1 <= x <= a_max:
                    history.append((0, a_lines[x - 1][1]))  # add originalstep

                # Chew up as many diagonal moves as we can - these correspond to common lines,
                # and they're considered "free" by the algorithm because we want to maximize
                # the number of these in the output.
                while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]:
                    x += 1
                    y += 1
                    history.append((2, a_lines[x - 1][1]))  # add equal step

                if x >= a_max and y >= b_max:
                    # If we're here, then we've traversed through the bottom-left corner,
                    # and are done.
                    return np.array(history)

                frontier[k] = Frontier(x, history)

        assert False, "Could not find edit script"
 219
 220    @staticmethod
 221    def _non_common_subsequences_myers(original, compare_to):
 222        # Both original and compare_to are list of lists, or numpy arrays with 2 columns.
 223        # This is necessary because bars need two representation at the same time.
 224        # One without the id (for comparison), and one with the id (to retrieve the bar
 225        # at the end).
 226
 227        # get the list of operations
 228        op_list = Comparison._myers_diff(
 229            np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64)
 230        )[::-1]
 231        # retrieve the non common subsequences
 232        non_common_subsequences = []
 233        non_common_subsequences.append({"original": [], "compare_to": []})
 234        ind = 0
 235        for op in op_list[::-1]:
 236            if op[0] == 2:  # equal
 237                non_common_subsequences.append({"original": [], "compare_to": []})
 238                ind += 1
 239            elif op[0] == 0:  # original step
 240                non_common_subsequences[ind]["original"].append(op[1])
 241            elif op[0] == 1:  # compare to step
 242                non_common_subsequences[ind]["compare_to"].append(op[1])
 243        # remove the empty dict from the list
 244        non_common_subsequences = [
 245            s for s in non_common_subsequences if s != {"original": [], "compare_to": []}
 246        ]
 247        return non_common_subsequences
 248
 249    @staticmethod
 250    def _non_common_subsequences_of_measures(original_m, compare_to_m):
 251        # Take the hash for each measure to run faster comparison
 252        # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison),
 253        # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after
 254        # computation)
 255        original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m]
 256        compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m]
 257        ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int)
 258        # retrieve the original pointers to measures
 259        new_out = []
 260        for e in ncs:
 261            new_out.append({})
 262            for k in e.keys():
 263                new_out[-1][k] = []
 264                for repr_hash in e[k]:
 265                    if k == "original":
 266                        new_out[-1][k].append(
 267                            next(m for m in original_m if m.precomputed_repr == repr_hash)
 268                        )
 269                    else:
 270                        new_out[-1][k].append(
 271                            next(m for m in compare_to_m if m.precomputed_repr == repr_hash)
 272                        )
 273
 274        return new_out
 275
    @staticmethod
    @_memoize_pitches_lev_diff
    def _pitches_levenshtein_diff(
        original: list[tuple[str, str, bool]],
        compare_to: list[tuple[str, str, bool]],
        noteNode1: AnnNote,
        noteNode2: AnnNote,
        ids: tuple[int, int]
    ):
        """
        Compute the levenshtein distance between two sequences of pitches.

        Arguments:
            original {list} -- list of (name, accidental, tie) pitch tuples
            compare_to {list} -- list of (name, accidental, tie) pitch tuples
            noteNode1 {AnnNote} -- the note the original pitches belong to (for referencing)
            noteNode2 {AnnNote} -- the note the compare_to pitches belong to (for referencing)
            ids {tuple} -- a tuple of 2 elements with the indices of the notes considered

        Returns:
            [list] -- the list of edit operations
            [int] -- the cost of the diff
        """
        # base case: both sides exhausted
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        # base case: original exhausted; everything left in compare_to is inserted
        if len(original) == 0:
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
            )
            op_list.append(
                ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
            )
            cost += M21Utils.pitch_size(compare_to[0])
            return op_list, cost

        # base case: compare_to exhausted; everything left in original is deleted
        if len(compare_to) == 0:
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
            )
            op_list.append(
                ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
            )
            cost += M21Utils.pitch_size(original[0])
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost_dict = {}
        op_list_dict = {}
        # del-pitch
        op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
        )
        cost_dict["delpitch"] += M21Utils.pitch_size(original[0])
        op_list_dict["delpitch"].append(
            ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
        )
        # ins-pitch
        op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff(
            original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
        )
        cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0])
        op_list_dict["inspitch"].append(
            ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
        )
        # edit-pitch
        op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1)
        )
        if original[0] == compare_to[0]:  # to avoid performing the pitch_diff
            pitch_diff_op_list = []
            pitch_diff_cost = 0
        else:
            pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff(
                original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1])
            )
        cost_dict["editpitch"] += pitch_diff_cost
        op_list_dict["editpitch"].extend(pitch_diff_op_list)
        # compute the minimum of the possibilities
        # (ties are broken by insertion order: delpitch, inspitch, editpitch)
        min_key = min(cost_dict, key=lambda k: cost_dict[k])
        out = op_list_dict[min_key], cost_dict[min_key]
        return out
 353
 354    @staticmethod
 355    def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids):
 356        """
 357        Compute the differences between two pitch (definition from the paper).
 358        a pitch consist of a tuple: pitch name (letter+number), accidental, tie.
 359        param : pitch1. The music_notation_repr tuple of note1
 360        param : pitch2. The music_notation_repr tuple of note2
 361        param : noteNode1. The noteNode where pitch1 belongs
 362        param : noteNode2. The noteNode where pitch2 belongs
 363        param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord
 364        Returns:
 365            [list] -- the list of differences
 366            [int] -- the cost of diff
 367        """
 368        cost = 0
 369        op_list = []
 370        # add for pitch name differences
 371        if pitch1[0] != pitch2[0]:
 372            cost += 1
 373            # TODO: select the note in a more precise way in case of a chord
 374            # rest to note
 375            if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"):  # xor
 376                op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids))
 377            else:  # they are two notes
 378                op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids))
 379
 380        # add for the accidentals
 381        if pitch1[1] != pitch2[1]:  # if the accidental is different
 382            if pitch1[1] == "None":
 383                assert pitch2[1] != "None"
 384                cost += 1
 385                op_list.append(("accidentins", noteNode1, noteNode2, 1, ids))
 386            elif pitch2[1] == "None":
 387                assert pitch1[1] != "None"
 388                cost += 1
 389                op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids))
 390            else:  # a different type of alteration is present
 391                cost += 2  # delete then add
 392                op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids))
 393        # add for the ties
 394        if pitch1[2] != pitch2[2]:
 395            # exclusive or. Add if one is tied and not the other.
 396            # probably to revise for chords
 397            cost += 1
 398            if pitch1[2]:
 399                assert not pitch2[2]
 400                op_list.append(("tiedel", noteNode1, noteNode2, 1, ids))
 401            elif pitch2[2]:
 402                assert not pitch1[2]
 403                op_list.append(("tieins", noteNode1, noteNode2, 1, ids))
 404        return op_list, cost
 405
    @staticmethod
    @_memoize_block_diff_lin
    def _block_diff_lin(original, compare_to):
        """
        Compute the edit distance (and op list) between two lists of annotated
        bars, using a linear (Levenshtein-style) alignment of ins/del/edit
        operations.  An "editbar" also diffs the bar's extras, lyrics, and
        notes (or voices, if the bar includes voicing).

        Arguments:
            original {list} -- list of annotated bars in the original score
            compare_to {list} -- list of annotated bars in the score compared to
        Returns:
            [list] -- the list of edit operations
            [int] -- the cost of the diff
        """
        # base case: both sides exhausted
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        # base case: original exhausted; remaining compare_to bars are inserted
        if len(original) == 0:
            op_list, cost = Comparison._block_diff_lin(original, compare_to[1:])
            cost += compare_to[0].notation_size()
            op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size()))
            return op_list, cost

        # base case: compare_to exhausted; remaining original bars are deleted
        if len(compare_to) == 0:
            op_list, cost = Comparison._block_diff_lin(original[1:], compare_to)
            cost += original[0].notation_size()
            op_list.append(("delbar", original[0], None, original[0].notation_size()))
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost_dict = {}
        op_list_dict = {}
        # del-bar
        op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin(
            original[1:], compare_to
        )
        cost_dict["delbar"] += original[0].notation_size()
        op_list_dict["delbar"].append(
            ("delbar", original[0], None, original[0].notation_size())
        )
        # ins-bar
        op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin(
            original, compare_to[1:]
        )
        cost_dict["insbar"] += compare_to[0].notation_size()
        op_list_dict["insbar"].append(
            ("insbar", None, compare_to[0], compare_to[0].notation_size())
        )
        # edit-bar
        op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin(
            original[1:], compare_to[1:]
        )
        if (
            original[0] == compare_to[0]
        ):  # to avoid performing the _voices_coupling_recursive/_notes_set_distance
            # if it's not needed
            inside_bar_op_list = []
            inside_bar_cost = 0
        else:
            # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras
            # instead of lists of AnnNotes)
            extras_op_list, extras_cost = Comparison._extras_set_distance(
                original[0].extras_list, compare_to[0].extras_list
            )

            # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras)
            lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin(
                original[0].lyrics_list, compare_to[0].lyrics_list
            )

            if original[0].includes_voicing:
                # run the voice coupling algorithm, and add to inside_bar_op_list
                # and inside_bar_cost
                inside_bar_op_list, inside_bar_cost = (
                    Comparison._voices_coupling_recursive(
                        original[0].voices_list, compare_to[0].voices_list
                    )
                )
            else:
                # run the set distance algorithm, and add to inside_bar_op_list
                # and inside_bar_cost
                inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance(
                    original[0].annot_notes, compare_to[0].annot_notes
                )

            inside_bar_op_list.extend(extras_op_list)
            inside_bar_cost += extras_cost
            inside_bar_op_list.extend(lyrics_op_list)
            inside_bar_cost += lyrics_cost

        cost_dict["editbar"] += inside_bar_cost
        op_list_dict["editbar"].extend(inside_bar_op_list)
        # compute the minimum of the possibilities
        # (ties are broken by insertion order: delbar, insbar, editbar)
        min_key = min(cost_dict, key=lambda k: cost_dict[k])
        out = op_list_dict[min_key], cost_dict[min_key]
        return out
 491
 492    @staticmethod
 493    @_memoize_lyrics_diff_lin
 494    def _lyrics_diff_lin(original, compare_to):
 495        # original and compare to are two lists of AnnLyric
 496        if len(original) == 0 and len(compare_to) == 0:
 497            return [], 0
 498
 499        if len(original) == 0:
 500            cost = 0
 501            op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:])
 502            op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size()))
 503            cost += compare_to[0].notation_size()
 504            return op_list, cost
 505
 506        if len(compare_to) == 0:
 507            cost = 0
 508            op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to)
 509            op_list.append(("lyricdel", original[0], None, original[0].notation_size()))
 510            cost += original[0].notation_size()
 511            return op_list, cost
 512
 513        # compute the cost and the op_list for the many possibilities of recursion
 514        cost = {}
 515        op_list = {}
 516        # lyricdel
 517        op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin(
 518            original[1:], compare_to
 519        )
 520        cost["lyricdel"] += original[0].notation_size()
 521        op_list["lyricdel"].append(
 522            ("lyricdel", original[0], None, original[0].notation_size())
 523        )
 524        # lyricins
 525        op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin(
 526            original, compare_to[1:]
 527        )
 528        cost["lyricins"] += compare_to[0].notation_size()
 529        op_list["lyricins"].append(
 530            ("lyricins", None, compare_to[0], compare_to[0].notation_size())
 531        )
 532        # lyricsub
 533        op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin(
 534            original[1:], compare_to[1:]
 535        )
 536        if (
 537            original[0] == compare_to[0]
 538        ):  # avoid call another function if they are equal
 539            lyricsub_op, lyricsub_cost = [], 0
 540        else:
 541            lyricsub_op, lyricsub_cost = (
 542                Comparison._annotated_lyric_diff(original[0], compare_to[0])
 543            )
 544        cost["lyricsub"] += lyricsub_cost
 545        op_list["lyricsub"].extend(lyricsub_op)
 546        # compute the minimum of the possibilities
 547        min_key = min(cost, key=cost.get)
 548        out = op_list[min_key], cost[min_key]
 549        return out
 550
 551    @staticmethod
 552    def _strings_levenshtein_distance(str1: str, str2: str):
 553        counter: dict = {"+": 0, "-": 0}
 554        distance: int = 0
 555        for edit_code in ndiff(str1, str2):
 556            if edit_code[0] == " ":
 557                distance += max(counter.values())
 558                counter = {"+": 0, "-": 0}
 559            else:
 560                counter[edit_code[0]] += 1
 561        distance += max(counter.values())
 562        return distance
 563
 564    @staticmethod
 565    def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool:
 566        if off1 == off2:
 567            return False
 568
 569        # this should never happen, but...
 570        if off1 is None or off2 is None:
 571            if off1 is None and off2 is not None:
 572                return True
 573            if off1 is not None and off2 is None:
 574                return True
 575            return False  # both are None, therefore not different at all
 576
 577        diff: OffsetQL = off1 - off2
 578        if diff < 0:
 579            diff = -diff
 580
 581        if diff > 0.0001:
 582            return True
 583        return False
 584
 585    @staticmethod
 586    def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra):
 587        """
 588        Compute the differences between two annotated extras.
 589        Each annotated extra consists of three values: content, offset, and duration
 590        """
 591        cost = 0
 592        op_list = []
 593
 594        # add for the content
 595        if annExtra1.content != annExtra2.content:
 596            content_cost: int = (
 597                Comparison._strings_levenshtein_distance(
 598                    annExtra1.content or '',
 599                    annExtra2.content or ''
 600                )
 601            )
 602            cost += content_cost
 603            op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost))
 604
 605        # add for the symbolic (cost 2: delete one symbol, add the other)
 606        if annExtra1.symbolic != annExtra2.symbolic:
 607            cost += 2
 608            op_list.append(("extrasymboledit", annExtra1, annExtra2, 2))
 609
 610        # add for the infodict
 611        if annExtra1.infodict != annExtra2.infodict:
 612            info_cost: int = 0
 613            # handle everything in annExtra1 (whether or not it is in annExtra2)
 614            for k, v in annExtra1.infodict.items():
 615                if k not in annExtra2.infodict:
 616                    # not in annExtra2: delete a symbol
 617                    info_cost += 1
 618                elif v != annExtra2.infodict[k]:
 619                    # different in annExtra2: delete a symbol, add a symbol
 620                    info_cost += 2
 621            # handle everything in annExtra2 that is not in annExtra1
 622            for k in annExtra2.infodict:
 623                if k not in annExtra1.infodict:
 624                    # add a symbol
 625                    info_cost += 1
 626            cost += info_cost
 627            op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost))
 628
 629        # add for the offset
 630        # Note: offset here is a float, and some file formats have only four
 631        # decimal places of precision.  So we should not compare exactly here.
 632        if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset):
 633            cost += 1
 634            op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1))
 635
 636        # add for the duration
 637        # Note: duration here is a float, and some file formats have only four
 638        # decimal places of precision.  So we should not compare exactly here.
 639        if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration):
 640            cost += 1
 641            op_list.append(("extradurationedit", annExtra1, annExtra2, 1))
 642
 643        # add for the style
 644        if annExtra1.styledict != annExtra2.styledict:
 645            cost += 1  # someday we might count different items in the styledict
 646            op_list.append(("extrastyleedit", annExtra1, annExtra2, 1))
 647
 648        return op_list, cost
 649
 650    @staticmethod
 651    def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric):
 652        """
 653        Compute the differences between two annotated lyrics.
 654        Each annotated lyric consists of five values: lyric, verse_id, offset, duration,
 655        and styledict.
 656        """
 657        cost = 0
 658        op_list = []
 659
 660        # add for the content
 661        if annLyric1.lyric != annLyric2.lyric:
 662            content_cost: int = (
 663                Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric)
 664            )
 665            cost += content_cost
 666            op_list.append(("lyricedit", annLyric1, annLyric2, content_cost))
 667
 668        # add for the number
 669        if annLyric1.number != annLyric2.number:
 670            number_cost: int
 671            if annLyric1.number == 0 or annLyric2.number == 0:
 672                # add or delete number
 673                number_cost = 1
 674            else:
 675                # add and delete number
 676                number_cost = 2
 677            cost += number_cost
 678            op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost))
 679
 680        # add for the identifier
 681        if annLyric1.identifier != annLyric2.identifier:
 682            # someday we might do a Levenshtein distance of the two ids
 683            id_cost: int
 684            if not annLyric1.identifier or not annLyric1.identifier:
 685                # add or delete identifier
 686                id_cost = 1
 687            else:
 688                # add and delete identifier
 689                id_cost = 2
 690            cost += id_cost
 691            op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost))
 692
 693        # add for the offset
 694        # Note: offset here is a float, and some file formats have only four
 695        # decimal places of precision.  So we should not compare exactly here.
 696        if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset):
 697            cost += 1
 698            op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1))
 699
 700        # add for the style
 701        if annLyric1.styledict != annLyric2.styledict:
 702            cost += 1  # someday we might count different items in the styledict
 703            op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1))
 704
 705        return op_list, cost
 706
 707    @staticmethod
 708    def _annotated_metadata_item_diff(
 709        annMetadataItem1: AnnMetadataItem,
 710        annMetadataItem2: AnnMetadataItem
 711    ):
 712        """
 713        Compute the differences between two annotated metadata items.
 714        Each annotated metadata item has two values: key: str, value: t.Any,
 715        """
 716        cost = 0
 717        op_list = []
 718
 719        # we don't compare items that don't have the same key.
 720        if annMetadataItem1.key != annMetadataItem2.key:
 721            raise ValueError('unexpected comparison of metadata items with different keys')
 722
 723        # add for the value
 724        if annMetadataItem1.value != annMetadataItem2.value:
 725            value_cost: int = (
 726                Comparison._strings_levenshtein_distance(
 727                    str(annMetadataItem1.value),
 728                    str(annMetadataItem2.value)
 729                )
 730            )
 731            cost += value_cost
 732            op_list.append(
 733                ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost)
 734            )
 735
 736        return op_list, cost
 737
 738    @staticmethod
 739    def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup):
 740        """
 741        Compute the differences between two annotated staff groups.
 742        Each annotated staff group consists of five values: name, abbreviation,
 743        symbol, barTogether, part_indices.
 744        """
 745        cost = 0
 746        op_list = []
 747
 748        # add for the name
 749        if annStaffGroup1.name != annStaffGroup2.name:
 750            name_cost: int = (
 751                Comparison._strings_levenshtein_distance(
 752                    annStaffGroup1.name,
 753                    annStaffGroup2.name
 754                )
 755            )
 756            cost += name_cost
 757            op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost))
 758
 759        # add for the abbreviation
 760        if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation:
 761            abbreviation_cost: int = (
 762                Comparison._strings_levenshtein_distance(
 763                    annStaffGroup1.abbreviation,
 764                    annStaffGroup2.abbreviation
 765                )
 766            )
 767            cost += abbreviation_cost
 768            op_list.append((
 769                "staffgrpabbreviationedit",
 770                annStaffGroup1,
 771                annStaffGroup2,
 772                abbreviation_cost
 773            ))
 774
 775        # add for the symbol
 776        if annStaffGroup1.symbol != annStaffGroup2.symbol:
 777            symbol_cost: int
 778            if not annStaffGroup1.symbol or not annStaffGroup2.symbol:
 779                # add or delete symbol
 780                symbol_cost = 1
 781            else:
 782                # add and delete symbol
 783                symbol_cost = 2
 784            cost += symbol_cost
 785            op_list.append(
 786                ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost)
 787            )
 788
 789        # add for barTogether
 790        if annStaffGroup1.barTogether != annStaffGroup2.barTogether:
 791            barTogether_cost: int = 1
 792            cost += barTogether_cost
 793            op_list.append(
 794                ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost)
 795            )
 796
 797        # add for partIndices (sorted list of int)
 798        if annStaffGroup1.part_indices != annStaffGroup2.part_indices:
 799            partIndices_cost: int = 0
 800            if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]:
 801                partIndices_cost += 1  # vertical start
 802            if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]:
 803                partIndices_cost += 1  # vertical height
 804            if partIndices_cost == 0:
 805                # should never get here, but we have to have a cost
 806                partIndices_cost = 1
 807            cost += partIndices_cost
 808            op_list.append(
 809                ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost)
 810            )
 811
 812        return op_list, cost
 813
 814    @staticmethod
 815    @_memoize_inside_bars_diff_lin
 816    def _inside_bars_diff_lin(original, compare_to):
 817        # original and compare to are two lists of annotatedNote
 818        if len(original) == 0 and len(compare_to) == 0:
 819            return [], 0
 820
 821        if len(original) == 0:
 822            cost = 0
 823            op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:])
 824            op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size()))
 825            cost += compare_to[0].notation_size()
 826            return op_list, cost
 827
 828        if len(compare_to) == 0:
 829            cost = 0
 830            op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to)
 831            op_list.append(("notedel", original[0], None, original[0].notation_size()))
 832            cost += original[0].notation_size()
 833            return op_list, cost
 834
 835        # compute the cost and the op_list for the many possibilities of recursion
 836        cost = {}
 837        op_list = {}
 838        # notedel
 839        op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin(
 840            original[1:], compare_to
 841        )
 842        cost["notedel"] += original[0].notation_size()
 843        op_list["notedel"].append(
 844            ("notedel", original[0], None, original[0].notation_size())
 845        )
 846        # noteins
 847        op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin(
 848            original, compare_to[1:]
 849        )
 850        cost["noteins"] += compare_to[0].notation_size()
 851        op_list["noteins"].append(
 852            ("noteins", None, compare_to[0], compare_to[0].notation_size())
 853        )
 854        # notesub
 855        op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin(
 856            original[1:], compare_to[1:]
 857        )
 858        if (
 859            original[0] == compare_to[0]
 860        ):  # avoid call another function if they are equal
 861            notesub_op, notesub_cost = [], 0
 862        else:
 863            notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0])
 864        cost["notesub"] += notesub_cost
 865        op_list["notesub"].extend(notesub_op)
 866        # compute the minimum of the possibilities
 867        min_key = min(cost, key=cost.get)
 868        out = op_list[min_key], cost[min_key]
 869        return out
 870
    @staticmethod
    def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote):
        """
        Compute the differences between two annotated notes.
        Compares pitches, note head, dots, grace status, beamings, tuplets,
        tuplet info, articulations, expressions, horizontal position (gap
        from the previous note), note shape, notehead fill, notehead
        parentheses, stem direction, and style.
        Arguments:
            annNote1 {[AnnNote]} -- original AnnNote
            annNote2 {[AnnNote]} -- compare_to AnnNote
        Returns:
            a tuple (op_list, cost): the list of edit operations and the
            total edit cost
        """
        cost = 0
        op_list = []
        # add for the pitches
        # (skip the pitch diff entirely if the pitch lists are equal)
        if annNote1.pitches == annNote2.pitches:
            op_list_pitch, cost_pitch = [], 0
        else:
            # pitches diff is computed using Levenshtein distances (they are already ordered)
            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
            )
        op_list.extend(op_list_pitch)
        cost += cost_pitch
        # add for the notehead
        if annNote1.note_head != annNote2.note_head:
            # delete one note head, add the other (this isn't noteshape, this is
            # just quarter-note note head vs half-note note head, etc)
            cost += 2
            op_list.append(("headedit", annNote1, annNote2, 2))
        # add for the dots
        if annNote1.dots != annNote2.dots:
            # add one for each added (or deleted) dot
            dots_diff = abs(annNote1.dots - annNote2.dots)
            cost += dots_diff
            if annNote1.dots > annNote2.dots:
                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
            else:
                op_list.append(("dotins", annNote1, annNote2, dots_diff))
        # add for the grace-note type
        if annNote1.graceType != annNote2.graceType:
            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
            cost += 2
            op_list.append(("graceedit", annNote1, annNote2, 2))
        # add for the grace-note slash
        if annNote1.graceSlash != annNote2.graceSlash:
            # add or delete the slash
            cost += 1
            op_list.append(("graceslashedit", annNote1, annNote2, 1))
        # add for the beamings
        if annNote1.beamings != annNote2.beamings:
            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
            )
            op_list.extend(beam_op_list)
            cost += beam_cost
        # add for the tuplet types
        if annNote1.tuplets != annNote2.tuplets:
            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_op_list)
            cost += tuplet_cost
        # add for the tuplet info (also reported as "tuplet" edits)
        if annNote1.tuplet_info != annNote2.tuplet_info:
            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_info_op_list)
            cost += tuplet_info_cost
        # add for the articulations
        if annNote1.articulations != annNote2.articulations:
            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
                annNote1.articulations,
                annNote2.articulations,
                annNote1,
                annNote2,
                "articulation",
            )
            op_list.extend(artic_op_list)
            cost += artic_cost
        # add for the expressions
        if annNote1.expressions != annNote2.expressions:
            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
                annNote1.expressions,
                annNote2.expressions,
                annNote1,
                annNote2,
                "expression",
            )
            op_list.extend(expr_op_list)
            cost += expr_cost

        # add for gap from previous note or start of measure if first note in measure
        # (i.e. horizontal position shift)
        if annNote1.gap_dur != annNote2.gap_dur:
            # in all cases, the edit is a simple horizontal shift of the note
            cost += 1
            if annNote1.gap_dur == 0:
                op_list.append(("insspace", annNote1, annNote2, 1))
            elif annNote2.gap_dur == 0:
                op_list.append(("delspace", annNote1, annNote2, 1))
            else:
                # neither is zero
                op_list.append(("editspace", annNote1, annNote2, 1))

        # add for noteshape
        if annNote1.noteshape != annNote2.noteshape:
            # always delete existing note shape and add the new one
            cost += 2
            op_list.append(("editnoteshape", annNote1, annNote2, 2))
        # add for noteheadFill
        if annNote1.noteheadFill != annNote2.noteheadFill:
            # always delete existing note fill and add the new one
            cost += 2
            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
        # add for noteheadParenthesis (True or False)
        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
            # always either add or delete parentheses
            cost += 1
            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
        # add for stemDirection
        if annNote1.stemDirection != annNote2.stemDirection:
            stemdir_cost: int
            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
                # gonna add (or delete) a stem
                stemdir_cost = 1
            else:
                # gonna change a stem (add then delete)
                stemdir_cost = 2
            cost += stemdir_cost
            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
        # add for the styledict
        if annNote1.styledict != annNote2.styledict:
            cost += 1  # someday we might count different items in the styledict
            op_list.append(("editstyle", annNote1, annNote2, 1))

        return op_list, cost
1006
1007    @staticmethod
1008    @_memoize_beamtuplet_lev_diff
1009    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
1010        """
1011        Compute the levenshtein distance between two sequences of beaming or tuples.
1012        Arguments:
1013            original {list} -- list of strings (start, stop, continue or partial)
1014            compare_to {list} -- list of strings (start, stop, continue or partial)
1015            note1 {AnnNote} -- the note for referencing in the score
1016            note2 {AnnNote} -- the note for referencing in the score
1017            which -- a string: "beam" or "tuplet" depending what we are comparing
1018        """
1019        if which not in ("beam", "tuplet"):
1020            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")
1021
1022        if len(original) == 0 and len(compare_to) == 0:
1023            return [], 0
1024
1025        if len(original) == 0:
1026            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1027                original, compare_to[1:], note1, note2, which
1028            )
1029            op_list.append(("ins" + which, note1, note2, 1))
1030            cost += 1
1031            return op_list, cost
1032
1033        if len(compare_to) == 0:
1034            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1035                original[1:], compare_to, note1, note2, which
1036            )
1037            op_list.append(("del" + which, note1, note2, 1))
1038            cost += 1
1039            return op_list, cost
1040
1041        # compute the cost and the op_list for the many possibilities of recursion
1042        cost = {}
1043        op_list = {}
1044        # delwhich
1045        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
1046            original[1:], compare_to, note1, note2, which
1047        )
1048        cost["del" + which] += 1
1049        op_list["del" + which].append(("del" + which, note1, note2, 1))
1050        # inswhich
1051        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
1052            original, compare_to[1:], note1, note2, which
1053        )
1054        cost["ins" + which] += 1
1055        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1056        # editwhich
1057        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
1058            original[1:], compare_to[1:], note1, note2, which
1059        )
1060        if original[0] == compare_to[0]:
1061            beam_diff_op_list = []
1062            beam_diff_cost = 0
1063        else:
1064            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
1065        cost["edit" + which] += beam_diff_cost
1066        op_list["edit" + which].extend(beam_diff_op_list)
1067        # compute the minimum of the possibilities
1068        min_key = min(cost, key=cost.get)
1069        out = op_list[min_key], cost[min_key]
1070        return out
1071
1072    @staticmethod
1073    @_memoize_generic_lev_diff
1074    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
1075        """
1076        Compute the Levenshtein distance between two generic sequences of symbols
1077        (e.g., articulations).
1078        Arguments:
1079            original {list} -- list of strings
1080            compare_to {list} -- list of strings
1081            note1 {AnnNote} -- the note for referencing in the score
1082            note2 {AnnNote} -- the note for referencing in the score
1083            which -- a string: e.g. "articulation" depending what we are comparing
1084        """
1085        if len(original) == 0 and len(compare_to) == 0:
1086            return [], 0
1087
1088        if len(original) == 0:
1089            op_list, cost = Comparison._generic_levenshtein_diff(
1090                original, compare_to[1:], note1, note2, which
1091            )
1092            op_list.append(("ins" + which, note1, note2, 1))
1093            cost += 1
1094            return op_list, cost
1095
1096        if len(compare_to) == 0:
1097            op_list, cost = Comparison._generic_levenshtein_diff(
1098                original[1:], compare_to, note1, note2, which
1099            )
1100            op_list.append(("del" + which, note1, note2, 1))
1101            cost += 1
1102            return op_list, cost
1103
1104        # compute the cost and the op_list for the many possibilities of recursion
1105        cost = {}
1106        op_list = {}
1107        # delwhich
1108        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
1109            original[1:], compare_to, note1, note2, which
1110        )
1111        cost["del" + which] += 1
1112        op_list["del" + which].append(("del" + which, note1, note2, 1))
1113        # inswhich
1114        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
1115            original, compare_to[1:], note1, note2, which
1116        )
1117        cost["ins" + which] += 1
1118        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1119        # editwhich
1120        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
1121            original[1:], compare_to[1:], note1, note2, which
1122        )
1123        if original[0] == compare_to[0]:  # to avoid perform the diff
1124            generic_diff_op_list = []
1125            generic_diff_cost = 0
1126        else:
1127            generic_diff_op_list, generic_diff_cost = (
1128                [("edit" + which, note1, note2, 1)],
1129                1,
1130            )
1131        cost["edit" + which] += generic_diff_cost
1132        op_list["edit" + which].extend(generic_diff_op_list)
1133        # compute the minimum of the possibilities
1134        min_key = min(cost, key=cost.get)
1135        out = op_list[min_key], cost[min_key]
1136        return out
1137
    @staticmethod
    @_memoize_notes_set_distance
    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
        """
        Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in
        that order of importance).  If you can't find an exactly matching note, try again without
        visual duration.
        original [list] -- a list of AnnNote (which are never chords)
        compare_to [list] -- a list of AnnNote (which are never chords)
        Returns:
            a tuple (op_list, cost): notedel/noteins ops for unpaired notes,
            substitution ops (from _annotated_note_diff) for paired notes that
            differ, and the total edit cost
        """
        paired_notes: list[tuple[AnnNote, AnnNote]] = []
        unpaired_orig_notes: list[AnnNote] = []
        # shallow copy so we can pop matched notes without mutating compare_to
        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)

        for orig_n in original:
            # fallback is the first comp note that matched everything except
            # visual duration; used only if no perfect match is found
            fallback: AnnNote | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_n in enumerate(unpaired_comp_notes):
                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
                    # this pitch comparison (1) assumes the note is not a chord
                    # (because we don't do chords when Voicing is not set, and
                    # we only call _notes_set_distance when Voicing is not set),
                    # and (2) only compares the visual position of the note (we
                    # are ignoring the accidental here).  This is so that an
                    # accidental change will show up as a pitch edit, not a
                    # note remove/insert.
                    continue
                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
                    continue
                if orig_n.note_is_grace != comp_n.note_is_grace:
                    continue
                if fallback is None:
                    # pitch/offset/graceness all match: remember as fallback
                    fallback = comp_n
                    fallback_i = i

                # visual duration must also match for a perfect match
                if orig_n.note_dur_type != comp_n.note_dur_type:
                    continue
                if orig_n.note_dur_dots != comp_n.note_dur_dots:
                    continue

                # found a perfect match
                paired_notes.append((orig_n, comp_n))

                # remove comp_n from unpaired_comp_notes
                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original note
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_notes.append((orig_n, fallback))
                unpaired_comp_notes.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_notes.append(orig_n)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # notedel: every unpaired original note was deleted
        if unpaired_orig_notes:
            for an in unpaired_orig_notes:
                cost += an.notation_size()
                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))

        # noteins: every unpaired compare_to note was inserted
        if unpaired_comp_notes:
            for an in unpaired_comp_notes:
                cost += an.notation_size()
                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))

        # notesub: paired notes that differ are substitutions
        if paired_notes:
            for ano, anc in paired_notes:
                if ano == anc:
                    # if equal, avoid _annotated_note_diff call
                    notesub_op, notesub_cost = [], 0
                else:
                    notesub_op, notesub_cost = (
                        Comparison._annotated_note_diff(ano, anc)
                    )
                cost += notesub_cost
                op_list.extend(notesub_op)

        return op_list, cost
1231
    @staticmethod
    @_memoize_extras_set_distance
    def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]):
        """
        Gather up pairs of matching extras (using kind, offset, and visual duration, in
        that order of importance).  If you can't find an exactly matching extra, try again
        without visual duration.
        original [list] -- a list of AnnExtras
        compare_to [list] -- a list of AnnExtras
        Returns:
            a tuple (op_list, cost): extradel/extrains ops for unpaired extras,
            substitution ops (from _annotated_extra_diff) for paired extras that
            differ, and the total edit cost
        """
        paired_extras: list[tuple[AnnExtra, AnnExtra]] = []
        unpaired_orig_extras: list[AnnExtra] = []
        # shallow copy so we can pop matched extras without mutating compare_to
        unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to)

        for orig_x in original:
            # fallback is the first comp extra that matched everything except
            # duration (and the kind-specific preferences below); used only if
            # no perfect match is found
            fallback: AnnExtra | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_x in enumerate(unpaired_comp_extras):
                # kind and offset are required for pairing
                if orig_x.kind != comp_x.kind:
                    continue
                if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset):
                    continue
                if fallback is None:
                    fallback = comp_x
                    fallback_i = i

                # duration is preferred for pairing
                if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration):
                    continue

                # there are a few kind-specific elements that are also preferred:
                #   'direction'/'ending': content (visible text)
                #   'keysig'/'timesig'/'clef': symbolic (there are sometimes two
                #       simultaneous keysigs, timesigs, or clefs, and we don't
                #       want to confuse which one is which, producing a diff where
                #       there actually isn't one)
                #   'slur': placements (because there are often two identical slurs
                #       whose only difference is 'above' vs 'below', producing a diff
                #       where there actually isn't one)
                if orig_x.kind in ('direction', 'ending'):
                    if orig_x.content != comp_x.content:
                        continue
                if orig_x.kind == 'clef':
                    if orig_x.symbolic != comp_x.symbolic:
                        continue
                if orig_x.kind in ('keysig', 'timesig'):
                    if orig_x.infodict != comp_x.infodict:
                        continue
                if orig_x.kind == 'slur':
                    orig_placement: str = orig_x.styledict.get('placement', '')
                    comp_placement: str = comp_x.styledict.get('placement', '')
                    if orig_placement != comp_placement:
                        continue

                # found a perfect match
                paired_extras.append((orig_x, comp_x))

                # remove comp_x from unpaired_comp_extras
                unpaired_comp_extras.pop(i)  # remove(comp_x) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original extra
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_extras.append((orig_x, fallback))
                unpaired_comp_extras.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_extras.append(orig_x)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # extradel: every unpaired original extra was deleted
        for extra in unpaired_orig_extras:
            cost += extra.notation_size()
            op_list.append(("extradel", extra, None, extra.notation_size()))

        # extrains: every unpaired compare_to extra was inserted
        for extra in unpaired_comp_extras:
            cost += extra.notation_size()
            op_list.append(("extrains", None, extra, extra.notation_size()))

        # extrasub: paired extras that differ are substitutions
        if paired_extras:
            for extrao, extrac in paired_extras:
                if extrao == extrac:
                    # if equal, avoid _annotated_extra_diff call
                    extrasub_op, extrasub_cost = [], 0
                else:
                    extrasub_op, extrasub_cost = (
                        Comparison._annotated_extra_diff(extrao, extrac)
                    )
                cost += extrasub_cost
                op_list.extend(extrasub_op)

        return op_list, cost
1338
1339    @staticmethod
1340    @_memoize_metadata_items_set_distance
1341    def _metadata_items_set_distance(
1342        original: list[AnnMetadataItem],
1343        compare_to: list[AnnMetadataItem]
1344    ):
1345        """
1346        Gather up pairs of matching metadata_items (using key and value).  If you can't find
1347        an exactly matching metadata_item, try again without value.
1348        original [list] -- a list of AnnMetadataItems
1349        compare_to [list] -- a list of AnnMetadataItems
1350        """
1351        paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = []
1352        unpaired_orig_metadata_items: list[AnnMetadataItem] = []
1353        unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to)
1354
1355        # first look for perfect matches
1356        for orig_mdi in original:
1357            found_it: bool = False
1358            for i, comp_mdi in enumerate(unpaired_comp_metadata_items):
1359                # key is required for perfect match
1360                if orig_mdi.key != comp_mdi.key:
1361                    continue
1362
1363                # value is required for perfect match
1364                if orig_mdi.value != comp_mdi.value:
1365                    continue
1366
1367                # found a perfect match
1368                paired_metadata_items.append((orig_mdi, comp_mdi))
1369
1370                # remove comp_mdi from unpaired_comp_metadata_items
1371                unpaired_comp_metadata_items.pop(i)
1372
1373                found_it = True
1374                break
1375
1376            if found_it:
1377                # on to the next original metadata_item
1378                continue
1379
1380            # we found no perfect match
1381            unpaired_orig_metadata_items.append(orig_mdi)
1382
1383        # now look among the unpaired remainders for key match only
1384        remove_unpaired_orig_items: list[AnnMetadataItem] = []
1385        for orig_mdi in unpaired_orig_metadata_items:
1386            for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items):
1387                # key is required for key-only match
1388                if orig_mdi.key != comp_mdi.key:
1389                    continue
1390
1391                # found a key-only match
1392                paired_metadata_items.append((orig_mdi, comp_mdi))
1393
1394                # remove comp_mdi from unpaired_comp_metadata_items
1395                unpaired_comp_metadata_items.pop(comp_idx)
1396
1397                # make a note of unpaired_orig_metadata_item to remove later
1398                remove_unpaired_orig_items.append(orig_mdi)
1399                break
1400
1401        for remove_mdi in remove_unpaired_orig_items:
1402            unpaired_orig_metadata_items.remove(remove_mdi)
1403
1404        # compute the cost and the op_list
1405        cost: int = 0
1406        op_list: list = []
1407
1408        # mditemdel
1409        for metadata_item in unpaired_orig_metadata_items:
1410            cost += metadata_item.notation_size()
1411            op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size()))
1412
1413        # mditemins
1414        for metadata_item in unpaired_comp_metadata_items:
1415            cost += metadata_item.notation_size()
1416            op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size()))
1417
1418        # mditemsub
1419        if paired_metadata_items:
1420            for metadata_itemo, metadata_itemc in paired_metadata_items:
1421                if metadata_itemo == metadata_itemc:
1422                    # if equal, avoid _annotated_metadata_item_diff call
1423                    metadata_itemsub_op, metadata_itemsub_cost = [], 0
1424                else:
1425                    metadata_itemsub_op, metadata_itemsub_cost = (
1426                        Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc)
1427                    )
1428                cost += metadata_itemsub_cost
1429                op_list.extend(metadata_itemsub_op)
1430
1431        return op_list, cost
1432
1433    @staticmethod
1434    @_memoize_staff_groups_set_distance
1435    def _staff_groups_set_distance(
1436        original: list[AnnStaffGroup],
1437        compare_to: list[AnnStaffGroup]
1438    ):
1439        """
1440        Gather up pairs of matching staffgroups (using start_index and end_index, in
1441        that order of importance).  If you can't find an exactly matching staffgroup,
1442        try again without end_index.
1443        original [list] -- a list of AnnStaffGroups
1444        compare_to [list] -- a list of AnnStaffGroups
1445        """
1446        paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = []
1447        unpaired_orig_staff_groups: list[AnnStaffGroup] = []
1448        unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to)
1449
1450        for orig_sg in original:
1451            fallback: AnnStaffGroup | None = None
1452            fallback_i: int = -1
1453            found_it: bool = False
1454            for i, comp_sg in enumerate(unpaired_comp_staff_groups):
1455                # start index is required for pairing
1456                if orig_sg.part_indices[0] != comp_sg.part_indices[0]:
1457                    continue
1458                if fallback is None:
1459                    fallback = comp_sg
1460                    fallback_i = i
1461
1462                # end index and symbol (brace, bracket, etc) is preferred for pairing
1463                if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]:
1464                    continue
1465                if orig_sg.symbol != comp_sg.symbol:
1466                    continue
1467
1468                # found a perfect match
1469                paired_staff_groups.append((orig_sg, comp_sg))
1470
1471                # remove comp_n from unpaired_comp_staff_groups
1472                unpaired_comp_staff_groups.pop(i)
1473
1474                found_it = True
1475                break
1476
1477            if found_it:
1478                # on to the next original staff_group
1479                continue
1480
1481            # did we find a fallback (matched except for duration)?
1482            if fallback is not None:
1483                paired_staff_groups.append((orig_sg, fallback))
1484                unpaired_comp_staff_groups.pop(fallback_i)
1485                continue
1486
1487            # we found nothing
1488            unpaired_orig_staff_groups.append(orig_sg)
1489
1490        # compute the cost and the op_list
1491        cost: int = 0
1492        op_list: list = []
1493
1494        # staffgrpdel
1495        for staff_group in unpaired_orig_staff_groups:
1496            cost += staff_group.notation_size()
1497            op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size()))
1498
1499        # staffgrpins
1500        for staff_group in unpaired_comp_staff_groups:
1501            cost += staff_group.notation_size()
1502            op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size()))
1503
1504        # staffgrpsub
1505        if paired_staff_groups:
1506            for staff_groupo, staff_groupc in paired_staff_groups:
1507                if staff_groupo == staff_groupc:
1508                    # if equal, avoid _annotated_staff_group_diff call
1509                    staff_groupsub_op, staff_groupsub_cost = [], 0
1510                else:
1511                    staff_groupsub_op, staff_groupsub_cost = (
1512                        Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc)
1513                    )
1514                cost += staff_groupsub_cost
1515                op_list.extend(staff_groupsub_op)
1516
1517        return op_list, cost
1518
1519    @staticmethod
1520    def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]):
1521        """
1522        Compare all the possible voices permutations, considering also deletion and
1523        insertion (equation on office lens).
1524        original [list] -- a list of Voice
1525        compare_to [list] -- a list of Voice
1526        """
1527        if len(original) == 0 and len(compare_to) == 0:  # stop the recursion
1528            return [], 0
1529
1530        if len(original) == 0:
1531            # insertion
1532            op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:])
1533            # add for the inserted voice
1534            op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size()))
1535            cost += compare_to[0].notation_size()
1536            return op_list, cost
1537
1538        if len(compare_to) == 0:
1539            # deletion
1540            op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to)
1541            # add for the deleted voice
1542            op_list.append(("voicedel", original[0], None, original[0].notation_size()))
1543            cost += original[0].notation_size()
1544            return op_list, cost
1545
1546        cost = {}
1547        op_list = {}
1548        # deletion
1549        op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive(
1550            original[1:], compare_to
1551        )
1552        op_list["voicedel"].append(
1553            ("voicedel", original[0], None, original[0].notation_size())
1554        )
1555        cost["voicedel"] += original[0].notation_size()
1556        for i, c in enumerate(compare_to):
1557            # substitution
1558            (
1559                op_list["voicesub" + str(i)],
1560                cost["voicesub" + str(i)],
1561            ) = Comparison._voices_coupling_recursive(
1562                original[1:], compare_to[:i] + compare_to[i + 1:]
1563            )
1564            if (
1565                compare_to[0] != original[0]
1566            ):  # add the cost of the sub and the operations from inside_bar_diff
1567                op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin(
1568                    original[0].annot_notes, c.annot_notes
1569                )  # compute the distance from original[0] and compare_to[i]
1570                op_list["voicesub" + str(i)].extend(op_list_inside_bar)
1571                cost["voicesub" + str(i)] += cost_inside_bar
1572        # compute the minimum of the possibilities
1573        min_key = min(cost, key=cost.get)
1574        out = op_list[min_key], cost[min_key]
1575        return out
1576
1577    @staticmethod
1578    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1579        '''
1580        Compare two annotated scores, computing an operations list and the cost of applying those
1581        operations to the first score to generate the second score.
1582
1583        Args:
1584            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1585            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1586
1587        Returns:
1588            list[tuple], int: The operations list and the cost
1589        '''
1590        # Clear all memoizer caches, in case we are called again with different scores.
1591        # The cached results are no longer valid.
1592        Comparison._clear_memoizer_caches()
1593
1594        op_list_total: list[tuple] = []
1595        cost_total: int = 0
1596
1597        if score1.n_of_parts == score2.n_of_parts:
1598            n_of_parts = score1.n_of_parts
1599        else:
1600            # The two scores have differing number of parts.  For now, assume that
1601            # the parts are in the same order in both scores, and that the missing
1602            # parts are the ones that should have been at the end of the smaller
1603            # score. In future we could do something like we do with voices, where
1604            # we try all the combinations and compare the most-similar pairs of
1605            # parts, and the rest are considered to be the extra parts (deleted
1606            # from score1, or inserted into score2).
1607            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1608            if score1.n_of_parts > score2.n_of_parts:
1609                # score1 has more parts that must be deleted
1610                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1611                    deleted_part = score1.part_list[part_idx]
1612                    op_list_total.append(
1613                        (
1614                            "delpart",
1615                            deleted_part,
1616                            None,
1617                            deleted_part.notation_size()
1618                        )
1619                    )
1620                    cost_total += deleted_part.notation_size()
1621            else:
1622                # score2 has more parts that must be inserted
1623                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1624                    inserted_part = score2.part_list[part_idx]
1625                    op_list_total.append(
1626                        (
1627                            "inspart",
1628                            None,
1629                            inserted_part,
1630                            inserted_part.notation_size()
1631                        )
1632                    )
1633                    cost_total += inserted_part.notation_size()
1634
1635        # iterate over parts that exist in both scores
1636        for p_number in range(n_of_parts):
1637            # compute non-common-subseq
1638            ncs = Comparison._non_common_subsequences_of_measures(
1639                score1.part_list[p_number].bar_list,
1640                score2.part_list[p_number].bar_list,
1641            )
1642            # compute blockdiff
1643            for subseq in ncs:
1644                op_list_block, cost_block = Comparison._block_diff_lin(
1645                    subseq["original"], subseq["compare_to"]
1646                )
1647                op_list_total.extend(op_list_block)
1648                cost_total += cost_block
1649
1650        # compare the staff groups
1651        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1652            score1.staff_group_list, score2.staff_group_list
1653        )
1654        op_list_total.extend(groups_op_list)
1655        cost_total += groups_cost
1656
1657        # compare the metadata items
1658        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1659            score1.metadata_items_list, score2.metadata_items_list
1660        )
1661        op_list_total.extend(mditems_op_list)
1662        cost_total += mditems_cost
1663
1664        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1665        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1666        total_syms: int = score1.notation_size() + score2.notation_size()
1667        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1668        if cost_plus_errors > total_syms:
1669            adjustment: int = cost_plus_errors - total_syms
1670            score1.num_syntax_errors_fixed -= adjustment
1671
1672        cost_total += score1.num_syntax_errors_fixed
1673
1674        return op_list_total, cost_total
class EvaluationMetrics:
    """
    A simple record of the metrics produced by evaluating a predicted (OMR'ed)
    score against a ground-truth score.

    Args:
        gt_path (Path): path to the ground-truth score file.
        pred_path (Path): path to the predicted score file.
        gt_numsyms (int): number of symbols in the ground-truth score.
        pred_numsyms (int): number of symbols in the predicted score.
        omr_edit_distance (int): total OMR edit distance between the two scores.
        edit_distances_dict (dict[str, int]): edit distance broken down by category.
        omr_ned (float): the normalized edit distance (OMR-NED).
    """
    def __init__(
        self,
        gt_path: Path,
        pred_path: Path,
        gt_numsyms: int,
        pred_numsyms: int,
        omr_edit_distance: int,
        edit_distances_dict: dict[str, int],
        omr_ned: float
    ):
        self.gt_path: Path = gt_path
        self.pred_path: Path = pred_path
        self.gt_numsyms: int = gt_numsyms
        self.pred_numsyms: int = pred_numsyms
        self.omr_edit_distance: int = omr_edit_distance
        self.edit_distances_dict: dict[str, int] = edit_distances_dict
        self.omr_ned: float = omr_ned
EvaluationMetrics(gt_path: pathlib.Path, pred_path: pathlib.Path, gt_numsyms: int, pred_numsyms: int, omr_edit_distance: int, edit_distances_dict: dict[str, int], omr_ned: float)

Attributes (each set verbatim from the constructor argument of the same name):
    gt_path: pathlib.Path
    pred_path: pathlib.Path
    gt_numsyms: int
    pred_numsyms: int
    omr_edit_distance: int
    edit_distances_dict: dict[str, int]
    omr_ned: float
class Comparison:
    _memoizer_mem: dict = {}
 152
    @staticmethod
    def _clear_memoizer_caches():
        # Discard every memoized diff result by rebinding the shared cache dict.
        # Called at the start of each score comparison, since cached results
        # from a previous pair of scores are no longer valid.
        Comparison._memoizer_mem = {}
 156
    @staticmethod
    def _myers_diff(a_lines, b_lines):
        """
        Compute an edit script between two sequences of bars using Myers' diff
        algorithm (instead of the recursive LCS algorithm in section 3.2).

        Args:
            a_lines: rows of (comparison-hash, id-hash) for the first sequence.
            b_lines: rows of (comparison-hash, id-hash) for the second sequence.

        Returns:
            np.ndarray: the step history, newest step first once reversed by the
                caller; each entry is (step, id-hash) where step is
                0 (original-only), 1 (compare_to-only), or 2 (equal).
        """
        # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2)
        # This marks the farthest-right point along each diagonal in the edit
        # graph, along with the history that got it there
        Frontier = namedtuple("Frontier", ["x", "history"])
        frontier = {1: Frontier(0, [])}

        a_max = len(a_lines)
        b_max = len(b_lines)
        for d in range(0, a_max + b_max + 1):
            for k in range(-d, d + 1, 2):
                # This determines whether our next search point will be going down
                # in the edit graph, or to the right.
                #
                # The intuition for this is that we should go down if we're on the
                # left edge (k == -d) to make sure that the left edge is fully
                # explored.
                #
                # If we aren't on the top (k != d), then only go down if going down
                # would take us to territory that hasn't sufficiently been explored
                # yet.
                go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x)

                # Figure out the starting point of this iteration. The diagonal
                # offsets come from the geometry of the edit grid - if you're going
                # down, your diagonal is lower, and if you're going right, your
                # diagonal is higher.
                if go_down:
                    old_x, history = frontier[k + 1]
                    x = old_x
                else:
                    old_x, history = frontier[k - 1]
                    x = old_x + 1

                # We want to avoid modifying the old history, since some other step
                # may decide to use it.
                history = history[:]
                y = x - k

                # We start at the invalid point (0, 0) - we should only start building
                # up history when we move off of it.
                if 1 <= y <= b_max and go_down:
                    history.append((1, b_lines[y - 1][1]))  # add comparetostep
                elif 1 <= x <= a_max:
                    history.append((0, a_lines[x - 1][1]))  # add originalstep

                # Chew up as many diagonal moves as we can - these correspond to common lines,
                # and they're considered "free" by the algorithm because we want to maximize
                # the number of these in the output.
                while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]:
                    x += 1
                    y += 1
                    history.append((2, a_lines[x - 1][1]))  # add equal step

                if x >= a_max and y >= b_max:
                    # If we're here, then we've traversed through the bottom-left corner,
                    # and are done.
                    return np.array(history)

                frontier[k] = Frontier(x, history)

        # Every (a_max + b_max)-step path must reach the corner, so this is unreachable.
        assert False, "Could not find edit script"
 220
 221    @staticmethod
 222    def _non_common_subsequences_myers(original, compare_to):
 223        # Both original and compare_to are list of lists, or numpy arrays with 2 columns.
 224        # This is necessary because bars need two representation at the same time.
 225        # One without the id (for comparison), and one with the id (to retrieve the bar
 226        # at the end).
 227
 228        # get the list of operations
 229        op_list = Comparison._myers_diff(
 230            np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64)
 231        )[::-1]
 232        # retrieve the non common subsequences
 233        non_common_subsequences = []
 234        non_common_subsequences.append({"original": [], "compare_to": []})
 235        ind = 0
 236        for op in op_list[::-1]:
 237            if op[0] == 2:  # equal
 238                non_common_subsequences.append({"original": [], "compare_to": []})
 239                ind += 1
 240            elif op[0] == 0:  # original step
 241                non_common_subsequences[ind]["original"].append(op[1])
 242            elif op[0] == 1:  # compare to step
 243                non_common_subsequences[ind]["compare_to"].append(op[1])
 244        # remove the empty dict from the list
 245        non_common_subsequences = [
 246            s for s in non_common_subsequences if s != {"original": [], "compare_to": []}
 247        ]
 248        return non_common_subsequences
 249
 250    @staticmethod
 251    def _non_common_subsequences_of_measures(original_m, compare_to_m):
 252        # Take the hash for each measure to run faster comparison
 253        # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison),
 254        # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after
 255        # computation)
 256        original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m]
 257        compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m]
 258        ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int)
 259        # retrieve the original pointers to measures
 260        new_out = []
 261        for e in ncs:
 262            new_out.append({})
 263            for k in e.keys():
 264                new_out[-1][k] = []
 265                for repr_hash in e[k]:
 266                    if k == "original":
 267                        new_out[-1][k].append(
 268                            next(m for m in original_m if m.precomputed_repr == repr_hash)
 269                        )
 270                    else:
 271                        new_out[-1][k].append(
 272                            next(m for m in compare_to_m if m.precomputed_repr == repr_hash)
 273                        )
 274
 275        return new_out
 276
    @staticmethod
    @_memoize_pitches_lev_diff
    def _pitches_levenshtein_diff(
        original: list[tuple[str, str, bool]],
        compare_to: list[tuple[str, str, bool]],
        noteNode1: AnnNote,
        noteNode2: AnnNote,
        ids: tuple[int, int]
    ):
        """
        Compute the levenshtein distance between two sequences of pitches
        (each pitch is a (name, accidental, tie) tuple).
        Arguments:
            original {list} -- list of pitches
            compare_to {list} -- list of pitches
            noteNode1 {annotatedNote} --for referencing
            noteNode2 {annotatedNote} --for referencing
            ids {tuple} -- a tuple of 2 elements with the indices of the notes considered
        """
        # base case: both sequences exhausted
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        # only insertions remain
        if len(original) == 0:
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
            )
            op_list.append(
                ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
            )
            cost += M21Utils.pitch_size(compare_to[0])
            return op_list, cost

        # only deletions remain
        if len(compare_to) == 0:
            op_list, cost = Comparison._pitches_levenshtein_diff(
                original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
            )
            op_list.append(
                ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
            )
            cost += M21Utils.pitch_size(original[0])
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost_dict = {}
        op_list_dict = {}
        # del-pitch
        op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
        )
        cost_dict["delpitch"] += M21Utils.pitch_size(original[0])
        op_list_dict["delpitch"].append(
            ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
        )
        # ins-pitch
        op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff(
            original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
        )
        cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0])
        op_list_dict["inspitch"].append(
            ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
        )
        # edit-pitch
        op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff(
            original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1)
        )
        if original[0] == compare_to[0]:  # to avoid perform the pitch_diff
            pitch_diff_op_list = []
            pitch_diff_cost = 0
        else:
            pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff(
                original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1])
            )
        cost_dict["editpitch"] += pitch_diff_cost
        op_list_dict["editpitch"].extend(pitch_diff_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost_dict, key=lambda k: cost_dict[k])
        out = op_list_dict[min_key], cost_dict[min_key]
        return out
 354
 355    @staticmethod
 356    def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids):
 357        """
 358        Compute the differences between two pitch (definition from the paper).
 359        a pitch consist of a tuple: pitch name (letter+number), accidental, tie.
 360        param : pitch1. The music_notation_repr tuple of note1
 361        param : pitch2. The music_notation_repr tuple of note2
 362        param : noteNode1. The noteNode where pitch1 belongs
 363        param : noteNode2. The noteNode where pitch2 belongs
 364        param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord
 365        Returns:
 366            [list] -- the list of differences
 367            [int] -- the cost of diff
 368        """
 369        cost = 0
 370        op_list = []
 371        # add for pitch name differences
 372        if pitch1[0] != pitch2[0]:
 373            cost += 1
 374            # TODO: select the note in a more precise way in case of a chord
 375            # rest to note
 376            if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"):  # xor
 377                op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids))
 378            else:  # they are two notes
 379                op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids))
 380
 381        # add for the accidentals
 382        if pitch1[1] != pitch2[1]:  # if the accidental is different
 383            if pitch1[1] == "None":
 384                assert pitch2[1] != "None"
 385                cost += 1
 386                op_list.append(("accidentins", noteNode1, noteNode2, 1, ids))
 387            elif pitch2[1] == "None":
 388                assert pitch1[1] != "None"
 389                cost += 1
 390                op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids))
 391            else:  # a different type of alteration is present
 392                cost += 2  # delete then add
 393                op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids))
 394        # add for the ties
 395        if pitch1[2] != pitch2[2]:
 396            # exclusive or. Add if one is tied and not the other.
 397            # probably to revise for chords
 398            cost += 1
 399            if pitch1[2]:
 400                assert not pitch2[2]
 401                op_list.append(("tiedel", noteNode1, noteNode2, 1, ids))
 402            elif pitch2[2]:
 403                assert not pitch1[2]
 404                op_list.append(("tieins", noteNode1, noteNode2, 1, ids))
 405        return op_list, cost
 406
    @staticmethod
    @_memoize_block_diff_lin
    def _block_diff_lin(original, compare_to):
        """
        Compute the edit operations list and cost between two non-common blocks
        of bars, via a levenshtein-style recursion over bar insertion
        ("insbar"), deletion ("delbar"), and substitution ("editbar").  A bar
        substitution diffs the bar contents: extras, lyrics, and either voices
        or notes (depending on whether the original bar includes voicing).
        original and compare_to are lists of annotated bars.
        """
        # base case: both blocks exhausted
        if len(original) == 0 and len(compare_to) == 0:
            return [], 0

        # only insertions remain
        if len(original) == 0:
            op_list, cost = Comparison._block_diff_lin(original, compare_to[1:])
            cost += compare_to[0].notation_size()
            op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size()))
            return op_list, cost

        # only deletions remain
        if len(compare_to) == 0:
            op_list, cost = Comparison._block_diff_lin(original[1:], compare_to)
            cost += original[0].notation_size()
            op_list.append(("delbar", original[0], None, original[0].notation_size()))
            return op_list, cost

        # compute the cost and the op_list for the many possibilities of recursion
        cost_dict = {}
        op_list_dict = {}
        # del-bar
        op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin(
            original[1:], compare_to
        )
        cost_dict["delbar"] += original[0].notation_size()
        op_list_dict["delbar"].append(
            ("delbar", original[0], None, original[0].notation_size())
        )
        # ins-bar
        op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin(
            original, compare_to[1:]
        )
        cost_dict["insbar"] += compare_to[0].notation_size()
        op_list_dict["insbar"].append(
            ("insbar", None, compare_to[0], compare_to[0].notation_size())
        )
        # edit-bar
        op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin(
            original[1:], compare_to[1:]
        )
        if (
            original[0] == compare_to[0]
        ):  # to avoid performing the _voices_coupling_recursive/_notes_set_distance
            # if it's not needed
            inside_bar_op_list = []
            inside_bar_cost = 0
        else:
            # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras
            # instead of lists of AnnNotes)
            extras_op_list, extras_cost = Comparison._extras_set_distance(
                original[0].extras_list, compare_to[0].extras_list
            )

            # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras)
            lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin(
                original[0].lyrics_list, compare_to[0].lyrics_list
            )

            if original[0].includes_voicing:
                # run the voice coupling algorithm, and add to inside_bar_op_list
                # and inside_bar_cost
                inside_bar_op_list, inside_bar_cost = (
                    Comparison._voices_coupling_recursive(
                        original[0].voices_list, compare_to[0].voices_list
                    )
                )
            else:
                # run the set distance algorithm, and add to inside_bar_op_list
                # and inside_bar_cost
                inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance(
                    original[0].annot_notes, compare_to[0].annot_notes
                )

            inside_bar_op_list.extend(extras_op_list)
            inside_bar_cost += extras_cost
            inside_bar_op_list.extend(lyrics_op_list)
            inside_bar_cost += lyrics_cost

        cost_dict["editbar"] += inside_bar_cost
        op_list_dict["editbar"].extend(inside_bar_op_list)
        # compute the minimum of the possibilities
        min_key = min(cost_dict, key=lambda k: cost_dict[k])
        out = op_list_dict[min_key], cost_dict[min_key]
        return out
 492
 493    @staticmethod
 494    @_memoize_lyrics_diff_lin
 495    def _lyrics_diff_lin(original, compare_to):
 496        # original and compare to are two lists of AnnLyric
 497        if len(original) == 0 and len(compare_to) == 0:
 498            return [], 0
 499
 500        if len(original) == 0:
 501            cost = 0
 502            op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:])
 503            op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size()))
 504            cost += compare_to[0].notation_size()
 505            return op_list, cost
 506
 507        if len(compare_to) == 0:
 508            cost = 0
 509            op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to)
 510            op_list.append(("lyricdel", original[0], None, original[0].notation_size()))
 511            cost += original[0].notation_size()
 512            return op_list, cost
 513
 514        # compute the cost and the op_list for the many possibilities of recursion
 515        cost = {}
 516        op_list = {}
 517        # lyricdel
 518        op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin(
 519            original[1:], compare_to
 520        )
 521        cost["lyricdel"] += original[0].notation_size()
 522        op_list["lyricdel"].append(
 523            ("lyricdel", original[0], None, original[0].notation_size())
 524        )
 525        # lyricins
 526        op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin(
 527            original, compare_to[1:]
 528        )
 529        cost["lyricins"] += compare_to[0].notation_size()
 530        op_list["lyricins"].append(
 531            ("lyricins", None, compare_to[0], compare_to[0].notation_size())
 532        )
 533        # lyricsub
 534        op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin(
 535            original[1:], compare_to[1:]
 536        )
 537        if (
 538            original[0] == compare_to[0]
 539        ):  # avoid call another function if they are equal
 540            lyricsub_op, lyricsub_cost = [], 0
 541        else:
 542            lyricsub_op, lyricsub_cost = (
 543                Comparison._annotated_lyric_diff(original[0], compare_to[0])
 544            )
 545        cost["lyricsub"] += lyricsub_cost
 546        op_list["lyricsub"].extend(lyricsub_op)
 547        # compute the minimum of the possibilities
 548        min_key = min(cost, key=cost.get)
 549        out = op_list[min_key], cost[min_key]
 550        return out
 551
 552    @staticmethod
 553    def _strings_levenshtein_distance(str1: str, str2: str):
 554        counter: dict = {"+": 0, "-": 0}
 555        distance: int = 0
 556        for edit_code in ndiff(str1, str2):
 557            if edit_code[0] == " ":
 558                distance += max(counter.values())
 559                counter = {"+": 0, "-": 0}
 560            else:
 561                counter[edit_code[0]] += 1
 562        distance += max(counter.values())
 563        return distance
 564
 565    @staticmethod
 566    def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool:
 567        if off1 == off2:
 568            return False
 569
 570        # this should never happen, but...
 571        if off1 is None or off2 is None:
 572            if off1 is None and off2 is not None:
 573                return True
 574            if off1 is not None and off2 is None:
 575                return True
 576            return False  # both are None, therefore not different at all
 577
 578        diff: OffsetQL = off1 - off2
 579        if diff < 0:
 580            diff = -diff
 581
 582        if diff > 0.0001:
 583            return True
 584        return False
 585
 586    @staticmethod
 587    def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra):
 588        """
 589        Compute the differences between two annotated extras.
 590        Each annotated extra consists of three values: content, offset, and duration
 591        """
 592        cost = 0
 593        op_list = []
 594
 595        # add for the content
 596        if annExtra1.content != annExtra2.content:
 597            content_cost: int = (
 598                Comparison._strings_levenshtein_distance(
 599                    annExtra1.content or '',
 600                    annExtra2.content or ''
 601                )
 602            )
 603            cost += content_cost
 604            op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost))
 605
 606        # add for the symbolic (cost 2: delete one symbol, add the other)
 607        if annExtra1.symbolic != annExtra2.symbolic:
 608            cost += 2
 609            op_list.append(("extrasymboledit", annExtra1, annExtra2, 2))
 610
 611        # add for the infodict
 612        if annExtra1.infodict != annExtra2.infodict:
 613            info_cost: int = 0
 614            # handle everything in annExtra1 (whether or not it is in annExtra2)
 615            for k, v in annExtra1.infodict.items():
 616                if k not in annExtra2.infodict:
 617                    # not in annExtra2: delete a symbol
 618                    info_cost += 1
 619                elif v != annExtra2.infodict[k]:
 620                    # different in annExtra2: delete a symbol, add a symbol
 621                    info_cost += 2
 622            # handle everything in annExtra2 that is not in annExtra1
 623            for k in annExtra2.infodict:
 624                if k not in annExtra1.infodict:
 625                    # add a symbol
 626                    info_cost += 1
 627            cost += info_cost
 628            op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost))
 629
 630        # add for the offset
 631        # Note: offset here is a float, and some file formats have only four
 632        # decimal places of precision.  So we should not compare exactly here.
 633        if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset):
 634            cost += 1
 635            op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1))
 636
 637        # add for the duration
 638        # Note: duration here is a float, and some file formats have only four
 639        # decimal places of precision.  So we should not compare exactly here.
 640        if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration):
 641            cost += 1
 642            op_list.append(("extradurationedit", annExtra1, annExtra2, 1))
 643
 644        # add for the style
 645        if annExtra1.styledict != annExtra2.styledict:
 646            cost += 1  # someday we might count different items in the styledict
 647            op_list.append(("extrastyleedit", annExtra1, annExtra2, 1))
 648
 649        return op_list, cost
 650
 651    @staticmethod
 652    def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric):
 653        """
 654        Compute the differences between two annotated lyrics.
 655        Each annotated lyric consists of five values: lyric, verse_id, offset, duration,
 656        and styledict.
 657        """
 658        cost = 0
 659        op_list = []
 660
 661        # add for the content
 662        if annLyric1.lyric != annLyric2.lyric:
 663            content_cost: int = (
 664                Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric)
 665            )
 666            cost += content_cost
 667            op_list.append(("lyricedit", annLyric1, annLyric2, content_cost))
 668
 669        # add for the number
 670        if annLyric1.number != annLyric2.number:
 671            number_cost: int
 672            if annLyric1.number == 0 or annLyric2.number == 0:
 673                # add or delete number
 674                number_cost = 1
 675            else:
 676                # add and delete number
 677                number_cost = 2
 678            cost += number_cost
 679            op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost))
 680
 681        # add for the identifier
 682        if annLyric1.identifier != annLyric2.identifier:
 683            # someday we might do a Levenshtein distance of the two ids
 684            id_cost: int
 685            if not annLyric1.identifier or not annLyric1.identifier:
 686                # add or delete identifier
 687                id_cost = 1
 688            else:
 689                # add and delete identifier
 690                id_cost = 2
 691            cost += id_cost
 692            op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost))
 693
 694        # add for the offset
 695        # Note: offset here is a float, and some file formats have only four
 696        # decimal places of precision.  So we should not compare exactly here.
 697        if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset):
 698            cost += 1
 699            op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1))
 700
 701        # add for the style
 702        if annLyric1.styledict != annLyric2.styledict:
 703            cost += 1  # someday we might count different items in the styledict
 704            op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1))
 705
 706        return op_list, cost
 707
 708    @staticmethod
 709    def _annotated_metadata_item_diff(
 710        annMetadataItem1: AnnMetadataItem,
 711        annMetadataItem2: AnnMetadataItem
 712    ):
 713        """
 714        Compute the differences between two annotated metadata items.
 715        Each annotated metadata item has two values: key: str, value: t.Any,
 716        """
 717        cost = 0
 718        op_list = []
 719
 720        # we don't compare items that don't have the same key.
 721        if annMetadataItem1.key != annMetadataItem2.key:
 722            raise ValueError('unexpected comparison of metadata items with different keys')
 723
 724        # add for the value
 725        if annMetadataItem1.value != annMetadataItem2.value:
 726            value_cost: int = (
 727                Comparison._strings_levenshtein_distance(
 728                    str(annMetadataItem1.value),
 729                    str(annMetadataItem2.value)
 730                )
 731            )
 732            cost += value_cost
 733            op_list.append(
 734                ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost)
 735            )
 736
 737        return op_list, cost
 738
 739    @staticmethod
 740    def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup):
 741        """
 742        Compute the differences between two annotated staff groups.
 743        Each annotated staff group consists of five values: name, abbreviation,
 744        symbol, barTogether, part_indices.
 745        """
 746        cost = 0
 747        op_list = []
 748
 749        # add for the name
 750        if annStaffGroup1.name != annStaffGroup2.name:
 751            name_cost: int = (
 752                Comparison._strings_levenshtein_distance(
 753                    annStaffGroup1.name,
 754                    annStaffGroup2.name
 755                )
 756            )
 757            cost += name_cost
 758            op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost))
 759
 760        # add for the abbreviation
 761        if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation:
 762            abbreviation_cost: int = (
 763                Comparison._strings_levenshtein_distance(
 764                    annStaffGroup1.abbreviation,
 765                    annStaffGroup2.abbreviation
 766                )
 767            )
 768            cost += abbreviation_cost
 769            op_list.append((
 770                "staffgrpabbreviationedit",
 771                annStaffGroup1,
 772                annStaffGroup2,
 773                abbreviation_cost
 774            ))
 775
 776        # add for the symbol
 777        if annStaffGroup1.symbol != annStaffGroup2.symbol:
 778            symbol_cost: int
 779            if not annStaffGroup1.symbol or not annStaffGroup2.symbol:
 780                # add or delete symbol
 781                symbol_cost = 1
 782            else:
 783                # add and delete symbol
 784                symbol_cost = 2
 785            cost += symbol_cost
 786            op_list.append(
 787                ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost)
 788            )
 789
 790        # add for barTogether
 791        if annStaffGroup1.barTogether != annStaffGroup2.barTogether:
 792            barTogether_cost: int = 1
 793            cost += barTogether_cost
 794            op_list.append(
 795                ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost)
 796            )
 797
 798        # add for partIndices (sorted list of int)
 799        if annStaffGroup1.part_indices != annStaffGroup2.part_indices:
 800            partIndices_cost: int = 0
 801            if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]:
 802                partIndices_cost += 1  # vertical start
 803            if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]:
 804                partIndices_cost += 1  # vertical height
 805            if partIndices_cost == 0:
 806                # should never get here, but we have to have a cost
 807                partIndices_cost = 1
 808            cost += partIndices_cost
 809            op_list.append(
 810                ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost)
 811            )
 812
 813        return op_list, cost
 814
 815    @staticmethod
 816    @_memoize_inside_bars_diff_lin
 817    def _inside_bars_diff_lin(original, compare_to):
 818        # original and compare to are two lists of annotatedNote
 819        if len(original) == 0 and len(compare_to) == 0:
 820            return [], 0
 821
 822        if len(original) == 0:
 823            cost = 0
 824            op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:])
 825            op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size()))
 826            cost += compare_to[0].notation_size()
 827            return op_list, cost
 828
 829        if len(compare_to) == 0:
 830            cost = 0
 831            op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to)
 832            op_list.append(("notedel", original[0], None, original[0].notation_size()))
 833            cost += original[0].notation_size()
 834            return op_list, cost
 835
 836        # compute the cost and the op_list for the many possibilities of recursion
 837        cost = {}
 838        op_list = {}
 839        # notedel
 840        op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin(
 841            original[1:], compare_to
 842        )
 843        cost["notedel"] += original[0].notation_size()
 844        op_list["notedel"].append(
 845            ("notedel", original[0], None, original[0].notation_size())
 846        )
 847        # noteins
 848        op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin(
 849            original, compare_to[1:]
 850        )
 851        cost["noteins"] += compare_to[0].notation_size()
 852        op_list["noteins"].append(
 853            ("noteins", None, compare_to[0], compare_to[0].notation_size())
 854        )
 855        # notesub
 856        op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin(
 857            original[1:], compare_to[1:]
 858        )
 859        if (
 860            original[0] == compare_to[0]
 861        ):  # avoid call another function if they are equal
 862            notesub_op, notesub_cost = [], 0
 863        else:
 864            notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0])
 865        cost["notesub"] += notesub_cost
 866        op_list["notesub"].extend(notesub_op)
 867        # compute the minimum of the possibilities
 868        min_key = min(cost, key=cost.get)
 869        out = op_list[min_key], cost[min_key]
 870        return out
 871
    @staticmethod
    def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote):
        """
        Compute the differences between two annotated notes.
        Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets)
        where pitches is a list.  Also compared: grace type/slash, tuplet info,
        articulations, expressions, horizontal spacing (gap_dur), noteshape,
        noteheadFill, noteheadParenthesis, stemDirection, and styledict.

        Arguments:
            annNote1 {[AnnNote]} -- original AnnNote
            annNote2 {[AnnNote]} -- compare_to AnnNote

        Returns:
            (op_list, cost): the list of edit operations and their total cost.
        """
        cost = 0
        op_list = []
        # add for the pitches
        # if they are equal
        if annNote1.pitches == annNote2.pitches:
            op_list_pitch, cost_pitch = [], 0
        else:
            # pitches diff is computed using Levenshtein distances (they are already ordered)
            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
            )
        op_list.extend(op_list_pitch)
        cost += cost_pitch
        # add for the notehead
        if annNote1.note_head != annNote2.note_head:
            # delete one note head, add the other (this isn't noteshape, this is
            # just quarter-note note head vs half-note note head, etc)
            cost += 2
            op_list.append(("headedit", annNote1, annNote2, 2))
        # add for the dots
        if annNote1.dots != annNote2.dots:
            # add one for each added (or deleted) dot
            dots_diff = abs(annNote1.dots - annNote2.dots)
            cost += dots_diff
            if annNote1.dots > annNote2.dots:
                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
            else:
                op_list.append(("dotins", annNote1, annNote2, dots_diff))
        # add for the grace type
        if annNote1.graceType != annNote2.graceType:
            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
            cost += 2
            op_list.append(("graceedit", annNote1, annNote2, 2))
        # add for the grace slash
        if annNote1.graceSlash != annNote2.graceSlash:
            # add or delete the slash
            cost += 1
            op_list.append(("graceslashedit", annNote1, annNote2, 1))
        # add for the beamings
        if annNote1.beamings != annNote2.beamings:
            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
            )
            op_list.extend(beam_op_list)
            cost += beam_cost
        # add for the tuplet types
        if annNote1.tuplets != annNote2.tuplets:
            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_op_list)
            cost += tuplet_cost
        # add for the tuplet info
        if annNote1.tuplet_info != annNote2.tuplet_info:
            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
            )
            op_list.extend(tuplet_info_op_list)
            cost += tuplet_info_cost
        # add for the articulations
        if annNote1.articulations != annNote2.articulations:
            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
                annNote1.articulations,
                annNote2.articulations,
                annNote1,
                annNote2,
                "articulation",
            )
            op_list.extend(artic_op_list)
            cost += artic_cost
        # add for the expressions
        if annNote1.expressions != annNote2.expressions:
            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
                annNote1.expressions,
                annNote2.expressions,
                annNote1,
                annNote2,
                "expression",
            )
            op_list.extend(expr_op_list)
            cost += expr_cost

        # add for gap from previous note or start of measure if first note in measure
        # (i.e. horizontal position shift)
        if annNote1.gap_dur != annNote2.gap_dur:
            # in all cases, the edit is a simple horizontal shift of the note
            cost += 1
            if annNote1.gap_dur == 0:
                # original had no gap, compare_to does: space was inserted
                op_list.append(("insspace", annNote1, annNote2, 1))
            elif annNote2.gap_dur == 0:
                # original had a gap, compare_to doesn't: space was deleted
                op_list.append(("delspace", annNote1, annNote2, 1))
            else:
                # neither is zero
                op_list.append(("editspace", annNote1, annNote2, 1))

        # add for noteshape
        if annNote1.noteshape != annNote2.noteshape:
            # always delete existing note shape and add the new one
            cost += 2
            op_list.append(("editnoteshape", annNote1, annNote2, 2))
        # add for noteheadFill
        if annNote1.noteheadFill != annNote2.noteheadFill:
            # always delete existing note fill and add the new one
            cost += 2
            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
        # add for noteheadParenthesis (True or False)
        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
            # always either add or delete parentheses
            cost += 1
            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
        # add for stemDirection
        if annNote1.stemDirection != annNote2.stemDirection:
            stemdir_cost: int
            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
                # gonna add a stem
                stemdir_cost = 1
            else:
                # gonna change a stem (add then delete)
                stemdir_cost = 2
            cost += stemdir_cost
            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
        # add for the styledict
        if annNote1.styledict != annNote2.styledict:
            cost += 1
            op_list.append(("editstyle", annNote1, annNote2, 1))

        return op_list, cost
1007
1008    @staticmethod
1009    @_memoize_beamtuplet_lev_diff
1010    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
1011        """
1012        Compute the levenshtein distance between two sequences of beaming or tuples.
1013        Arguments:
1014            original {list} -- list of strings (start, stop, continue or partial)
1015            compare_to {list} -- list of strings (start, stop, continue or partial)
1016            note1 {AnnNote} -- the note for referencing in the score
1017            note2 {AnnNote} -- the note for referencing in the score
1018            which -- a string: "beam" or "tuplet" depending what we are comparing
1019        """
1020        if which not in ("beam", "tuplet"):
1021            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")
1022
1023        if len(original) == 0 and len(compare_to) == 0:
1024            return [], 0
1025
1026        if len(original) == 0:
1027            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1028                original, compare_to[1:], note1, note2, which
1029            )
1030            op_list.append(("ins" + which, note1, note2, 1))
1031            cost += 1
1032            return op_list, cost
1033
1034        if len(compare_to) == 0:
1035            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1036                original[1:], compare_to, note1, note2, which
1037            )
1038            op_list.append(("del" + which, note1, note2, 1))
1039            cost += 1
1040            return op_list, cost
1041
1042        # compute the cost and the op_list for the many possibilities of recursion
1043        cost = {}
1044        op_list = {}
1045        # delwhich
1046        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
1047            original[1:], compare_to, note1, note2, which
1048        )
1049        cost["del" + which] += 1
1050        op_list["del" + which].append(("del" + which, note1, note2, 1))
1051        # inswhich
1052        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
1053            original, compare_to[1:], note1, note2, which
1054        )
1055        cost["ins" + which] += 1
1056        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1057        # editwhich
1058        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
1059            original[1:], compare_to[1:], note1, note2, which
1060        )
1061        if original[0] == compare_to[0]:
1062            beam_diff_op_list = []
1063            beam_diff_cost = 0
1064        else:
1065            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
1066        cost["edit" + which] += beam_diff_cost
1067        op_list["edit" + which].extend(beam_diff_op_list)
1068        # compute the minimum of the possibilities
1069        min_key = min(cost, key=cost.get)
1070        out = op_list[min_key], cost[min_key]
1071        return out
1072
1073    @staticmethod
1074    @_memoize_generic_lev_diff
1075    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
1076        """
1077        Compute the Levenshtein distance between two generic sequences of symbols
1078        (e.g., articulations).
1079        Arguments:
1080            original {list} -- list of strings
1081            compare_to {list} -- list of strings
1082            note1 {AnnNote} -- the note for referencing in the score
1083            note2 {AnnNote} -- the note for referencing in the score
1084            which -- a string: e.g. "articulation" depending what we are comparing
1085        """
1086        if len(original) == 0 and len(compare_to) == 0:
1087            return [], 0
1088
1089        if len(original) == 0:
1090            op_list, cost = Comparison._generic_levenshtein_diff(
1091                original, compare_to[1:], note1, note2, which
1092            )
1093            op_list.append(("ins" + which, note1, note2, 1))
1094            cost += 1
1095            return op_list, cost
1096
1097        if len(compare_to) == 0:
1098            op_list, cost = Comparison._generic_levenshtein_diff(
1099                original[1:], compare_to, note1, note2, which
1100            )
1101            op_list.append(("del" + which, note1, note2, 1))
1102            cost += 1
1103            return op_list, cost
1104
1105        # compute the cost and the op_list for the many possibilities of recursion
1106        cost = {}
1107        op_list = {}
1108        # delwhich
1109        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
1110            original[1:], compare_to, note1, note2, which
1111        )
1112        cost["del" + which] += 1
1113        op_list["del" + which].append(("del" + which, note1, note2, 1))
1114        # inswhich
1115        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
1116            original, compare_to[1:], note1, note2, which
1117        )
1118        cost["ins" + which] += 1
1119        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1120        # editwhich
1121        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
1122            original[1:], compare_to[1:], note1, note2, which
1123        )
1124        if original[0] == compare_to[0]:  # to avoid perform the diff
1125            generic_diff_op_list = []
1126            generic_diff_cost = 0
1127        else:
1128            generic_diff_op_list, generic_diff_cost = (
1129                [("edit" + which, note1, note2, 1)],
1130                1,
1131            )
1132        cost["edit" + which] += generic_diff_cost
1133        op_list["edit" + which].extend(generic_diff_op_list)
1134        # compute the minimum of the possibilities
1135        min_key = min(cost, key=cost.get)
1136        out = op_list[min_key], cost[min_key]
1137        return out
1138
    @staticmethod
    @_memoize_notes_set_distance
    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
        """
        Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in
        that order of importance).  If you can't find an exactly matching note, try again without
        visual duration.  Unpaired notes become "notedel"/"noteins" operations;
        paired notes contribute "notesub" operations via _annotated_note_diff.

        original [list] -- a list of AnnNote (which are never chords)
        compare_to [list] -- a list of AnnNote (which are never chords)

        Returns:
            (op_list, cost): the list of edit operations and their total cost.
        """
        paired_notes: list[tuple[AnnNote, AnnNote]] = []
        unpaired_orig_notes: list[AnnNote] = []
        # shallow copy: paired comp notes get popped from this list as we go
        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)

        for orig_n in original:
            # fallback is the first comp note that matches everything except
            # visual duration (dur type/dots); used only if no perfect match
            fallback: AnnNote | None = None
            fallback_i: int = -1
            found_it: bool = False
            for i, comp_n in enumerate(unpaired_comp_notes):
                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
                    # this pitch comparison (1) assumes the note is not a chord
                    # (because we don't do chords when Voicing is not set, and
                    # we only call _notes_set_distance when Voicing is not set),
                    # and (2) only compares the visual position of the note (we
                    # are ignoring the accidental here).  This is so that an
                    # accidental change will show up as a pitch edit, not a
                    # note remove/insert.
                    continue
                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
                    continue
                if orig_n.note_is_grace != comp_n.note_is_grace:
                    continue
                # pitch, offset, and graceness all match: remember the first
                # such note as a fallback, then go on to check visual duration
                if fallback is None:
                    fallback = comp_n
                    fallback_i = i

                if orig_n.note_dur_type != comp_n.note_dur_type:
                    continue
                if orig_n.note_dur_dots != comp_n.note_dur_dots:
                    continue

                # found a perfect match
                paired_notes.append((orig_n, comp_n))

                # remove comp_n from unpaired_comp_notes
                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one

                found_it = True
                break

            if found_it:
                # on to the next original note
                continue

            # did we find a fallback (matched except for duration)?
            if fallback is not None:
                paired_notes.append((orig_n, fallback))
                unpaired_comp_notes.pop(fallback_i)
                continue

            # we found nothing
            unpaired_orig_notes.append(orig_n)

        # compute the cost and the op_list
        cost: int = 0
        op_list: list = []

        # notedel: each unmatched original note is a deletion
        if unpaired_orig_notes:
            for an in unpaired_orig_notes:
                cost += an.notation_size()
                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))

        # noteins: each unmatched compare_to note is an insertion
        if unpaired_comp_notes:
            for an in unpaired_comp_notes:
                cost += an.notation_size()
                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))

        # notesub: diff each matched pair of notes
        if paired_notes:
            for ano, anc in paired_notes:
                if ano == anc:
                    # if equal, avoid _annotated_note_diff call
                    notesub_op, notesub_cost = [], 0
                else:
                    notesub_op, notesub_cost = (
                        Comparison._annotated_note_diff(ano, anc)
                    )
                cost += notesub_cost
                op_list.extend(notesub_op)

        return op_list, cost
1232
1233    @staticmethod
1234    @_memoize_extras_set_distance
1235    def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]):
1236        """
1237        Gather up pairs of matching extras (using kind, offset, and visual duration, in
1238        that order of importance).  If you can't find an exactly matching extra, try again
1239        without visual duration.
1240        original [list] -- a list of AnnExtras
1241        compare_to [list] -- a list of AnnExtras
1242        """
1243        paired_extras: list[tuple[AnnExtra, AnnExtra]] = []
1244        unpaired_orig_extras: list[AnnExtra] = []
1245        unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to)
1246
1247        for orig_x in original:
1248            fallback: AnnExtra | None = None
1249            fallback_i: int = -1
1250            found_it: bool = False
1251            for i, comp_x in enumerate(unpaired_comp_extras):
1252                # kind and offset are required for pairing
1253                if orig_x.kind != comp_x.kind:
1254                    continue
1255                if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset):
1256                    continue
1257                if fallback is None:
1258                    fallback = comp_x
1259                    fallback_i = i
1260
1261                # duration is preferred for pairing
1262                if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration):
1263                    continue
1264
1265                # there are a few kind-specific elements that are also preferred:
1266                #   'direction'/'ending': content (visible text)
1267                #   'keysig'/'timesig'/'clef': symbolic (there are sometimes two
1268                #       simultaneous keysigs, timesigs, or clefs, and we don't
1269                #       want to confuse which one is which, producing a diff where
1270                #       there actually isn't one)
1271                #   'slur': placements (because there are often two identical slurs
1272                #       whose only difference is 'above' vs 'below', producing a diff
1273                #       where there actually isn't one)
1274                if orig_x.kind in ('direction', 'ending'):
1275                    if orig_x.content != comp_x.content:
1276                        continue
1277                if orig_x.kind == 'clef':
1278                    if orig_x.symbolic != comp_x.symbolic:
1279                        continue
1280                if orig_x.kind in ('keysig', 'timesig'):
1281                    if orig_x.infodict != comp_x.infodict:
1282                        continue
1283                if orig_x.kind == 'slur':
1284                    orig_placement: str = orig_x.styledict.get('placement', '')
1285                    comp_placement: str = comp_x.styledict.get('placement', '')
1286                    if orig_placement != comp_placement:
1287                        continue
1288
1289                # found a perfect match
1290                paired_extras.append((orig_x, comp_x))
1291
1292                # remove comp_n from unpaired_comp_extras
1293                unpaired_comp_extras.pop(i)  # remove(comp_n) would sometimes get the wrong one
1294
1295                found_it = True
1296                break
1297
1298            if found_it:
1299                # on to the next original extra
1300                continue
1301
1302            # did we find a fallback (matched except for duration)?
1303            if fallback is not None:
1304                paired_extras.append((orig_x, fallback))
1305                unpaired_comp_extras.pop(fallback_i)
1306                continue
1307
1308            # we found nothing
1309            unpaired_orig_extras.append(orig_x)
1310
1311        # compute the cost and the op_list
1312        cost: int = 0
1313        op_list: list = []
1314
1315        # extradel
1316        for extra in unpaired_orig_extras:
1317            cost += extra.notation_size()
1318            op_list.append(("extradel", extra, None, extra.notation_size()))
1319
1320        # extrains
1321        for extra in unpaired_comp_extras:
1322            cost += extra.notation_size()
1323            op_list.append(("extrains", None, extra, extra.notation_size()))
1324
1325        # extrasub
1326        if paired_extras:
1327            for extrao, extrac in paired_extras:
1328                if extrao == extrac:
1329                    # if equal, avoid _annotated_extra_diff call
1330                    extrasub_op, extrasub_cost = [], 0
1331                else:
1332                    extrasub_op, extrasub_cost = (
1333                        Comparison._annotated_extra_diff(extrao, extrac)
1334                    )
1335                cost += extrasub_cost
1336                op_list.extend(extrasub_op)
1337
1338        return op_list, cost
1339
1340    @staticmethod
1341    @_memoize_metadata_items_set_distance
1342    def _metadata_items_set_distance(
1343        original: list[AnnMetadataItem],
1344        compare_to: list[AnnMetadataItem]
1345    ):
1346        """
1347        Gather up pairs of matching metadata_items (using key and value).  If you can't find
1348        an exactly matching metadata_item, try again without value.
1349        original [list] -- a list of AnnMetadataItems
1350        compare_to [list] -- a list of AnnMetadataItems
1351        """
1352        paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = []
1353        unpaired_orig_metadata_items: list[AnnMetadataItem] = []
1354        unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to)
1355
1356        # first look for perfect matches
1357        for orig_mdi in original:
1358            found_it: bool = False
1359            for i, comp_mdi in enumerate(unpaired_comp_metadata_items):
1360                # key is required for perfect match
1361                if orig_mdi.key != comp_mdi.key:
1362                    continue
1363
1364                # value is required for perfect match
1365                if orig_mdi.value != comp_mdi.value:
1366                    continue
1367
1368                # found a perfect match
1369                paired_metadata_items.append((orig_mdi, comp_mdi))
1370
1371                # remove comp_mdi from unpaired_comp_metadata_items
1372                unpaired_comp_metadata_items.pop(i)
1373
1374                found_it = True
1375                break
1376
1377            if found_it:
1378                # on to the next original metadata_item
1379                continue
1380
1381            # we found no perfect match
1382            unpaired_orig_metadata_items.append(orig_mdi)
1383
1384        # now look among the unpaired remainders for key match only
1385        remove_unpaired_orig_items: list[AnnMetadataItem] = []
1386        for orig_mdi in unpaired_orig_metadata_items:
1387            for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items):
1388                # key is required for key-only match
1389                if orig_mdi.key != comp_mdi.key:
1390                    continue
1391
1392                # found a key-only match
1393                paired_metadata_items.append((orig_mdi, comp_mdi))
1394
1395                # remove comp_mdi from unpaired_comp_metadata_items
1396                unpaired_comp_metadata_items.pop(comp_idx)
1397
1398                # make a note of unpaired_orig_metadata_item to remove later
1399                remove_unpaired_orig_items.append(orig_mdi)
1400                break
1401
1402        for remove_mdi in remove_unpaired_orig_items:
1403            unpaired_orig_metadata_items.remove(remove_mdi)
1404
1405        # compute the cost and the op_list
1406        cost: int = 0
1407        op_list: list = []
1408
1409        # mditemdel
1410        for metadata_item in unpaired_orig_metadata_items:
1411            cost += metadata_item.notation_size()
1412            op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size()))
1413
1414        # mditemins
1415        for metadata_item in unpaired_comp_metadata_items:
1416            cost += metadata_item.notation_size()
1417            op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size()))
1418
1419        # mditemsub
1420        if paired_metadata_items:
1421            for metadata_itemo, metadata_itemc in paired_metadata_items:
1422                if metadata_itemo == metadata_itemc:
1423                    # if equal, avoid _annotated_metadata_item_diff call
1424                    metadata_itemsub_op, metadata_itemsub_cost = [], 0
1425                else:
1426                    metadata_itemsub_op, metadata_itemsub_cost = (
1427                        Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc)
1428                    )
1429                cost += metadata_itemsub_cost
1430                op_list.extend(metadata_itemsub_op)
1431
1432        return op_list, cost
1433
1434    @staticmethod
1435    @_memoize_staff_groups_set_distance
1436    def _staff_groups_set_distance(
1437        original: list[AnnStaffGroup],
1438        compare_to: list[AnnStaffGroup]
1439    ):
1440        """
1441        Gather up pairs of matching staffgroups (using start_index and end_index, in
1442        that order of importance).  If you can't find an exactly matching staffgroup,
1443        try again without end_index.
1444        original [list] -- a list of AnnStaffGroups
1445        compare_to [list] -- a list of AnnStaffGroups
1446        """
1447        paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = []
1448        unpaired_orig_staff_groups: list[AnnStaffGroup] = []
1449        unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to)
1450
1451        for orig_sg in original:
1452            fallback: AnnStaffGroup | None = None
1453            fallback_i: int = -1
1454            found_it: bool = False
1455            for i, comp_sg in enumerate(unpaired_comp_staff_groups):
1456                # start index is required for pairing
1457                if orig_sg.part_indices[0] != comp_sg.part_indices[0]:
1458                    continue
1459                if fallback is None:
1460                    fallback = comp_sg
1461                    fallback_i = i
1462
1463                # end index and symbol (brace, bracket, etc) is preferred for pairing
1464                if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]:
1465                    continue
1466                if orig_sg.symbol != comp_sg.symbol:
1467                    continue
1468
1469                # found a perfect match
1470                paired_staff_groups.append((orig_sg, comp_sg))
1471
1472                # remove comp_n from unpaired_comp_staff_groups
1473                unpaired_comp_staff_groups.pop(i)
1474
1475                found_it = True
1476                break
1477
1478            if found_it:
1479                # on to the next original staff_group
1480                continue
1481
1482            # did we find a fallback (matched except for duration)?
1483            if fallback is not None:
1484                paired_staff_groups.append((orig_sg, fallback))
1485                unpaired_comp_staff_groups.pop(fallback_i)
1486                continue
1487
1488            # we found nothing
1489            unpaired_orig_staff_groups.append(orig_sg)
1490
1491        # compute the cost and the op_list
1492        cost: int = 0
1493        op_list: list = []
1494
1495        # staffgrpdel
1496        for staff_group in unpaired_orig_staff_groups:
1497            cost += staff_group.notation_size()
1498            op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size()))
1499
1500        # staffgrpins
1501        for staff_group in unpaired_comp_staff_groups:
1502            cost += staff_group.notation_size()
1503            op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size()))
1504
1505        # staffgrpsub
1506        if paired_staff_groups:
1507            for staff_groupo, staff_groupc in paired_staff_groups:
1508                if staff_groupo == staff_groupc:
1509                    # if equal, avoid _annotated_staff_group_diff call
1510                    staff_groupsub_op, staff_groupsub_cost = [], 0
1511                else:
1512                    staff_groupsub_op, staff_groupsub_cost = (
1513                        Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc)
1514                    )
1515                cost += staff_groupsub_cost
1516                op_list.extend(staff_groupsub_op)
1517
1518        return op_list, cost
1519
1520    @staticmethod
1521    def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]):
1522        """
1523        Compare all the possible voices permutations, considering also deletion and
1524        insertion (equation on office lens).
1525        original [list] -- a list of Voice
1526        compare_to [list] -- a list of Voice
1527        """
1528        if len(original) == 0 and len(compare_to) == 0:  # stop the recursion
1529            return [], 0
1530
1531        if len(original) == 0:
1532            # insertion
1533            op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:])
1534            # add for the inserted voice
1535            op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size()))
1536            cost += compare_to[0].notation_size()
1537            return op_list, cost
1538
1539        if len(compare_to) == 0:
1540            # deletion
1541            op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to)
1542            # add for the deleted voice
1543            op_list.append(("voicedel", original[0], None, original[0].notation_size()))
1544            cost += original[0].notation_size()
1545            return op_list, cost
1546
1547        cost = {}
1548        op_list = {}
1549        # deletion
1550        op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive(
1551            original[1:], compare_to
1552        )
1553        op_list["voicedel"].append(
1554            ("voicedel", original[0], None, original[0].notation_size())
1555        )
1556        cost["voicedel"] += original[0].notation_size()
1557        for i, c in enumerate(compare_to):
1558            # substitution
1559            (
1560                op_list["voicesub" + str(i)],
1561                cost["voicesub" + str(i)],
1562            ) = Comparison._voices_coupling_recursive(
1563                original[1:], compare_to[:i] + compare_to[i + 1:]
1564            )
1565            if (
1566                compare_to[0] != original[0]
1567            ):  # add the cost of the sub and the operations from inside_bar_diff
1568                op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin(
1569                    original[0].annot_notes, c.annot_notes
1570                )  # compute the distance from original[0] and compare_to[i]
1571                op_list["voicesub" + str(i)].extend(op_list_inside_bar)
1572                cost["voicesub" + str(i)] += cost_inside_bar
1573        # compute the minimum of the possibilities
1574        min_key = min(cost, key=cost.get)
1575        out = op_list[min_key], cost[min_key]
1576        return out
1577
1578    @staticmethod
1579    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1580        '''
1581        Compare two annotated scores, computing an operations list and the cost of applying those
1582        operations to the first score to generate the second score.
1583
1584        Args:
1585            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1586            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1587
1588        Returns:
1589            list[tuple], int: The operations list and the cost
1590        '''
1591        # Clear all memoizer caches, in case we are called again with different scores.
1592        # The cached results are no longer valid.
1593        Comparison._clear_memoizer_caches()
1594
1595        op_list_total: list[tuple] = []
1596        cost_total: int = 0
1597
1598        if score1.n_of_parts == score2.n_of_parts:
1599            n_of_parts = score1.n_of_parts
1600        else:
1601            # The two scores have differing number of parts.  For now, assume that
1602            # the parts are in the same order in both scores, and that the missing
1603            # parts are the ones that should have been at the end of the smaller
1604            # score. In future we could do something like we do with voices, where
1605            # we try all the combinations and compare the most-similar pairs of
1606            # parts, and the rest are considered to be the extra parts (deleted
1607            # from score1, or inserted into score2).
1608            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1609            if score1.n_of_parts > score2.n_of_parts:
1610                # score1 has more parts that must be deleted
1611                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1612                    deleted_part = score1.part_list[part_idx]
1613                    op_list_total.append(
1614                        (
1615                            "delpart",
1616                            deleted_part,
1617                            None,
1618                            deleted_part.notation_size()
1619                        )
1620                    )
1621                    cost_total += deleted_part.notation_size()
1622            else:
1623                # score2 has more parts that must be inserted
1624                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1625                    inserted_part = score2.part_list[part_idx]
1626                    op_list_total.append(
1627                        (
1628                            "inspart",
1629                            None,
1630                            inserted_part,
1631                            inserted_part.notation_size()
1632                        )
1633                    )
1634                    cost_total += inserted_part.notation_size()
1635
1636        # iterate over parts that exist in both scores
1637        for p_number in range(n_of_parts):
1638            # compute non-common-subseq
1639            ncs = Comparison._non_common_subsequences_of_measures(
1640                score1.part_list[p_number].bar_list,
1641                score2.part_list[p_number].bar_list,
1642            )
1643            # compute blockdiff
1644            for subseq in ncs:
1645                op_list_block, cost_block = Comparison._block_diff_lin(
1646                    subseq["original"], subseq["compare_to"]
1647                )
1648                op_list_total.extend(op_list_block)
1649                cost_total += cost_block
1650
1651        # compare the staff groups
1652        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1653            score1.staff_group_list, score2.staff_group_list
1654        )
1655        op_list_total.extend(groups_op_list)
1656        cost_total += groups_cost
1657
1658        # compare the metadata items
1659        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1660            score1.metadata_items_list, score2.metadata_items_list
1661        )
1662        op_list_total.extend(mditems_op_list)
1663        cost_total += mditems_cost
1664
1665        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1666        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1667        total_syms: int = score1.notation_size() + score2.notation_size()
1668        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1669        if cost_plus_errors > total_syms:
1670            adjustment: int = cost_plus_errors - total_syms
1671            score1.num_syntax_errors_fixed -= adjustment
1672
1673        cost_total += score1.num_syntax_errors_fixed
1674
1675        return op_list_total, cost_total
@staticmethod
def annotated_scores_diff( score1: musicdiff.annotation.AnnScore, score2: musicdiff.annotation.AnnScore) -> tuple[list[tuple], int]:
1578    @staticmethod
1579    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1580        '''
1581        Compare two annotated scores, computing an operations list and the cost of applying those
1582        operations to the first score to generate the second score.
1583
1584        Args:
1585            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1586            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1587
1588        Returns:
1589            list[tuple], int: The operations list and the cost
1590        '''
1591        # Clear all memoizer caches, in case we are called again with different scores.
1592        # The cached results are no longer valid.
1593        Comparison._clear_memoizer_caches()
1594
1595        op_list_total: list[tuple] = []
1596        cost_total: int = 0
1597
1598        if score1.n_of_parts == score2.n_of_parts:
1599            n_of_parts = score1.n_of_parts
1600        else:
1601            # The two scores have differing number of parts.  For now, assume that
1602            # the parts are in the same order in both scores, and that the missing
1603            # parts are the ones that should have been at the end of the smaller
1604            # score. In future we could do something like we do with voices, where
1605            # we try all the combinations and compare the most-similar pairs of
1606            # parts, and the rest are considered to be the extra parts (deleted
1607            # from score1, or inserted into score2).
1608            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1609            if score1.n_of_parts > score2.n_of_parts:
1610                # score1 has more parts that must be deleted
1611                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1612                    deleted_part = score1.part_list[part_idx]
1613                    op_list_total.append(
1614                        (
1615                            "delpart",
1616                            deleted_part,
1617                            None,
1618                            deleted_part.notation_size()
1619                        )
1620                    )
1621                    cost_total += deleted_part.notation_size()
1622            else:
1623                # score2 has more parts that must be inserted
1624                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1625                    inserted_part = score2.part_list[part_idx]
1626                    op_list_total.append(
1627                        (
1628                            "inspart",
1629                            None,
1630                            inserted_part,
1631                            inserted_part.notation_size()
1632                        )
1633                    )
1634                    cost_total += inserted_part.notation_size()
1635
1636        # iterate over parts that exist in both scores
1637        for p_number in range(n_of_parts):
1638            # compute non-common-subseq
1639            ncs = Comparison._non_common_subsequences_of_measures(
1640                score1.part_list[p_number].bar_list,
1641                score2.part_list[p_number].bar_list,
1642            )
1643            # compute blockdiff
1644            for subseq in ncs:
1645                op_list_block, cost_block = Comparison._block_diff_lin(
1646                    subseq["original"], subseq["compare_to"]
1647                )
1648                op_list_total.extend(op_list_block)
1649                cost_total += cost_block
1650
1651        # compare the staff groups
1652        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1653            score1.staff_group_list, score2.staff_group_list
1654        )
1655        op_list_total.extend(groups_op_list)
1656        cost_total += groups_cost
1657
1658        # compare the metadata items
1659        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1660            score1.metadata_items_list, score2.metadata_items_list
1661        )
1662        op_list_total.extend(mditems_op_list)
1663        cost_total += mditems_cost
1664
1665        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1666        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1667        total_syms: int = score1.notation_size() + score2.notation_size()
1668        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1669        if cost_plus_errors > total_syms:
1670            adjustment: int = cost_plus_errors - total_syms
1671            score1.num_syntax_errors_fixed -= adjustment
1672
1673        cost_total += score1.num_syntax_errors_fixed
1674
1675        return op_list_total, cost_total

Compare two annotated scores, computing an operations list and the cost of applying those operations to the first score to generate the second score.

Arguments:
Returns:

list[tuple], int: The operations list and the cost