musicdiff

  1# ------------------------------------------------------------------------------
  2# Purpose:       musicdiff is a package for comparing music scores using music21.
  3#
  4# Authors:       Greg Chapman <gregc@mac.com>
  5#                musicdiff is derived from:
  6#                   https://github.com/fosfrancesco/music-score-diff.git
  7#                   by Francesco Foscarin <foscarin.francesco@gmail.com>
  8#
  9# Copyright:     (c) 2022-2025 Francesco Foscarin, Greg Chapman
 10# License:       MIT, see LICENSE
 11# ------------------------------------------------------------------------------
 12
 13__docformat__ = "google"
 14
 15import sys
 16import os
 17import json
 18import re
 19import typing as t
 20from pathlib import Path
 21
 22import music21 as m21
 23import converter21
 24
 25from musicdiff.detaillevel import DetailLevel
 26from musicdiff.m21utils import M21Utils
 27from musicdiff.annotation import AnnScore
 28from musicdiff.comparison import Comparison
 29from musicdiff.comparison import EvaluationMetrics
 30from musicdiff.visualization import Visualization
 31
 32def _getInputExtensionsList() -> list[str]:
 33    c = m21.converter.Converter()
 34    inList = c.subConvertersList('input')
 35    result = []
 36    for subc in inList:
 37        for inputExt in subc.registerInputExtensions:
 38            result.append('.' + inputExt)
 39    return result
 40
 41def _printSupportedInputFormats() -> None:
 42    c = m21.converter.Converter()
 43    inList = c.subConvertersList('input')
 44    print("Supported input formats are:", file=sys.stderr)
 45    for subc in inList:
 46        if subc.registerInputExtensions:
 47            print('\tformats   : ' + ', '.join(subc.registerFormats)
 48                    + '\textensions: ' + ', '.join(subc.registerInputExtensions), file=sys.stderr)
 49
def diff(
    score1: str | Path | m21.stream.Score | m21.stream.Opus,
    score2: str | Path | m21.stream.Score | m21.stream.Opus,
    out_path1: str | Path | None = None,
    out_path2: str | Path | None = None,
    force_parse: bool = True,
    visualize_diffs: bool = True,
    print_text_output: bool = False,
    print_omr_ned_output: bool = False,
    fix_first_file_syntax: bool = False,
    detail: DetailLevel | int = DetailLevel.Default
) -> int | None:
    '''
    Compare two musical scores and optionally save/display the differences as two marked-up
    rendered PDFs.

    Args:
        score1 (str, Path, music21.stream.Score, music21.stream.Opus): The first music score
            to compare. The score can be a file of any format readable by music21
            (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score (or Opus) object.
        score2 (str, Path, music21.stream.Score, music21.stream.Opus): The second musical
            score to compare. The score can be a file of any format readable by music21
            (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score (or Opus) object.
        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
            previously.
            (default is True)
        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
            the only result of the call will be the return value (the number of differences).
            (default is True)
        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
            (default is False)
        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
            (OMR-NED), which is computed as OMR edit distance divided by the total number of
            symbols in the two scores.
            (default is False)
        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
            file (and add the number of such fixes to the returned OMR edit distance).
            (default is False)
        detail (DetailLevel | int): What level of detail to use during the diff.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
            that must be added or deleted. (0 means that the scores were identical, and
            None means that one or more of the input files failed to parse.)
    '''
    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
    converter21.register()

    badArg1: bool = False
    badArg2: bool = False
    # Only set when the corresponding score argument arrives as a str or Path;
    # later passed to Visualization.get_text_output to label the two scores.
    score1Name: str | Path | None = None
    score2Name: str | Path | None = None

    # Convert input strings to Paths
    if isinstance(score1, str):
        score1Name = score1
        try:
            score1 = Path(score1)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
            badArg1 = True

    if isinstance(score2, str):
        score2Name = score2
        try:
            score2 = Path(score2)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
            badArg2 = True

    # Both arguments are checked before bailing out, so a single run reports
    # errors for both bad paths.
    if badArg1 or badArg2:
        return None

    if isinstance(score1, Path):
        if not score1Name:
            score1Name = score1
        fileName1: str = score1.name
        fileExt1: str = score1.suffix

        if fileExt1 not in _getInputExtensionsList():
            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
            badArg1 = True

        if not badArg1:
            # pylint: disable=broad-except
            try:
                sc = m21.converter.parse(
                    score1,
                    forceSource=force_parse,
                    acceptSyntaxErrors=fix_first_file_syntax
                )
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score1 = sc

            except Exception as e:
                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg1 = True
            # pylint: enable=broad-except

    if isinstance(score2, Path):
        if not score2Name:
            score2Name = score2
        fileName2: str = score2.name
        fileExt2: str = score2.suffix

        if fileExt2 not in _getInputExtensionsList():
            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
            badArg2 = True

        if not badArg2:
            # pylint: disable=broad-except
            try:
                # NOTE: acceptSyntaxErrors is deliberately not passed here;
                # syntax-error fixing (fix_first_file_syntax) applies only to
                # the first score.
                sc = m21.converter.parse(score2, forceSource=force_parse)
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score2 = sc
            except Exception as e:
                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg2 = True
            # pylint: enable=broad-except

    if badArg1 or badArg2:
        return None

    # At this point both arguments have been narrowed to parsed music21 streams.
    if t.TYPE_CHECKING:
        assert isinstance(score1, (m21.stream.Score, m21.stream.Opus))
        assert isinstance(score2, (m21.stream.Score, m21.stream.Opus))

    total_cost: int = 0

    # if both "scores" are actually Scores, the lists will be of length 1.
    # If one or both are Opuses, the lists will be sized to fit the larger
    # of the two, with the list for the shorter Opus (or maybe just a Score)
    # padded with empty Scores, so the lists have the same length.
    scoreList1: list[m21.stream.Score]
    scoreList2: list[m21.stream.Score]
    scoreList1, scoreList2 = _getScoreLists(score1, score2)

    for sc1, sc2 in zip(scoreList1, scoreList2):
        # scan each score, producing an annotated wrapper
        annotated_score1: AnnScore = AnnScore(sc1, detail)
        annotated_score2: AnnScore = AnnScore(sc2, detail)

        diff_list: list
        cost: int
        diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)

        # costs accumulate over all score pairs (more than one pair only when
        # an Opus was passed in)
        total_cost += cost

        if cost != 0:
            if visualize_diffs:
                # you can change these three colors as you like...
                # Visualization.INSERTED_COLOR = 'red'
                # Visualization.DELETED_COLOR = 'red'
                # Visualization.CHANGED_COLOR = 'red'

                # color changed/deleted/inserted notes, add descriptive text for each change, etc
                Visualization.mark_diffs(sc1, sc2, diff_list)

                # ask music21 to display the scores as PDFs.  Composer's name will be prepended with
                # 'score1 ' and 'score2 ', respectively, so you can see which is which.
                Visualization.show_diffs(sc1, sc2, out_path1, out_path2)

        if print_omr_ned_output:
            # one JSON blob of OMR-NED stats per score pair
            omr_ned_output: dict = Visualization.get_omr_ned_output(
                cost, annotated_score1, annotated_score2
            )
            jsonStr: str = json.dumps(omr_ned_output, indent=4)
            print(jsonStr)

        if print_text_output:
            text_output: str = Visualization.get_text_output(
                sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name
            )
            if text_output:
                # NOTE(review): print_text_output is necessarily True here, so
                # this condition reduces to print_omr_ned_output.
                if print_omr_ned_output and print_text_output:
                    # put a blank line between them
                    print('')
                print(text_output)

    return total_cost
246
247
def _diff_omr_ned_metrics(
    predpath: str | Path,
    gtpath: str | Path,
    detail: DetailLevel | int
) -> EvaluationMetrics | None:
    # Compare one predicted score file against one ground truth score file.
    # Returns an EvaluationMetrics carrying (gt_path, pred_path, numsyms_gt,
    # numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned).
    # Returns None if pred or gt is not a music21-importable format, or if
    # the ground truth score ends up with no parts.
    # If import is possible (correct format), but actually fails (incorrect
    # content), the resulting score will be empty (and omr_ned will be 1.0).

    # Convert input strings to Paths
    if isinstance(predpath, str):
        predpath = Path(predpath)
    if isinstance(gtpath, str):
        gtpath = Path(gtpath)

    # compute the supported-extension list once (it requires scanning all of
    # music21's input sub-converters) and use it for both files
    supportedExts: list[str] = _getInputExtensionsList()

    if predpath.suffix not in supportedExts:
        print(
            f'predicted file extension ({predpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # the predicted file may be malformed; let the parser fix what it can
        predscore = m21.converter.parse(
            predpath,
            forceSource=True,
            acceptSyntaxErrors=True
        )
        if isinstance(predscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            predscore = predscore.scores[0]
    except Exception:  # pylint: disable=broad-exception-caught
        # best effort: an unparseable prediction compares as an empty score
        predscore = m21.stream.Score()

    if gtpath.suffix not in supportedExts:
        print(
            f'ground truth file extension ({gtpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # ground truth is expected to be well-formed; no syntax fixing here
        gtscore = m21.converter.parse(
            gtpath,
            forceSource=True,
            acceptSyntaxErrors=False
        )
        if isinstance(gtscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            gtscore = gtscore.scores[0]
    except Exception:  # pylint: disable=broad-exception-caught
        # best effort: an unparseable ground truth compares as an empty score
        gtscore = m21.stream.Score()

    if t.TYPE_CHECKING:
        assert isinstance(gtscore, m21.stream.Score)
        assert isinstance(predscore, m21.stream.Score)

    # an empty ground truth score is not comparable
    numParts: int = len(list(gtscore.parts))
    if numParts == 0:
        return None

    # scan each score, producing an annotated wrapper
    ann_predscore: AnnScore = AnnScore(predscore, detail)
    ann_gtscore: AnnScore = AnnScore(gtscore, detail)

    numsyms_gt: int = ann_gtscore.notation_size()
    numsyms_pred: int = ann_predscore.notation_size()
    op_list: list
    omr_edit_distance: int
    op_list, omr_edit_distance = Comparison.annotated_scores_diff(ann_predscore, ann_gtscore)
    edit_distances_dict: dict[str, int] = Visualization.get_edit_distances_dict(
        op_list,
        ann_predscore.num_syntax_errors_fixed,
        detail
    )
    omr_ned = Visualization.get_omr_ned(omr_edit_distance, numsyms_pred, numsyms_gt)
    metrics = EvaluationMetrics(
        gtpath, predpath, numsyms_gt, numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned
    )
    return metrics
332
333
def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).
    '''

    converter21.register()

    # Expand tildes BEFORE deriving any path from the folders.  (Previously
    # output_file_path was built from the unexpanded output_folder, so a '~'
    # in output_folder made the open() below fail.)
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    output_file_path: str = os.path.join(output_folder, 'output.csv')

    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # only compare regular files
        if not os.path.isfile(predpath):
            continue

        # skip predictions that have no same-named file in ground_truth_folder
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            # unsupported format or empty ground truth; leave it out of the CSV
            continue

        metrics_list.append(metrics)

    # sort metrics_list the way you want it to appear in the csv file.
    # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries
    # are together at the top, and the omr_ned == 1.0 entries are together
    # at the bottom.  Within each group of "same omr_ned", sort by filename.
    def natsortkey(path: str) -> list[int | str]:
        # splits path into chunks of digits and non-digits. Converts the digit
        # chunks to integers for numerical comparison and the non-digit chunks
        # to lowercase for case-insensitive comparison.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))

    # accumulate totals across the whole batch for the overall OMR-NED score
    # (the loop is simply a no-op when metrics_list is empty)
    total_gt_numsyms: int = 0
    total_pred_numsyms: int = 0
    total_omr_edit_distance: int = 0
    for metrics in metrics_list:
        total_gt_numsyms += metrics.gt_numsyms
        total_pred_numsyms += metrics.pred_numsyms
        total_omr_edit_distance += metrics.omr_edit_distance

    overall_score: float = Visualization.get_omr_ned(
        total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
    )

    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # one CSV line per compared file pair
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # append overall stats to the output file
        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path
438
def _getScoreLists(
    score1: m21.stream.Score | m21.stream.Opus,
    score2: m21.stream.Score | m21.stream.Opus
) -> tuple[list[m21.stream.Score], list[m21.stream.Score]]:
    # Turn each argument into a list of Scores: a Score becomes a one-element
    # list, an Opus becomes the list of its Scores.  If the two lists end up
    # different lengths, the shorter one is padded in place with empty Scores
    # so both returned lists have the same length.
    def asScoreList(
        score: m21.stream.Score | m21.stream.Opus
    ) -> list[m21.stream.Score]:
        if isinstance(score, m21.stream.Score):
            return [score]
        return list(score.scores)

    list1: list[m21.stream.Score] = asScoreList(score1)
    list2: list[m21.stream.Score] = asScoreList(score2)

    shorter: list[m21.stream.Score] = list1 if len(list1) < len(list2) else list2
    longer: list[m21.stream.Score] = list2 if shorter is list1 else list1
    while len(shorter) < len(longer):
        shorter.append(m21.stream.Score())

    return list1, list2
def diff( score1: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, score2: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, out_path1: str | pathlib.Path | None = None, out_path2: str | pathlib.Path | None = None, force_parse: bool = True, visualize_diffs: bool = True, print_text_output: bool = False, print_omr_ned_output: bool = False, fix_first_file_syntax: bool = False, detail: musicdiff.detaillevel.DetailLevel | int = <DetailLevel.AllObjects: 32767>) -> int | None:
 51def diff(
 52    score1: str | Path | m21.stream.Score | m21.stream.Opus,
 53    score2: str | Path | m21.stream.Score | m21.stream.Opus,
 54    out_path1: str | Path | None = None,
 55    out_path2: str | Path | None = None,
 56    force_parse: bool = True,
 57    visualize_diffs: bool = True,
 58    print_text_output: bool = False,
 59    print_omr_ned_output: bool = False,
 60    fix_first_file_syntax: bool = False,
 61    detail: DetailLevel | int = DetailLevel.Default
 62) -> int | None:
 63    '''
 64    Compare two musical scores and optionally save/display the differences as two marked-up
 65    rendered PDFs.
 66
 67    Args:
 68        score1 (str, Path, music21.stream.Score): The first music score to compare. The score
 69            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
 70            etc), or a music21 Score object.
 71        score2 (str, Path, music21.stream.Score): The second musical score to compare. The score
 72            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
 73            etc), or a music21 Score object.
 74        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
 75            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
 76            (default is None)
 77        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
 78            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
 79            (default is None)
 80        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
 81            previously.
 82            (default is True)
 83        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
 84            the only result of the call will be the return value (the number of differences).
 85            (default is True)
 86        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
 87            (default is False)
 88        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
 89            (OMR-NED), which is computed as OMR edit distance divided by the total number of
 90            symbols in the two scores.
 91            (default is False)
 92        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
 93            file (and add the number of such fixes to the returned OMR edit distance).
 94            (default is False)
 95        detail (DetailLevel | int): What level of detail to use during the diff.
 96            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
 97            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
 98            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
 99            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
100            Style, Metadata, Voicing, or NoteStaffPosition.
101
102    Returns:
103        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
104            that must be added or deleted. (0 means that the scores were identical, and
105            None means that one or more of the input files failed to parse.)
106    '''
107    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
108    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
109    converter21.register()
110
111    badArg1: bool = False
112    badArg2: bool = False
113    score1Name: str | Path | None = None
114    score2Name: str | Path | None = None
115
116    # Convert input strings to Paths
117    if isinstance(score1, str):
118        score1Name = score1
119        try:
120            score1 = Path(score1)
121        except Exception:  # pylint: disable=broad-exception-caught
122            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
123            badArg1 = True
124
125    if isinstance(score2, str):
126        score2Name = score2
127        try:
128            score2 = Path(score2)
129        except Exception:  # pylint: disable=broad-exception-caught
130            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
131            badArg2 = True
132
133    if badArg1 or badArg2:
134        return None
135
136    if isinstance(score1, Path):
137        if not score1Name:
138            score1Name = score1
139        fileName1 = score1.name
140        fileExt1 = score1.suffix
141
142        if fileExt1 not in _getInputExtensionsList():
143            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
144            badArg1 = True
145
146        if not badArg1:
147            # pylint: disable=broad-except
148            try:
149                sc = m21.converter.parse(
150                    score1,
151                    forceSource=force_parse,
152                    acceptSyntaxErrors=fix_first_file_syntax
153                )
154                if t.TYPE_CHECKING:
155                    assert isinstance(sc, m21.stream.Score)
156                score1 = sc
157
158            except Exception as e:
159                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
160                print(e, file=sys.stderr)
161                badArg1 = True
162            # pylint: enable=broad-except
163
164    if isinstance(score2, Path):
165        if not score2Name:
166            score2Name = score2
167        fileName2: str = score2.name
168        fileExt2: str = score2.suffix
169
170        if fileExt2 not in _getInputExtensionsList():
171            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
172            badArg2 = True
173
174        if not badArg2:
175            # pylint: disable=broad-except
176            try:
177                sc = m21.converter.parse(score2, forceSource=force_parse)
178                if t.TYPE_CHECKING:
179                    assert isinstance(sc, m21.stream.Score)
180                score2 = sc
181            except Exception as e:
182                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
183                print(e, file=sys.stderr)
184                badArg2 = True
185            # pylint: enable=broad-except
186
187    if badArg1 or badArg2:
188        return None
189
190    if t.TYPE_CHECKING:
191        assert isinstance(score1, (m21.stream.Score, m21.stream.Opus))
192        assert isinstance(score2, (m21.stream.Score, m21.stream.Opus))
193
194    total_cost: int = 0
195
196    # if both "scores" are actually Scores, the lists will be of length 1.
197    # If one or both are Opuses, the lists will be sized to fit the larger
198    # of the two, with the list for the shorter Opus (or maybe just a Score)
199    # padded with empty Scores, so the lists have the same length.
200    scoreList1: list[m21.stream.Score]
201    scoreList2: list[m21.stream.Score]
202    scoreList1, scoreList2 = _getScoreLists(score1, score2)
203
204    for sc1, sc2 in zip(scoreList1, scoreList2):
205        # scan each score, producing an annotated wrapper
206        annotated_score1: AnnScore = AnnScore(sc1, detail)
207        annotated_score2: AnnScore = AnnScore(sc2, detail)
208
209        diff_list: list
210        cost: int
211        diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)
212
213        total_cost += cost
214
215        if cost != 0:
216            if visualize_diffs:
217                # you can change these three colors as you like...
218                # Visualization.INSERTED_COLOR = 'red'
219                # Visualization.DELETED_COLOR = 'red'
220                # Visualization.CHANGED_COLOR = 'red'
221
222                # color changed/deleted/inserted notes, add descriptive text for each change, etc
223                Visualization.mark_diffs(sc1, sc2, diff_list)
224
225                # ask music21 to display the scores as PDFs.  Composer's name will be prepended with
226                # 'score1 ' and 'score2 ', respectively, so you can see which is which.
227                Visualization.show_diffs(sc1, sc2, out_path1, out_path2)
228
229        if print_omr_ned_output:
230            omr_ned_output: dict = Visualization.get_omr_ned_output(
231                cost, annotated_score1, annotated_score2
232            )
233            jsonStr: str = json.dumps(omr_ned_output, indent=4)
234            print(jsonStr)
235
236        if print_text_output:
237            text_output: str = Visualization.get_text_output(
238                sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name
239            )
240            if text_output:
241                if print_omr_ned_output and print_text_output:
242                    # put a blank line between them
243                    print('')
244                print(text_output)
245
246    return total_cost

Compare two musical scores and optionally save/display the differences as two marked-up rendered PDFs.

Arguments:
  • score1 (str, Path, music21.stream.Score): The first music score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
  • score2 (str, Path, music21.stream.Score): The second musical score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
  • out_path1 (str, Path): Where to save the first marked-up rendered score PDF. If out_path1 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
  • out_path2 (str, Path): Where to save the second marked-up rendered score PDF. If out_path2 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
  • force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed previously. (default is True)
  • visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, the only result of the call will be the return value (the number of differences). (default is True)
  • print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. (default is False)
  • print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance (OMR-NED), which is computed as OMR edit distance divided by the total number of symbols in the two scores. (default is False)
  • fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first file (and add the number of such fixes to the returned OMR edit distance). (default is False)
  • detail (DetailLevel | int): What level of detail to use during the diff. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:

int | None: The total OMR Edit Distance, i.e. the number of individual symbols that must be added or deleted. (0 means that the scores were identical, and None means that one or more of the input files failed to parse.)

def diff_ml_training( predicted_folder: str, ground_truth_folder: str, output_folder: str, detail: musicdiff.detaillevel.DetailLevel | int = <DetailLevel.AllObjects: 32767>) -> tuple[float, str]:
def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).
    '''
    converter21.register()

    # Expand tildes BEFORE deriving output_file_path.  (Previously the path was
    # built from the un-expanded output_folder, so a '~'-prefixed folder made
    # open() fail and the returned path contained a literal '~'.)
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    output_file_path: str = os.path.join(output_folder, 'output.csv')

    # Gather one EvaluationMetrics per (predicted, ground-truth) filename pair.
    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # skip anything that isn't a regular file (e.g. subdirectories)
        if not os.path.isfile(predpath):
            continue

        # skip predicted files with no same-named ground truth file
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            # one of the two scores failed to parse; leave it out of the batch
            continue

        metrics_list.append(metrics)

    # Sort metrics_list the way it should appear in the csv file:
    # by omr_ned ascending (so the omr_ned == 0.0 entries are together at the
    # top and the omr_ned == 1.0 entries are together at the bottom), and
    # within each group of equal omr_ned, by natural-sorted filename.
    def natsortkey(path: str) -> list[int | str]:
        # Split path into alternating digit/non-digit chunks; digit chunks
        # compare numerically, non-digit chunks compare case-insensitively.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))

    # Compute the overall OMR-NED for the batch from the summed per-file
    # counts (an empty metrics_list leaves all totals at zero).
    total_gt_numsyms: int = 0
    total_pred_numsyms: int = 0
    total_omr_edit_distance: int = 0
    for metrics in metrics_list:
        total_gt_numsyms += metrics.gt_numsyms
        total_pred_numsyms += metrics.pred_numsyms
        total_omr_edit_distance += metrics.omr_edit_distance

    overall_score: float = Visualization.get_omr_ned(
        total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
    )

    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # one CSV line per score pair:
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # trailer line summarizes the whole batch (overall OMR-NED)
        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path

Compare two folders of musical scores, and produce a CSV spreadsheet of results, including the overall OMR-NED score for the batch.

Arguments:
  • predicted_folder (str): The folder full of predicted scores. The scores can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). Each score must have the exact same filename as the corresponding ground truth score.
  • ground_truth_folder (str): The folder full of ground truth scores. Each score must have the exact same filename as the corresponding predicted score.
  • output_folder (str): The folder in which to save the output spreadsheet (output.csv).
  • detail (DetailLevel | int): What level of detail to use during the comparisons. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:

tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the output spreadsheet (output.csv).