musicdiff
# ------------------------------------------------------------------------------
# Purpose:    musicdiff is a package for comparing music scores using music21.
#
# Authors:    Greg Chapman <gregc@mac.com>
#             musicdiff is derived from:
#                 https://github.com/fosfrancesco/music-score-diff.git
#                 by Francesco Foscarin <foscarin.francesco@gmail.com>
#
# Copyright:  (c) 2022-2025 Francesco Foscarin, Greg Chapman
# License:    MIT, see LICENSE
# ------------------------------------------------------------------------------

__docformat__ = "google"

import sys
import os
import json
import re
import typing as t
from pathlib import Path

import music21 as m21
import converter21

from musicdiff.detaillevel import DetailLevel
from musicdiff.m21utils import M21Utils
from musicdiff.annotation import AnnScore
from musicdiff.comparison import Comparison
from musicdiff.comparison import EvaluationMetrics
from musicdiff.visualization import Visualization

def _getInputExtensionsList() -> list[str]:
    '''
    Return every file extension (with leading dot, e.g. '.krn') that music21's
    registered input sub-converters can parse.
    '''
    c = m21.converter.Converter()
    inList = c.subConvertersList('input')
    result = []
    for subc in inList:
        for inputExt in subc.registerInputExtensions:
            result.append('.' + inputExt)
    return result

def _printSupportedInputFormats() -> None:
    '''
    Print (to stderr) the names and file extensions of every input format
    music21 can parse.  Used for user-facing error/help output.
    '''
    c = m21.converter.Converter()
    inList = c.subConvertersList('input')
    print("Supported input formats are:", file=sys.stderr)
    for subc in inList:
        if subc.registerInputExtensions:
            print('\tformats : ' + ', '.join(subc.registerFormats)
                + '\textensions: ' + ', '.join(subc.registerInputExtensions), file=sys.stderr)

def diff(
    score1: str | Path | m21.stream.Score | m21.stream.Opus,
    score2: str | Path | m21.stream.Score | m21.stream.Opus,
    out_path1: str | Path | None = None,
    out_path2: str | Path | None = None,
    force_parse: bool = True,
    visualize_diffs: bool = True,
    print_text_output: bool = False,
    print_omr_ned_output: bool = False,
    fix_first_file_syntax: bool = False,
    detail: DetailLevel | int = DetailLevel.Default
) -> int | None:
    '''
    Compare two musical scores and optionally save/display the differences as two marked-up
    rendered PDFs.

    Args:
        score1 (str, Path, music21.stream.Score): The first music score to compare. The score
            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
            etc), or a music21 Score object.
        score2 (str, Path, music21.stream.Score): The second musical score to compare. The score
            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
            etc), or a music21 Score object.
        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
            previously.
            (default is True)
        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
            the only result of the call will be the return value (the number of differences).
            (default is True)
        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
            (default is False)
        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
            (OMR-NED), which is computed as OMR edit distance divided by the total number of
            symbols in the two scores.
            (default is False)
        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
            file (and add the number of such fixes to the returned OMR edit distance).
            (default is False)
        detail (DetailLevel | int): What level of detail to use during the diff.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
            that must be added or deleted. (0 means that the scores were identical, and
            None means that one or more of the input files failed to parse.)
    '''
    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
    converter21.register()

    badArg1: bool = False
    badArg2: bool = False
    score1Name: str | Path | None = None
    score2Name: str | Path | None = None

    # Convert input strings to Paths
    if isinstance(score1, str):
        score1Name = score1
        try:
            score1 = Path(score1)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
            badArg1 = True

    if isinstance(score2, str):
        score2Name = score2
        try:
            score2 = Path(score2)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
            badArg2 = True

    if badArg1 or badArg2:
        return None

    if isinstance(score1, Path):
        if not score1Name:
            score1Name = score1
        fileName1 = score1.name
        fileExt1 = score1.suffix

        if fileExt1 not in _getInputExtensionsList():
            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
            badArg1 = True

        if not badArg1:
            # pylint: disable=broad-except
            try:
                # Note: only the first file gets acceptSyntaxErrors, per the
                # fix_first_file_syntax contract (syntax fixes count toward the
                # returned OMR edit distance for score1 only).
                sc = m21.converter.parse(
                    score1,
                    forceSource=force_parse,
                    acceptSyntaxErrors=fix_first_file_syntax
                )
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score1 = sc

            except Exception as e:
                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg1 = True
            # pylint: enable=broad-except

    if isinstance(score2, Path):
        if not score2Name:
            score2Name = score2
        fileName2: str = score2.name
        fileExt2: str = score2.suffix

        if fileExt2 not in _getInputExtensionsList():
            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
            badArg2 = True

        if not badArg2:
            # pylint: disable=broad-except
            try:
                sc = m21.converter.parse(score2, forceSource=force_parse)
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score2 = sc
            except Exception as e:
                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg2 = True
            # pylint: enable=broad-except

    if badArg1 or badArg2:
        return None

    if t.TYPE_CHECKING:
        assert isinstance(score1, (m21.stream.Score, m21.stream.Opus))
        assert isinstance(score2, (m21.stream.Score, m21.stream.Opus))

    total_cost: int = 0

    # if both "scores" are actually Scores, the lists will be of length 1.
    # If one or both are Opuses, the lists will be sized to fit the larger
    # of the two, with the list for the shorter Opus (or maybe just a Score)
    # padded with empty Scores, so the lists have the same length.
    scoreList1: list[m21.stream.Score]
    scoreList2: list[m21.stream.Score]
    scoreList1, scoreList2 = _getScoreLists(score1, score2)

    for sc1, sc2 in zip(scoreList1, scoreList2):
        # scan each score, producing an annotated wrapper
        annotated_score1: AnnScore = AnnScore(sc1, detail)
        annotated_score2: AnnScore = AnnScore(sc2, detail)

        diff_list: list
        cost: int
        diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)

        total_cost += cost

        if cost != 0:
            if visualize_diffs:
                # you can change these three colors as you like...
                # Visualization.INSERTED_COLOR = 'red'
                # Visualization.DELETED_COLOR = 'red'
                # Visualization.CHANGED_COLOR = 'red'

                # color changed/deleted/inserted notes, add descriptive text for each change, etc
                Visualization.mark_diffs(sc1, sc2, diff_list)

                # ask music21 to display the scores as PDFs. Composer's name will be prepended with
                # 'score1 ' and 'score2 ', respectively, so you can see which is which.
                Visualization.show_diffs(sc1, sc2, out_path1, out_path2)

        if print_omr_ned_output:
            omr_ned_output: dict = Visualization.get_omr_ned_output(
                cost, annotated_score1, annotated_score2
            )
            jsonStr: str = json.dumps(omr_ned_output, indent=4)
            print(jsonStr)

        if print_text_output:
            text_output: str = Visualization.get_text_output(
                sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name
            )
            if text_output:
                if print_omr_ned_output and print_text_output:
                    # put a blank line between them
                    print('')
                print(text_output)

    return total_cost


def _diff_omr_ned_metrics(
    predpath: str | Path,
    gtpath: str | Path,
    detail: DetailLevel | int
) -> EvaluationMetrics | None:
    # Returns an EvaluationMetrics carrying (gt_path, pred_path, numsyms_gt,
    # numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned).
    # Returns None if pred or gt is not a music21-importable format.
    # If import is possible (correct format), but actually fails (incorrect content),
    # the resulting score will be empty (and omr_ned will be 1.0).

    # Convert input strings to Paths
    if isinstance(predpath, str):
        predpath = Path(predpath)
    if isinstance(gtpath, str):
        gtpath = Path(gtpath)

    if predpath.suffix not in _getInputExtensionsList():
        print(
            f'predicted file extension ({predpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        predscore = m21.converter.parse(
            predpath,
            forceSource=True,
            acceptSyntaxErrors=True
        )
        if isinstance(predscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            predscore = predscore.scores[0]
    except Exception:
        # best-effort: an unparseable prediction is scored as an empty score
        predscore = m21.stream.Score()

    if gtpath.suffix not in _getInputExtensionsList():
        print(
            f'ground truth file extension ({gtpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        gtscore = m21.converter.parse(
            gtpath,
            forceSource=True,
            acceptSyntaxErrors=False
        )
        if isinstance(gtscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            gtscore = gtscore.scores[0]
    except Exception:
        # best-effort: an unparseable ground truth is scored as an empty score
        gtscore = m21.stream.Score()

    if t.TYPE_CHECKING:
        assert isinstance(gtscore, m21.stream.Score)
        assert isinstance(predscore, m21.stream.Score)

    numParts: int = len(list(gtscore.parts))
    if numParts == 0:
        return None

    # scan each score, producing an annotated wrapper
    if t.TYPE_CHECKING:
        assert isinstance(predscore, m21.stream.Score)
        assert isinstance(gtscore, m21.stream.Score)
    ann_predscore: AnnScore = AnnScore(predscore, detail)
    ann_gtscore: AnnScore = AnnScore(gtscore, detail)

    numsyms_gt: int = ann_gtscore.notation_size()
    numsyms_pred: int = ann_predscore.notation_size()
    op_list: list
    omr_edit_distance: int
    op_list, omr_edit_distance = Comparison.annotated_scores_diff(ann_predscore, ann_gtscore)
    edit_distances_dict: dict[str, int] = Visualization.get_edit_distances_dict(
        op_list,
        ann_predscore.num_syntax_errors_fixed,
        detail
    )
    omr_ned = Visualization.get_omr_ned(omr_edit_distance, numsyms_pred, numsyms_gt)
    metrics = EvaluationMetrics(
        gtpath, predpath, numsyms_gt, numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned
    )
    return metrics


def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).
    '''

    converter21.register()

    # expand tildes
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    # BUG FIX: build the output path only AFTER tilde expansion.  Previously it
    # was computed from the raw output_folder, so passing '~/results' produced a
    # literal '~/results/output.csv' that open() cannot open (open() does not
    # expand tildes).
    output_file_path: str = os.path.join(output_folder, 'output.csv')

    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # check if it is a file
        if not os.path.isfile(predpath):
            continue

        # check if there is a same-named file in ground_truth_folder
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            continue

        # append metrics to metrics_list
        metrics_list.append(metrics)

    # sort metrics_list the way you want it to appear in the csv file.
    # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries
    # are together at the top, and the omr_ned == 1.0 entries are together
    # at the bottom. Within each group of "same omr_ned", sort by filename.
    def natsortkey(path: str):
        # splits path into chunks of digits and non-digits. Converts the digit
        # chunks to integers for numerical comparison and the non-digit chunks
        # to lowercase for case-insensitive comparison.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))
    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # append CSV line to output file
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # append the overall (batch-wide) OMR-NED score to the output file
        total_gt_numsyms: int = 0
        total_pred_numsyms: int = 0
        total_omr_edit_distance: int = 0
        if metrics_list:
            for metrics in metrics_list:
                total_gt_numsyms += metrics.gt_numsyms
                total_pred_numsyms += metrics.pred_numsyms
                total_omr_edit_distance += metrics.omr_edit_distance

        overall_score: float = Visualization.get_omr_ned(
            total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
        )

        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path

def _getScoreLists(
    score1: m21.stream.Score | m21.stream.Opus,
    score2: m21.stream.Score | m21.stream.Opus
) -> tuple[list[m21.stream.Score], list[m21.stream.Score]]:
    '''
    Return two equal-length lists of Scores.  A plain Score becomes a one-element
    list; an Opus becomes the list of its Scores.  The shorter list is padded
    with empty Scores so both lists have the same length (padding mutates the
    shorter list in place, so returning list1/list2 returns the padded lists).
    '''
    list1: list[m21.stream.Score] = []
    list2: list[m21.stream.Score] = []
    if isinstance(score1, m21.stream.Score):
        list1 = [score1]
    else:
        list1 = list(score1.scores)
    if isinstance(score2, m21.stream.Score):
        list2 = [score2]
    else:
        list2 = list(score2.scores)

    if len(list1) == len(list2):
        return list1, list2

    shortList: list[m21.stream.Score]
    longList: list[m21.stream.Score]
    if len(list1) > len(list2):
        shortList = list2
        longList = list1
    else:
        shortList = list1
        longList = list2
    numPad: int = len(longList) - len(shortList)
    for _ in range(0, numPad):
        shortList.append(m21.stream.Score())

    return list1, list2
def
diff( score1: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, score2: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, out_path1: str | pathlib.Path | None = None, out_path2: str | pathlib.Path | None = None, force_parse: bool = True, visualize_diffs: bool = True, print_text_output: bool = False, print_omr_ned_output: bool = False, fix_first_file_syntax: bool = False, detail: musicdiff.detaillevel.DetailLevel | int = DetailLevel.Default) -> int | None:
51def diff( 52 score1: str | Path | m21.stream.Score | m21.stream.Opus, 53 score2: str | Path | m21.stream.Score | m21.stream.Opus, 54 out_path1: str | Path | None = None, 55 out_path2: str | Path | None = None, 56 force_parse: bool = True, 57 visualize_diffs: bool = True, 58 print_text_output: bool = False, 59 print_omr_ned_output: bool = False, 60 fix_first_file_syntax: bool = False, 61 detail: DetailLevel | int = DetailLevel.Default 62) -> int | None: 63 ''' 64 Compare two musical scores and optionally save/display the differences as two marked-up 65 rendered PDFs. 66 67 Args: 68 score1 (str, Path, music21.stream.Score): The first music score to compare. The score 69 can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, 70 etc), or a music21 Score object. 71 score2 (str, Path, music21.stream.Score): The second musical score to compare. The score 72 can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, 73 etc), or a music21 Score object. 74 out_path1 (str, Path): Where to save the first marked-up rendered score PDF. 75 If out_path1 is None, both PDFs will be displayed in the default PDF viewer. 76 (default is None) 77 out_path2 (str, Path): Where to save the second marked-up rendered score PDF. 78 If out_path2 is None, both PDFs will be displayed in the default PDF viewer. 79 (default is None) 80 force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed 81 previously. 82 (default is True) 83 visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, 84 the only result of the call will be the return value (the number of differences). 85 (default is True) 86 print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. 
87 (default is False) 88 print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance 89 (OMR-NED), which is computed as OMR edit distance divided by the total number of 90 symbols in the two scores. 91 (default is False) 92 fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first 93 file (and add the number of such fixes to the returned OMR edit distance). 94 (default is False) 95 detail (DetailLevel | int): What level of detail to use during the diff. 96 Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently 97 AllObjects), or any combination (with | or &~) of those or NotesAndRests, 98 Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, 99 Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, 100 Style, Metadata, Voicing, or NoteStaffPosition. 101 102 Returns: 103 int | None: The total OMR Edit Distance, i.e. the number of individual symbols 104 that must be added or deleted. (0 means that the scores were identical, and 105 None means that one or more of the input files failed to parse.) 106 ''' 107 # Use the Humdrum/MEI importers from converter21 in place of the ones in music21... 108 # Comment out this line to go back to music21's built-in Humdrum/MEI importers. 
109 converter21.register() 110 111 badArg1: bool = False 112 badArg2: bool = False 113 score1Name: str | Path | None = None 114 score2Name: str | Path | None = None 115 116 # Convert input strings to Paths 117 if isinstance(score1, str): 118 score1Name = score1 119 try: 120 score1 = Path(score1) 121 except Exception: # pylint: disable=broad-exception-caught 122 print(f'score1 ({score1}) is not a valid path.', file=sys.stderr) 123 badArg1 = True 124 125 if isinstance(score2, str): 126 score2Name = score2 127 try: 128 score2 = Path(score2) 129 except Exception: # pylint: disable=broad-exception-caught 130 print(f'score2 ({score2}) is not a valid path.', file=sys.stderr) 131 badArg2 = True 132 133 if badArg1 or badArg2: 134 return None 135 136 if isinstance(score1, Path): 137 if not score1Name: 138 score1Name = score1 139 fileName1 = score1.name 140 fileExt1 = score1.suffix 141 142 if fileExt1 not in _getInputExtensionsList(): 143 print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr) 144 badArg1 = True 145 146 if not badArg1: 147 # pylint: disable=broad-except 148 try: 149 sc = m21.converter.parse( 150 score1, 151 forceSource=force_parse, 152 acceptSyntaxErrors=fix_first_file_syntax 153 ) 154 if t.TYPE_CHECKING: 155 assert isinstance(sc, m21.stream.Score) 156 score1 = sc 157 158 except Exception as e: 159 print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr) 160 print(e, file=sys.stderr) 161 badArg1 = True 162 # pylint: enable=broad-except 163 164 if isinstance(score2, Path): 165 if not score2Name: 166 score2Name = score2 167 fileName2: str = score2.name 168 fileExt2: str = score2.suffix 169 170 if fileExt2 not in _getInputExtensionsList(): 171 print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr) 172 badArg2 = True 173 174 if not badArg2: 175 # pylint: disable=broad-except 176 try: 177 sc = m21.converter.parse(score2, forceSource=force_parse) 178 if t.TYPE_CHECKING: 179 
assert isinstance(sc, m21.stream.Score) 180 score2 = sc 181 except Exception as e: 182 print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr) 183 print(e, file=sys.stderr) 184 badArg2 = True 185 # pylint: enable=broad-except 186 187 if badArg1 or badArg2: 188 return None 189 190 if t.TYPE_CHECKING: 191 assert isinstance(score1, (m21.stream.Score, m21.stream.Opus)) 192 assert isinstance(score2, (m21.stream.Score, m21.stream.Opus)) 193 194 total_cost: int = 0 195 196 # if both "scores" are actually Scores, the lists will be of length 1. 197 # If one or both are Opuses, the lists will be sized to fit the larger 198 # of the two, with the list for the shorter Opus (or maybe just a Score) 199 # padded with empty Scores, so the lists have the same length. 200 scoreList1: list[m21.stream.Score] 201 scoreList2: list[m21.stream.Score] 202 scoreList1, scoreList2 = _getScoreLists(score1, score2) 203 204 for sc1, sc2 in zip(scoreList1, scoreList2): 205 # scan each score, producing an annotated wrapper 206 annotated_score1: AnnScore = AnnScore(sc1, detail) 207 annotated_score2: AnnScore = AnnScore(sc2, detail) 208 209 diff_list: list 210 cost: int 211 diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2) 212 213 total_cost += cost 214 215 if cost != 0: 216 if visualize_diffs: 217 # you can change these three colors as you like... 218 # Visualization.INSERTED_COLOR = 'red' 219 # Visualization.DELETED_COLOR = 'red' 220 # Visualization.CHANGED_COLOR = 'red' 221 222 # color changed/deleted/inserted notes, add descriptive text for each change, etc 223 Visualization.mark_diffs(sc1, sc2, diff_list) 224 225 # ask music21 to display the scores as PDFs. Composer's name will be prepended with 226 # 'score1 ' and 'score2 ', respectively, so you can see which is which. 
227 Visualization.show_diffs(sc1, sc2, out_path1, out_path2) 228 229 if print_omr_ned_output: 230 omr_ned_output: dict = Visualization.get_omr_ned_output( 231 cost, annotated_score1, annotated_score2 232 ) 233 jsonStr: str = json.dumps(omr_ned_output, indent=4) 234 print(jsonStr) 235 236 if print_text_output: 237 text_output: str = Visualization.get_text_output( 238 sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name 239 ) 240 if text_output: 241 if print_omr_ned_output and print_text_output: 242 # put a blank line between them 243 print('') 244 print(text_output) 245 246 return total_cost
Compare two musical scores and optionally save/display the differences as two marked-up rendered PDFs.
Arguments:
- score1 (str, Path, music21.stream.Score): The first music score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
- score2 (str, Path, music21.stream.Score): The second musical score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
- out_path1 (str, Path): Where to save the first marked-up rendered score PDF. If out_path1 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
- out_path2 (str, Path): Where to save the second marked-up rendered score PDF. If out_path2 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
- force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed previously. (default is True)
- visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, the only result of the call will be the return value (the number of differences). (default is True)
- print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. (default is False)
- print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance (OMR-NED), which is computed as OMR edit distance divided by the total number of symbols in the two scores. (default is False)
- fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first file (and add the number of such fixes to the returned OMR edit distance). (default is False)
- detail (DetailLevel | int): What level of detail to use during the diff. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:
int | None: The total OMR Edit Distance, i.e. the number of individual symbols that must be added or deleted. (0 means that the scores were identical, and None means that one or more of the input files failed to parse.)
def
diff_ml_training( predicted_folder: str, ground_truth_folder: str, output_folder: str, detail: musicdiff.detaillevel.DetailLevel | int = DetailLevel.Default) -> tuple[float, str]:
335def diff_ml_training( 336 predicted_folder: str, 337 ground_truth_folder: str, 338 output_folder: str, 339 detail: DetailLevel | int = DetailLevel.Default, 340) -> tuple[float, str]: 341 ''' 342 Compare two folders of musical scores, and produce a CSV spreadsheet of results, including 343 the overall OMR-NED score for the batch. 344 345 Args: 346 predicted_folder (str): The folder full of predicted scores. The scores 347 can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). 348 Each score must have the exact same filename as the corresponding ground 349 truth score. 350 ground_truth_folder (str): The folder full of ground truth scores. Each score must 351 have the exact same filename as the corresponding predicted score. 352 output_folder (str): The folder in which to save the output spreadsheet (output.csv). 353 detail (DetailLevel | int): What level of detail to use during the comparisons. 354 Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently 355 AllObjects), or any combination (with | or &~) of those or NotesAndRests, 356 Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, 357 Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, 358 Style, Metadata, Voicing, or NoteStaffPosition. 359 360 Returns: 361 tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the 362 output spreadsheet (output.csv). 
363 ''' 364 365 converter21.register() 366 367 output_file_path: str = output_folder + '/output.csv' 368 369 # expand tildes 370 predicted_folder = os.path.expanduser(predicted_folder) 371 ground_truth_folder = os.path.expanduser(ground_truth_folder) 372 output_folder = os.path.expanduser(output_folder) 373 374 metrics_list: list[EvaluationMetrics] = [] 375 for name in os.listdir(predicted_folder): 376 predpath: str = os.path.join(predicted_folder, name) 377 378 # check if it is a file 379 if not os.path.isfile(predpath): 380 continue 381 382 # check if there is a same-named file in ground_truth_folder 383 gtpath: str = os.path.join(ground_truth_folder, name) 384 if not os.path.isfile(gtpath): 385 continue 386 387 metrics: EvaluationMetrics | None = _diff_omr_ned_metrics( 388 predpath=predpath, gtpath=gtpath, detail=detail 389 ) 390 if metrics is None: 391 continue 392 393 # append metrics to metrics_list 394 metrics_list.append(metrics) 395 396 # sort metrics_list the way you want it to appear in the csv file. 397 # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries 398 # are together at the top, and the omr_ned == 1.0 entries are together 399 # at the bottom. Within each group of "same omr_ned", sort by filename. 400 def natsortkey(path: str): 401 # splits path into chunks of digits and non-digits. Converts the digit 402 # chunks to integers for numerical comparison and the non-digit chunks 403 # to lowercase for case-insensitive comparison. 
404 key: list[int | str] = [] 405 for chunk in re.split(r'(\d+)', path): 406 if chunk.isdigit(): 407 key.append(int(chunk)) 408 else: 409 key.append(chunk.lower()) 410 return key 411 412 metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path)))) 413 with open(output_file_path, 'wt', encoding='utf-8') as outf: 414 print(Visualization.get_output_csv_header(detail), file=outf) 415 416 for metrics in metrics_list: 417 # append CSV line to output file 418 # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned 419 print(Visualization.get_output_csv_line(metrics, detail), file=outf) 420 421 # append overall score to output file (currently average SER) 422 total_gt_numsyms: int = 0 423 total_pred_numsyms: int = 0 424 total_omr_edit_distance: int = 0 425 if metrics_list: 426 for metrics in metrics_list: 427 total_gt_numsyms += metrics.gt_numsyms 428 total_pred_numsyms += metrics.pred_numsyms 429 total_omr_edit_distance += metrics.omr_edit_distance 430 431 overall_score: float = Visualization.get_omr_ned( 432 total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms 433 ) 434 435 print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf) 436 outf.flush() 437 438 return overall_score, output_file_path
Compare two folders of musical scores, and produce a CSV spreadsheet of results, including the overall OMR-NED score for the batch.
Arguments:
- predicted_folder (str): The folder full of predicted scores. The scores can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). Each score must have the exact same filename as the corresponding ground truth score.
- ground_truth_folder (str): The folder full of ground truth scores. Each score must have the exact same filename as the corresponding predicted score.
- output_folder (str): The folder in which to save the output spreadsheet (output.csv).
- detail (DetailLevel | int): What level of detail to use during the comparisons. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:
tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the output spreadsheet (output.csv).