Xylophone gag project code
From enfascination
<pre>
### Seth Frey
### Error Detection, applied in this case to the Irish tune typical of the
### Looney Tunes Xylophone Gag
### Class Project for Chris Raphael's Music Processing, INFO-I547 at IU
### Spring 2010 (4/26/10)

### Boilerplate
rm( list=ls() )
library(tuneR)
setWavPlayer("/usr/local/bin/playRWave")
path <- "~/Desktop/projecto/music_processing/"

### useful library for error analysis
#source(paste(path, "code/spectrogram.r", sep="" ), echo=FALSE)

### The score
### endearing young charms in MIDI
#s_notes <- c(E4, D4, C4, D4, C4, C4, E4, G4, F4, A4, C5, C5, B4, A4)
eyc_orig_notes <- c(64, 62, 60, 62, 60, 60, 64, 67, 65, 69, 72, 72)#, 71, 69)
score <- eyc_orig_notes
timing <- c(.25, .25, .75, .25, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, .75, .25, .25)

### test the above representation of the score:
### scale timing info
#timing <- timing/sum(timing)
### less than two seconds
#duration <- 150000
#sample <- c()
#for (i in 1:length(score)) { sample <- c(sample, rep(score[i], timing[i]*duration)) }
#v = 2*pi*440.*2^((sample-69)/12)/sr
#z = round((2^13)*sin(cumsum(v)))
### insert click at errors
#z[ierrors] <- 32000
#u.samp = Wave(z, samp.rate = sr, bit=16) # make wave struct
#play(u.samp)

### Audio of endearing young charms, with or without mistakes.
### Each line below overwrites w; leave uncommented only the recording you want to run.
### bad (these don't get recognized well by the HMM)
w = readWave(paste(path, "final_project/bugs2001.wav", sep="" ))
w = readWave(paste(path, "final_project/bugs1001.wav", sep="" ))
### maybe (these sort of get recognized by the HMM, with recognition mistakes
### that get labeled as player errors by my code)
w = readWave(paste(path, "final_project/travis001good.wav", sep="" ))
w = readWave(paste(path, "final_project/travis002good.wav", sep="" ))
### good (these get recognized well by the HMM, and errors in them get caught,
### though there are a few identification errors labeled as player errors
### in these samples)
w = readWave(paste(path, "final_project/travislow003.wav", sep="" ))
w = readWave(paste(path, "final_project/decent001err2.wav", sep="" ))
w = readWave(paste(path, "final_project/travismistake003.wav", sep="" ))
w = readWave(paste(path, "final_project/travismistake006.wav", sep="" ))
w = readWave(paste(path, "final_project/yosemite1001.wav", sep="" ))
w = readWave(paste(path, "final_project/travislow004.wav", sep="" ))

### This project is a modification of the HMM code provided as the solution to HW6:
### http://www.music.informatics.indiana.edu/courses/I547/hw6_sln.r
### or
### http://enfascination.com/forever/xylophonegag/hw6_sln.r
### I have removed all of the comments from the original code to highlight
### my own. For the unfamiliar, this may obscure the workings of the HMM.
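### The commented-out score test above won't run as written (sr isn't
### defined until the wav is read, and timing has more entries than the
### trimmed score). Here is a self-contained sketch of the same idea,
### assuming a 44100 Hz sample rate; commented out like the original test:
#sr.test <- 44100
#timing.test <- timing[1:length(score)]/sum(timing[1:length(score)])
#duration <- 150000 # total length in samples
#samp <- c()
#for (i in 1:length(score)) { samp <- c(samp, rep(score[i], round(timing.test[i]*duration))) }
#v.test <- 2*pi*440*2^((samp-69)/12)/sr.test # per-sample phase increments
#z.test <- round((2^13)*sin(cumsum(v.test))) # render the score as a sine wave
#u.test <- Wave(z.test, samp.rate = sr.test, bit = 16)
#play(u.test)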
### Extract some parameters from the audio
bits = w@bit
sr = w@samp.rate
y = w@left
#y = y[30000:1000000] ### take a subset of the audio, for faster testing
u = Wave(y, samp.rate = sr, bit=bits)
### preserve the original
u.orig = Wave(y, samp.rate = sr, bit=bits)
#play(u.orig)

### Other important parameters
lop = 59 # the lowest possible pitch
hip = 96 # the highest possible pitch
N = 1024 ### the frame length
skiplen = N/2
frames = floor(length(y)/skiplen)-1 ### the number of frames, given N
notes = length(lop:hip) ### number of notes represented
### minimum number of frames constituting a played note (must be small
### for realistic run times)
state_per_note = 4
### minimum number of frames constituting a played error
state_per_error <- 8

### Transition probability matrix (almost all borrowed code)
trans = matrix(0,notes*state_per_note,notes*state_per_note)
for (i in 1:ncol(trans)){
  if (i%%state_per_note>0){
    trans[i,i] = 1
    trans[i,i+1] = 1
  }
}

### I had some problems with identifying a note in the wrong octave.
### The code below makes note changes within 6 half steps more likely
### than those more than 6 half steps away, in either direction.
### Maybe useless; I haven't done a thorough error analysis.
close_note_neighborhood <- 6
for (i in 1:notes) {
  for (j in 1:notes) {
    if (i != j) {
      if (abs(i-j) < close_note_neighborhood) {
        trans[i*state_per_note,(j-1)*state_per_note+1] = 2
      } else {
        trans[i*state_per_note,(j-1)*state_per_note+1] = 1
      }
    }
  }
}
### Normalize the transition matrix so each row sums to 1
for (i in 1:ncol(trans)) trans[i,] = trans[i,]/sum(trans[i,])

####### Creating the emission probability function #######
### create harmonic templates for each note
wid = 1
decay = 4
tplt = matrix(.01,hip,N/2)
for (j in lop:hip) {
  f = 440*2^((j-69)/12)
  b = f*N/sr
  for (h in 1:15) {
    if (j == lop) next # lop is the rest = flat template
    tplt[j,] = tplt[j,] + 2^(-h/decay)*dnorm(1:(N/2),mean=b*h,sd=wid)
  }
  tplt[j,] = tplt[j,]/sum(tplt[j,])
  # plot(tplt[j,],type='l')
  # scan() # wait for human to type a key
}
t = seq(from=0,by=2*pi/N,length=N)
win = (1 + cos(t-pi))/2 # Hann window

#### The function computing the log emission probabilities
log_emit = function (observation,cs,energy) {
  if (energy > 50000000){
    #### enough energy for a note: score the windowed spectrum against this state's template
    Y = fft(win*observation)
    log_emit = sum(log(tplt[floor((cs-1)/state_per_note)+lop,])*Mod(Y[1:(N/2)]))
  } else {
    #### if the energy is below the threshold (here 50000000), the only
    #### states that can generate this observation are rest states
    log_emit = -Inf
    if (cs <= state_per_note) log_emit = 0
  }
  log_emit
}

### This is the core of the recognition.
### All of this is untouched. Error detection doesn't start until after
### recognition and backtracking.
### XXXXXXXXX
####### Dynamic programming #######
### create the dynamic programming matrices
lat = matrix(-Inf,ncol(trans),frames) # the array that holds the dynamic programming scores
best = matrix(0,ncol(trans),frames) # the array of best predecessors
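### A minimal sanity-check sketch for the emission model, assuming the
### definitions above: synthesize one pure-tone frame at MIDI pitch 72
### and check that log_emit ranks that note's states highest. Commented
### out so it doesn't run with the main script:
#test.pitch <- 72
#test.freq <- 440*2^((test.pitch-69)/12)
#test.frame <- round((2^13)*sin(2*pi*test.freq*(0:(N-1))/sr))
#test.energy <- sum(test.frame^2) # well above the 50000000 rest threshold
#test.scores <- sapply(1:(notes*state_per_note), function(cs) log_emit(test.frame, cs, test.energy))
#floor((which.max(test.scores)-1)/state_per_note) + lop # ought to print 72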
### initialize
lat[state_per_note*(1:notes)-(state_per_note-1),1] = 1/notes # possible start states: every first state of a note

#### start computation
for (j in 2:frames) { ### loop1: main loop of the program (over all frames)
  s = (j-1)*skiplen + 1 # start of frame
  observation = y[s:(s+N-1)] # the frame
  energy = sum(observation^2)
  ### cs = current_state ; ps = previous_state
  for (cs in 1:ncol(trans)) { ### loop2: loop over the current state
    ### all states of a note share an emission score, so compute it once per note
    if (cs%%state_per_note==1) log_emit_value = log_emit(observation,cs,energy)
    for (ps in 1:ncol(trans)) { ### loop3: the possible predecessor states
      if (trans[ps,cs]==0) next
      temp = lat[ps,j-1] + log_emit_value + log(trans[ps,cs]) # score by the data model
      if (temp > lat[cs,j]){
        lat[cs,j] = temp
        best[cs,j] = ps
      }
    }
  }
}

####### Trace back #######
hat = rep(0,frames) # the optimal parse
hat[frames] = which.max(lat[,frames]) # find the optimal last state
for (j in frames:2) hat[j-1] = best[hat[j],j] # trace back the optimal parse

### Visualize results
hat = floor((hat-1)/state_per_note) + lop ##### every state_per_note states represent the same pitch
hat.orig <- hat
hat <- hat.orig
plot(hat)

### XXXXXXXXX
### End of shameless copying, transitioning into slightly shameful copying

### Simplify this score (this project does not attempt timing problems,
### only hitting the wrong note). The best way to do this is to remove
### doubled notes and then make the system robust to silence, which is
### already represented in the HMM implementation above.

### Removing doubles
xlist <- c()
for (i in 2:length(score)) {
  if (score[i] == score[i-1]) {
    xlist <- c(xlist, i)
  }
}
if (length(xlist) > 0) score <- score[-xlist] # guard: score[-NULL] would empty the score

### Find the key of the audio, and shift the whole score up or down to match it.
### The key is taken from the first window of state_per_error+1 frames that
### holds a single sustained non-rest pitch.
key <- 0
for (i in (state_per_error+1):(frames-1)) {
  key.window <- hat[(i-state_per_error):i]
  if ((length(unique(key.window))==1) && (unique(key.window) != lop)) {
    key <- key.window[1]
    break
  }
}
score.old <- score ### save the old unshifted score
score <- score - (score[1] - key)

### Begin error detection/annotation:
### detect errors while iterating through the solution (the recognized audio
### representation "hat") and building the waveform.

### Current index of the solution in the score (where we are in my
### representation of the song)
iscore <- 1
### A place to store the indices of errors: where in the score a mistake was made
ierrors <- c()
### For converting the unmarked-up and marked-up versions of the tune from
### MIDI representation into audio
t = 0
t.err <- 0
in.error <- FALSE ### the error state; start in the not-in-error state
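### A toy illustration (my sketch, using made-up pitches) of the windowed
### test used below: intersect() returns the values a window of the parse
### shares with the set of allowed pitches, so an empty intersection over
### a whole window signals a sustained wrong note.
#toy.window <- c(64, 64, 64, 66, 66, 66, 66, 66, 66) # parse drifts to a wrong pitch
#length(intersect(c(lop, 64), toy.window)) > 0 # TRUE: expected note still present
#length(intersect(c(lop, 62), toy.window)) > 0 # FALSE: neither rest nor expected pitch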
for (i in (state_per_error+1):(frames-1)) {
  ### The window spans the current frame and the state_per_error frames
  ### before it; I slide it through the parse to find mistakes
  window <- hat[(i-state_per_error):i]
  ### I am allowed to be in lop (silence) or in the location of the score
  ### that I think I'm in
  legal <- c(lop, score[iscore])
  ### marked-up and non-marked-up pitch of the current MIDI note. Identical,
  ### but I'm keeping the versions separate for comparison
  v.err = 2*pi*440.*2^((hat[i]-69)/12)/sr
  v = 2*pi*440.*2^((hat[i]-69)/12)/sr
  ### Silence
  if (hat[i] == lop) { v = 0; v.err <- 0 }
  ### If I am in an error, look for a return to the expected note, or the
  ### next note, or the one after that, just in case the expected next note
  ### gets skipped. Also watch for silence, a sign that the player won't
  ### return to the expected note after this mistake.
  if (in.error){
    ### If the player returns to the expected note, leave the error state
    if (0 < length(intersect(c(score[iscore]), window))) { # bug fix: was score[i], a frame index
      in.error <- FALSE
    ### If the window enters one of the future pitches, leave the error state
    ### and advance past the current location in the score
    } else if (0 < length(intersect(c(lop, score[iscore+1], score[iscore+2]), window))) {
      in.error <- FALSE
      ### But, of course, don't advance in the score if at the end of the score
      if (iscore < length(score)) iscore <- iscore+1
    ### If I haven't left the error state, I am still in it, and shall insert
    ### a high-pitched screech to mark recognition of the error
    } else {
      ierrors <- c(ierrors, iscore)
      v.err = 2*pi*440.*2^((hip-69)/12)/sr
    }
  ### If not in the error state
  } else {
    ### If the current window has neither silence nor the expected note
    if (0 == length(intersect(legal, window))) {
      ### If the current window also doesn't have the next note, enter the
      ### error state and log the location in ierrors, the list of
      ### locations-in-error
      if (0 == length(intersect(c(legal, score[iscore+1]), window))) {
        ### Debugging output
        #print("at"); print(i); print(hat[i]); print(iscore)
        #print("expect"); print(c(legal, score[iscore+1], score[iscore+2]))
        #print("in"); print(window)
        ierrors <- c(ierrors, iscore)
        in.error <- TRUE
      ### If the current window lacks silence and the current note but has
      ### the next note, advance in the score
      } else {
        ### unless you are already at the end of the score
        if (iscore < length(score)) iscore <- iscore+1
      }
    }
  }
  ### For conversion of the short MIDI representation to a waveform suitable
  ### for conversion back to real audio
  t = c(t,rep(v,N/2))
  t.err = c(t.err,rep(v.err,N/2))
}
### End of error detection/annotation

### create scaled waveforms of both the HMM representation of the audio and
### the version annotated with "caught" mistakes
z = round((2^13)*sin(cumsum(t)))
z.caught = round((2^13)*sin(cumsum(t.err)))
### Convert both versions to .wav
u = Wave(z, samp.rate = sr, bit=16) # make wave struct
u.caught = Wave(z.caught, samp.rate = sr, bit=16) # make wave struct

### show where in the score mistakes were made
ierrors <- unique(ierrors)
print("the score")
print(score)
print("the indices of errors")
print(ierrors)
print("the notes in the score corresponding to those indices")
print(score[ierrors])

### Play the original audio, then the recognized audio, then the error-annotated audio
play(u.orig)
play(u)
play(u.caught)

### For writing audio to file
#writeWave(u, "~/Desktop/projecto/music_processing/final_project/Good_one_err11_01_frame10_hmm_bad.wav")
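### Optional reporting sketch: translate the flagged MIDI numbers into
### note names, assuming the convention that middle C (MIDI 60) is C4:
#midi.to.name <- function(m) {
#  pcs <- c("C","C#","D","D#","E","F","F#","G","G#","A","A#","B")
#  paste(pcs[(m %% 12) + 1], (m %/% 12) - 1, sep="")
#}
#print(sapply(score[ierrors], midi.to.name))
</pre>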