### Seth Frey
### Error Detection, applied in this case to the Irish tune typical of the
### Looney Tunes Xylophone Gag
### Class Project for Chris Raphael's Music Processing, INFO-I547 at IU
### Spring 2010 (4/26/10)
### Boilerplate
rm( list=ls() )
library(tuneR)
setWavPlayer("/usr/local/bin/playRWave")
path <- "~/Desktop/projecto/music_processing/"
### useful library for error analysis
#source(paste(path, "code/spectrogram.r", sep="" ), echo=FALSE)
### The score
### endearing young charms in MIDI
#s_notes<- c(E4, D4, C4, D4, C4, C4, E4, G4, F4, A4, C5, C5, B4, A4)
eyc_orig_notes <- c(64, 62, 60, 62, 60, 60, 64, 67, 65, 69, 72, 72)#, 71, 69)
score <- eyc_orig_notes
timing <- c(.25,.25,.75,.25,.5,.5,.5,.5,.5,.5,.5,.75,.25,.25) # timings for the full 14-note phrase; only the commented-out test below uses them
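### Quick sanity check on the transcription (my addition; midi_to_name is
### a helper I wrote, not part of the original code):
midi_to_name <- function(m) {
pc <- c("C","C#","D","D#","E","F","F#","G","G#","A","A#","B")
paste(pc[(m %% 12) + 1], (m %/% 12) - 1, sep="")
}
#print(midi_to_name(score)) # should start "E4" "D4" "C4" ...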
### Test the above representation of the score (note: this block uses sr
### and ierrors, which are only defined further down):
### scale timing info
#timing<- timing /sum(timing)
### less than two seconds
#duration<- 150000
#sample <-c()
#for (i in 1:length(score)){ sample<-c(sample, rep(score[i], timing[i]*duration))}
#v = 2*pi*440.*2^((sample-69)/12)/sr
#z = round((2^13)*sin(cumsum(v)))
### insert click at errors
#z[ierrors] <- 32000
#u.samp = Wave(z, samp.rate = sr, bit=16) # make wave struct
#play(u.samp)
### Audio of endearing young charms, with or without mistakes.
### Run just one of the readWave lines below (each overwrites w; if the
### whole script is sourced, only the last one takes effect).
### bad (these don't get recognized well by the HMM)
w = readWave(paste(path, "final_project/bugs2001.wav", sep="" ))
w = readWave(paste(path, "final_project/bugs1001.wav", sep="" ))
### maybe (the HMM only partly recognizes these, and its recognition
### mistakes get labeled as player errors by my code)
w = readWave(paste(path, "final_project/travis001good.wav", sep="" ))
w = readWave(paste(path, "final_project/travis002good.wav", sep="" ))
### good (the HMM recognizes these well, and the errors in them get
### caught, though a few recognition errors still get labeled as player
### errors in these samples)
w = readWave(paste(path, "final_project/travislow003.wav", sep="" ))
w = readWave(paste(path, "final_project/decent001err2.wav", sep="" ))
w = readWave(paste(path, "final_project/travismistake003.wav", sep="" ))
w = readWave(paste(path, "final_project/travismistake006.wav", sep="" ))
w = readWave(paste(path, "final_project/yosemite1001.wav", sep="" ))
w = readWave(paste(path, "final_project/travislow004.wav", sep="" ))
### This project is a modification of the HMM code provided as the solution to HW6.
### http://www.music.informatics.indiana.edu/courses/I547/hw6_sln.r
### or
### http://enfascination.com/forever/xylophonegag/hw6_sln.r
### I have removed all of the comments from the original code to
### highlight my own. For the unfamiliar, this may obscure the workings
### of the HMM.
### Extract some parameters from the audio
bits = w@bit
sr = w@samp.rate
y = w@left
#y = y[30000:1000000] ### take subset of audio, for faster testing
u = Wave(y, samp.rate = sr, bit=bits)
### preserve original
u.orig = Wave(y, samp.rate = sr, bit=bits)
#play(u.orig)
### Other Important parameters
lop = 59 # the lowest possible pitch (its states double as the rest/silence states)
hip = 96 # the highest possible pitch
N = 1024 ### the frame length
skiplen = N/2
frames = floor(length(y)/skiplen)-1 ### The number of frames, given N
notes = length(lop:hip) ### Number of notes represented
### minimum number of frames constituting a played note (must be small
### for realistic run times)
state_per_note = 4
### an unexpected pitch must persist for a whole window of
### state_per_error+1 frames to count as a played error
state_per_error <- 8
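### The state indexing convention used throughout: note k (k = 1 for lop)
### occupies states (k-1)*state_per_note+1 through k*state_per_note.
### Two helpers (my addition, unused below) that make the arithmetic explicit:
state_to_pitch <- function(s) floor((s - 1)/state_per_note) + lop
pitch_to_first_state <- function(p) (p - lop)*state_per_note + 1
#stopifnot(all(state_to_pitch(pitch_to_first_state(lop:hip)) == lop:hip))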
### Transition probability matrix (almost all borrowed code)
trans = matrix(0,notes*state_per_note,notes*state_per_note)
for (i in 1:ncol(trans)){
if (i%%state_per_note>0){
trans[i,i] = 1
trans[i,i+1] = 1
}
}
### I had some problems with notes being identified in the wrong octave.
### The code below makes note changes of fewer than 6 half steps, in
### either direction, twice as likely as larger jumps.
### Maybe useless; I haven't done a thorough error analysis.
close_note_neighborhood <- 6
for (i in 1:notes) {
for (j in 1:notes) {
if (i!=j) {
if (abs(i-j)<close_note_neighborhood) {
trans[i*state_per_note,(j-1)*state_per_note+1] = 2
}
else {
trans[i*state_per_note,(j-1)*state_per_note+1] = 1
}
}
}
}
### Normalize each row of the transition matrix into a probability distribution
for (i in 1:nrow(trans))
trans[i,]=trans[i,]/sum(trans[i,])
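### Sanity check (my addition): after normalization every row should sum
### to 1, with close jumps twice as likely as far ones.
#stopifnot(abs(rowSums(trans) - 1) < 1e-9)
#round(trans[state_per_note, (0:(notes-1))*state_per_note + 1], 4) # jump probs out of the lowest note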
####### Creating the emission probability function #######
### create a spectral template for each note.
wid = 1   # standard deviation, in FFT bins, of each harmonic peak
decay = 4 # rate at which harmonic amplitudes fall off
tplt = matrix(.01,hip,N/2) # one row per MIDI pitch; the .01 floor keeps log() finite
for (j in lop:hip) {
f = 440*2^((j-69)/12)
b = f*N/sr
for (h in 1:15) {
if (j == lop) next # lop is the rest = flat template
tplt[j,] = tplt[j,] + 2^(-h/decay)*dnorm(1:(N/2),mean=b*h,sd = wid)
}
tplt[j,] = tplt[j,]/sum(tplt[j,])
# plot(tplt[j,],type='l')
# scan() # wait for human to type key
}
t = seq(from=0,by=2*pi/N,length=N)
win = (1 + cos(t-pi))/2 # Hann window
#### Function for computing the log emission probabilities
log_emit = function (observation,cs,energy) {
if (energy > 50000000){ #### enough energy for a pitched note: score this state's template against the spectrum
Y = fft(win*observation)
log_emit = sum(log(tplt[floor((cs-1)/state_per_note)+lop,])*Mod(Y[1:(N/2)]))
}
else{ #### below the energy threshold, only rest states can generate this observation
log_emit = -Inf
if (cs<=state_per_note) log_emit = 0
}
log_emit
}
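### Usage sketch (my addition): score one frame against the first state of
### every note and report the winning pitch. The frame position is arbitrary.
#obs <- y[10001:(10000+N)]
#scores <- sapply((0:(notes-1))*state_per_note + 1,
#                 function(cs) log_emit(obs, cs, sum(obs^2)))
#cat("best-scoring pitch:", lop + which.max(scores) - 1, "\n")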
### This is the core of the recognition. All of it is untouched; error
### detection doesn't start until after recognition and backtracking.
### ----- begin untouched HW6 code -----
####### Dynamic programming #######
### creating dynamic programming matrix
lat = matrix(-Inf,ncol(trans),frames) # the array that holds the dynamic programming scores
best = matrix(0,ncol(trans),frames) # the array of best predecessors.
### initialize
### possible start states: every first state of a note (the same constant
### for each of them, so its scale doesn't affect the parse)
lat[state_per_note*(1:notes)-(state_per_note-1),1] = 1/notes
#### start computation
for (j in 2:frames) { ### loop1: main loop of program (for all frames)
s = (j-1)*skiplen +1 # start of frame
observation = y[s:(s+N-1)] # the frame
energy = sum(observation^2)
### cs = current_state ; ps = previous_state
for (cs in 1:ncol(trans)) { ### loop2: loop for current state
### emission depends only on the note, so compute it once per note
if (cs%%state_per_note==1) log_emit_value = log_emit(observation,cs,energy)
for (ps in 1:ncol(trans)) { ### loop3: the possible predecessor states
if (trans[ps,cs]==0) next
temp = lat[ps,j-1] + log_emit_value + log(trans[ps,cs])# score by data model
if (temp > lat[cs,j]){
lat[cs,j] = temp
best[cs,j] = ps
}
}}
}
####### Trace back #######
hat = rep(0,frames) # the optimal parse
hat[frames] = which.max(lat[,frames]) # find the optimal final state
for (j in frames:2) hat[j-1] = best[hat[j],j] # trace back optimal parse
### Visualize results
hat = floor((hat-1)/state_per_note) + lop ##### map states to pitches: every state_per_note states represent the same pitch
hat.orig <- hat
hat <- hat.orig # redundant here; re-run this line to restore hat when experimenting
plot(hat)
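### A more readable plot (my addition), with time in seconds on the x-axis:
#plot((1:frames)*skiplen/sr, hat, type="s", xlab="time (sec)", ylab="MIDI pitch")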
### ----- end untouched HW6 code -----
### End of shameless copying, transitioning into slightly shameful copying
### Simplify the score (this project does not attempt to catch timing
### problems, only wrong notes). The cleanest way to do that is to remove
### repeated notes and rely on robustness to silence, which the HMM
### implementation above already provides.
### Remove consecutive duplicate notes
xlist <- c()
for (i in 2:length(score)) {
if (score[i] == score[i-1]) {
xlist <- c(xlist, i)
}
}
if (length(xlist) > 0) score <- score[-xlist] # guard: score[-c()] would empty the score
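### Equivalent one-liner (my addition): run-length encoding collapses
### consecutive repeats in one step.
#score <- rle(score)$values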
### Find the first stably recognized pitch (call it the key), and transpose
### the whole score up or down so its first note matches
key <-0
for (i in (state_per_error+1):(frames-1)) {
key.window <- hat[(i-state_per_error):i]
if ((length(unique(key.window))==1) && (unique(key.window) != lop)) {
key <- key.window[1]
break
}
}
score.old <- score ### saving old unshifted score
score <- score - (score[1] - key)
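### Worked example (my addition): if the score starts on E4 (64) but the
### first stable recognized pitch is E5 (76), every note shifts up an octave:
#c(64, 62, 60) - (64 - 76) # -> 76 74 72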
### Begin error detection/annotation
### detect errors while iterating through solution (the recognized audio
### representation "hat") and building waveform
### Current index into the score (where we are in my representation
### of the song)
iscore <- 1
### A place to store the indices of errors. Where in the score
### was a mistake made
ierrors <- c()
### Accumulators for converting the unmarked-up and marked-up versions
### of the tune from MIDI representation into audio. (Note: this reuses
### the name t; the window time vector above is no longer needed.)
t = 0
t.err<-0
in.error<-FALSE ### The error state. Starting in not-in-error state
for (i in (state_per_error+1):(frames-1)) {
### This window is state_per_error+1 frames long; I slide it along to find mistakes
window <- hat[(i-state_per_error):i]
### The legal pitches here: lop (silence) or the note of the score
### that I think I'm on.
legal <- c(lop, score[iscore])
### Phase increments for the marked-up and non-marked-up versions of
### the current MIDI note; identical, but kept separate for comparison
v.err = 2*pi*440.*2^((hat[i]-69)/12)/sr
v = 2*pi*440.*2^((hat[i]-69)/12)/sr
### Silence
if (hat[i] == lop) { v = 0; v.err <- 0 }
### If I am in an error, look for a return to the previous note,
### or the expected next note, or the one after that, in case the
### expected next note gets skipped. Also watch for silence, a sign
### that the player won't return to the previous note after this
### mistake.
if (in.error){
### If the player returns to the expected note, leave the error state
if (0 < length(intersect(c(score[iscore]), window))) {
in.error <- FALSE
}
### If the window enters one of the future pitches, leave error state and
### advance past current location in the score.
else if (0 < length(intersect(c(lop, score[iscore+1], score[iscore+2]), window))) {
in.error <- FALSE
### But, of course, don't advance in score if at end of score.
if (iscore < length(score)) iscore <- iscore+1
}
### If I haven't left the error state, I am still in it and shall
### insert a high pitched screech to indicate recognition of error
else {
ierrors <- c(ierrors, iscore)
v.err = 2*pi*440.*2^((hip-69)/12)/sr
}
### If not in error state
} else {
### If the current window contains neither silence nor the expected note
if (0 == length(intersect(legal, window))) {
### If the window also lacks the next note, enter the error
### state and log the location in ierrors, the list of
### locations-in-error
if (0 == length(intersect(c(legal, score[iscore+1]), window))) {
### Debugging output
#print ("at")
#print(i)
#print(hat[i])
#print(iscore)
#print("expect")
#print(c(legal, score[iscore+1], score[iscore+2]))
#print("in")
#print(window)
ierrors <- c(ierrors, iscore)
in.error <- TRUE
}
### If current window lacks silence and current note
### but has next note, advance in the score
else {
### unless you are already at the end of the score
if (iscore < length(score)) iscore <- iscore+1
}
}
}
### Append this frame's phase increments, for converting the MIDI
### representation back into an audio waveform
t = c(t,rep(v,N/2))
t.err = c(t.err,rep(v.err,N/2))
}
### End of error detection/annotation
### create scaled waveform of both HMM representation of audio and
### version of audio annotated with "caught" mistakes
z = round((2^13)*sin(cumsum(t)))
z.caught = round((2^13)*sin(cumsum(t.err)))
### Convert both versions to .wav file
u = Wave(z, samp.rate = sr, bit=16) # make wave struct
u.caught = Wave(z.caught, samp.rate = sr, bit=16) # make wave struct
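### The synthesis above uses phase accumulation: t holds per-sample phase
### increments, so cumsum(t) is a running phase and sin() of it stays
### continuous across pitch changes. Minimal standalone sketch of the same
### trick (my addition):
#incr <- 2*pi*440/sr # phase step for A440
#tone <- round((2^13)*sin(cumsum(rep(incr, sr)))) # one second of A440
#play(Wave(tone, samp.rate = sr, bit = 16))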
### show where in the score mistakes were made
ierrors <- unique(ierrors)
print("the score")
score
print("the indices of errors")
ierrors
print("the notes in the score corresponding to those indices")
score[ierrors]
### Play the original audio, then the recognized audio, then the error-annotated audio
play(u.orig)
play(u)
play(u.caught)
### For writing audio to file
#writeWave(u, "~/Desktop/projecto/music_processing/final_project/Good_one_err11_01_frame10_hmm_bad.wav")
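### e.g., to save the error-annotated version (hypothetical output path):
#writeWave(u.caught, paste(path, "final_project/annotated.wav", sep=""))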