#!/bin/bash

# NOTE:
# clustalw2_sleep has been replaced by clustalw2 in some cases
# in order to increase the speed of this script
# some other changes have been made


##############
# Exercise 1 #
##############

# The objective of this exercise is to familiarize yourself with the most
# used commands (cd, ls, mkdir), to move across the directory tree and to see
# the differences between using relative and absolute paths to refer to files.

# we make sure that we start this exercise in our home directory
# ("|| exit 1" stops the script if we cannot change directory, so that the
# commands that follow never run in the wrong place)
cd || exit 1

# this command shows us where we are in the directory tree
pwd

# we create a directory
mkdir pract_so/

# and another inside the previously created one
mkdir pract_so/WORK

# we enter this directory
cd pract_so/ || exit 1

# we check where we are again
pwd

# we copy a directory and all its contents to our present working directory. We are using the environment variable $USER to refer to our user, whichever it is
# (the path is written between double quotes so that it is always passed to the command as one single argument)
cp -r "/home/$USER/LINUX_CLI_EXERCISES/SEQS" .

# we list the contents of the directory
ls

# we enter the WORK directory
cd WORK || exit 1

# check, if you want, where you are

# we link a program to here, if we want to keep the name, only the source file is needed as an argument
ln -s "/home/$USER/LINUX_CLI_EXERCISES/clustalw2_sleep"

# we link a file using an absolute path creating it with a different name
ln -s "/home/$USER/pract_so/SEQS/LexA_all.fst" LexA_all.fst_1

# we link a file using a relative path creating it with a different name
ln -s ../SEQS/LexA_all.fst LexA_all.fst_2

# we list the contents of the directory in a long format where we can see where the links point to
ls -l

# ASK OSCAR IF THIS IS CORRECT BEFORE CONTINUING

# we go to the parent directory
# ("|| exit 1" stops the script if we cannot change directory, so that the
# "mv" and "rm" commands below never act on the wrong directory)
cd .. || exit 1

# we go to the parent directory so now we are in our home (you don't believe me? check it!)
cd .. || exit 1

# we change the name of the directory where we've been working
mv pract_so/ PRACT_SO

# we list the contents of our home directory so we can see that the name has changed
ls

# we go to the WORK directory
cd PRACT_SO/WORK/ || exit 1

# we list the directory so we can see what has happened to our previously created links. One of them is not working. Do you understand why?
ls -l

# we change the name of the working link and we remove the one that is not working.
# we can execute 2 independent commands in the same line using " ; "
mv LexA_all.fst_2 LexA_all.fst ; rm LexA_all.fst_1

# we go to our home directory
cd || exit 1

##############
# Exercise 2 #
##############

# The objective of this exercise is to learn that some programs take some
# time to complete their job, what the differences are between running
# them in the background or in the foreground, and how these running programs,
# also called processes, can be monitored, stopped and terminated.


# NOTE: anything written between two " % " must not be typed literally; type the value it stands for instead.

# we go to the previously created WORK directory
# ("|| exit 1" stops the script if the directory is missing, so that the commands below never run in the wrong place)
cd PRACT_SO/WORK/ || exit 1

# bash switches job control off when it runs a script, which makes "bg" and "fg" fail with "no job control"
# "set -m" switches it on, so both commands work below as they do in an interactive terminal
set -m

# we try to execute clustalw2_sleep, a multiple sequence alignment program
#clustalw2_sleep -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst
# clustalw2_sleep did not execute because our current directory is not in our PATH

# we check our present working directory
pwd

# we check which directories are in PATH
echo "$PATH"

# we can execute clustalw2_sleep using an absolute path
# /home/$USER/PRACT_SO/WORK/clustalw2_sleep  -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst
# but we execute it using a relative path which is easier (by now you should understand the "./" and why clustalw2_sleep works now)
#./clustalw2_sleep -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst
# we press "ENTER" and observe how the terminal seems dead
# we press "Ctrl z" and observe how we recover the terminal but we see this message "[1]+  Stopped"
# this means that our program has paused execution
# (a script cannot press "Ctrl z", so here the program is started directly in the background with "&";
# the path uses $USER, like the rest of the exercises, so it works for whoever runs the script)
"/home/$USER/LINUX_CLI_EXERCISES/clustalw2_sleep" -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst &



# we execute "ps" in order to see which processes are running in our shell and their "pid"
# pid = Process Identifier
# ppid = PARENT Process Identifier
ps -f

# compare the clustalw2_sleep PPID with the bash PID. This bash is the parent of clustalw2_sleep. This bash is the program in execution that you are using to interact with the OS.

# we send our process to the background so the execution continues and we still can use the terminal
# (in this script the process was started with "&", so bash only reports that the job is already in the background)
bg

# if the program ends here repeat: execute it again, "Ctrl z", bg, ps -f

# we try to see how many resources our program is using in real time
# to refresh "top" screen use space bar
#top -p %clustalw2_sleep pid%
# "ps" right-aligns the PID column, so a line may start with spaces; awk ignores
# leading blanks when it splits the line, so $1 is always the PID
ps | awk '/clustalw/ {print $1}' | xargs -I {} top -bn1 -p {}

# we execute the process in the background adding the "&" at the end
"/home/$USER/LINUX_CLI_EXERCISES/clustalw2" -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst &
# press enter to see that you have recovered the prompt

# we send it to the foreground
fg

# we kill it pressing Ctrl c

# we execute the process in the background again
./clustalw2_sleep -ALIGN -OUTFILE=Lex_A.all.aln -INFILE=LexA_all.fst &

# we execute ps to see its pid (observe that the pid is different)
ps -f

# we kill it using "kill"
#kill %clustalw2_sleep pid%
ps | awk '/clustalw/ {print $1}' | xargs -I {} kill {}

# we execute ps to see that clustalw2_sleep is not there anymore
ps -f

# we switch job control off again, as it was before this exercise
set +m

# we return to our $HOME
cd || exit 1
##############
# Exercise 4 #
##############

# The objective of this exercise is to understand how pipes work and how to 
# use them

# NOTE: We are using clustalw2_sleep in this exercise since it is the one previously used. The original directory also contains the original clustalw2 without the sleep; you can use it instead if you prefer.

# we go to the home directory
# ("|| exit 1" stops the script if we cannot change directory, so that the "rm" below never runs in the wrong place)
cd || exit 1
# we go to the SEQS directory
cd PRACT_SO/SEQS/ || exit 1

# we remove LexA_all.fst
rm LexA_all.fst

# we create it again from the individual sequence files

# we can see the contents of all the files that have ".fst" at the end of their names by sending the contents to the stdout (in this case the screen)
cat *.fst

# we send the contents of all the files that have ".fst" at the end of their names to the stdout and we redirect the stdout to a file
cat *.fst >LexA_all.fsta

# we take a look at the files
less LexA_all.fsta
less Eco.fst
less Det.fst

# now we are going to execute clustalw2_sleep in interactive mode
# (clustalw2_sleep is not in our PATH, so we call it using its absolute path;
# its menu reads what we type, so this step only runs when the script is attached to a terminal)
if [ -t 0 ]; then
  "/home/$USER/LINUX_CLI_EXERCISES/clustalw2_sleep"
fi
# we choose option 1 to enter the sequences from disk
#1
# we type the file with our sequences:
#LexA_all.fsta
# we choose option
#2
# we do the multiple alignment choosing option
#1
# we accept default alignment output file pressing ENTER

# we accept default tree output file pressing ENTER

# we press ENTER again to see the whole alignment

# ENTER again to continue

# to exit the menu we type
#X
# and to exit the program
#X


# we take a look at the output alignment
less LexA_all.aln

# we remove all output files
rm LexA_all.dnd LexA_all.aln

# now we take a look at this file /home/$USER/LINUX_CLI_EXERCISES/clustw_stdin.txt
cat "/home/$USER/LINUX_CLI_EXERCISES/clustw_stdin.txt"
# Do its contents ring a bell?

# we execute clustalw2 redirecting stdin to this file
"/home/$USER/LINUX_CLI_EXERCISES/clustalw2" <"/home/$USER/LINUX_CLI_EXERCISES/clustw_stdin.txt"

# we execute ls -ltr to see all files sorted by modification time, the newest ones at the end
ls -ltr

# which new files have been created? what is inside them? What just happened?

# what is the output of this command
grep Bsu LexA_all.aln

# and this one:
ps -ef|grep "$USER"
# and this one:
ps -ef|less





# what does this line do if you execute it?
# (xargs -I {} runs the command once per input line, replacing {} with that line;
# it is the current spelling of the deprecated "xargs -i")
#grep ">" LexA_all.fsta |cut -d ">" -f 2|xargs -I {} bash -c "echo \>{};grep {} LexA_all.aln"|awk '{if (NF==2) print $2;if(NF==1) print $1}' > LexA_all.aln.fsta


# execute it command by command to understand what it does

grep ">" LexA_all.fsta 
grep ">" LexA_all.fsta |cut -d ">" -f 2
grep ">" LexA_all.fsta |cut -d ">" -f 2|xargs -I {} bash -c "echo \>{};grep {} LexA_all.aln"
grep ">" LexA_all.fsta |cut -d ">" -f 2|xargs -I {} bash -c "echo \>{};grep {} LexA_all.aln"|awk '{if (NF==2) print $2;if(NF==1) print $1}' 
grep ">" LexA_all.fsta |cut -d ">" -f 2|xargs -I {} bash -c "echo \>{};grep {} LexA_all.aln"|awk '{if (NF==2) print $2;if(NF==1) print $1}' > LexA_all.aln.fsta

