This is a pure Bash solution, which I was allowed to post even though the question explicitly asked for awk and sed:
# show_genes FILE
#
# Print every record of FILE whose third whitespace-separated field is
# exactly "gene". In the printed record the ninth field is cut at its
# first ';' and a leading "ID=" is removed, so
# "ID=DDX11L1;Note=unknown;Name=DDX11L1" becomes "DDX11L1".
#
# Arguments: $1 - path of the file to read
# Outputs:   matching records on stdout, fields joined by the first
#            character of IFS (a single space by default)
# Returns:   non-zero if FILE cannot be opened for reading
#
# Note: fields are split on IFS whitespace, so a ninth column that itself
# contains blanks is spread over several fields; only the first piece is
# trimmed and the remaining pieces are printed unchanged.
show_genes()
{
    local filename="$1"
    # Keep the work array and scratch variable out of the caller's scope.
    local -a larr=()
    local attrs

    # 'read' returns non-zero on a final line that has no trailing newline
    # but still fills the array; the length check keeps that last record.
    while read -ra larr || (( ${#larr[@]} > 0 )); do
        # '-' defaults keep blank/short lines from tripping 'set -u'.
        if [[ ${larr[2]-} = gene ]]; then
            attrs="${larr[8]-}"
            attrs="${attrs%%;*}"        # keep only the first attribute
            larr[8]="${attrs#ID=}"      # strip the "ID=" key, leaving the value
            printf '%s\n' "${larr[*]}"
        fi
    done < "$filename"
}
Usage: show_genes /path/to/some/file.txt
Sample output:
[rany$] cat data.txt
chr1 GTF2GFF chromosome 1 249213345 . . . ID=chr1;Name=chr1
chr1 GTF2GFF gene 11874 14408 . + . ID=DDX11L1;Note=unknown;Name=DDX11L1
chr1 GTF2GFF exon 11874 12227 . + . Parent=NR_046018_1
chr1 GTF2GFF exon 12613 12721 . + . Parent=NR_046018_1
chr1 GTF2GFF exon 13221 14408 . + . Parent=NR_046018_1
chr1 GTF2GFF gene 14362 29370 . - . ID=WASH7P;Note=unknown;Name=WASH7P
chr1 GTF2GFF exon 14362 14829 . - . Parent=NR_024540
chr1 GTF2GFF exon 14970 15038 . - . Parent=NR_024540
chr1 GTF2GFF exon 15796 15947 . - . Parent=NR_024540
chr1 GTF2GFF exon 16607 16765 . - . Parent=NR_024540
chr1 GTF2GFF exon 16858 17055 . - . Parent=NR_024540
chr1 GTF2GFF exon 17233 17368 . - . Parent=NR_024540
chr1 GTF2GFF exon 17606 17742 . - . Parent=NR_024540
chr1 GTF2GFF exon 17915 18061 . - . Parent=NR_024540
chr1 GTF2GFF exon 18268 18366 . - . Parent=NR_024540
chr1 GTF2GFF exon 24738 24891 . - . Parent=NR_024540
chr1 GTF2GFF exon 29321 29370 . - . Parent=NR_024540
chr1 GTF2GFF gene 34611 36081 . - . ID=FAM138A;Note=unknown;Name=FAM138A
chr1 GTF2GFF exon 34611 35174 . - . Parent=NR_026818
chr1 GTF2GFF exon 35277 35481 . - . Parent=NR_026818
[rany$] show_genes data.txt
chr1 GTF2GFF gene 11874 14408 . + . DDX11L1
chr1 GTF2GFF gene 14362 29370 . - . WASH7P
chr1 GTF2GFF gene 34611 36081 . - . FAM138A
[rany$]