/************************************************************** * Algorithmische Bioinformatik - WS 04/05 * * Assignment 1: Compute a dotplot for 2 sequences. * * Sample solution by: Ben Rich (rich@mi.fu-berlin.de) * * To compile: * > gcc -o dotplot dotplot.c * * To run: * > ./dotplot * *************************************************************/ #include #include #include #define MAX_LENGTH 1000 char A[MAX_LENGTH]; char B[MAX_LENGTH]; char matrix[MAX_LENGTH][MAX_LENGTH]; int m, n; int w, k; void dotplot_naive(); void dotplot(); void print_matrix(); inline int min(int a, int b) { return (a <= b ? a : b); } inline int max(int a, int b) { return (a >= b ? a : b); } /********************************* * Funtion: main() * Program execution starts here. *********************************/ int main() { FILE *in = NULL; in = fopen("input.txt", "r"); if (in == NULL) { fprintf(stderr, "Hey! Where's the input file?\n"); exit(1); } fscanf(in, "%s", A); fscanf(in, "%s", B); fscanf(in, "%i", &w); fscanf(in, "%i", &k); m = strlen(A); n = strlen(B); //dotplot_naive(); dotplot(); print_matrix(); return 0; } /********************************* * Funtion: dotplot_naive() * Compute a dotplot for sequences A and B in time O(m.n.w) *********************************/ void dotplot_naive() { int i, j, l; int count = 0; for (i=0; i < m - w + 1; ++i) { for (j=0; j < n - w + 1; ++j) { count = 0; for (l=0; l < w; ++l) { if (A[i + l] == B[j + l]) ++count; } matrix[i][j] = (count >= k ? 1 : 0); } } } /********************************* * Funtion: dotplot() * Compute a dotplot for sequences A and B in time O(m.n) * * The key observation is that when the window is shifted by * 1 in both sequences, we not need to recompute the matches * in the entire window because most of the comparisons will * be the same as before. *********************************/ void dotplot() { int i, j, l, d; int count = 0; // d = j - i defines a diagonal in the matrix. for (d = -(m - w); d <= (n - w); ++d) { // The first position on the diagonal. i = max(-d, 0); j = max(d, 0); // Count the number of matches in the first window along the diagonal. count = 0; for (l=0; l < w; ++l) { if (A[i + l] == B[j + l]) ++count; } matrix[i][j] = (count >= k ? 1 : 0); // Advance the window by 1 in both sequences. // Don't exceed the bounds of the matrix. ++i; ++j; while (i <= m - w && j <= n - w) { if (A[i - 1] == B[j - 1]) --count; if (A[i + w - 1] == B[j + w - 1]) ++count; matrix[i][j] = (count >= k ? 1 : 0); ++i; ++j; } } } /********************************* * Funtion: print_matrix() * Prints the dotplot to stdout *********************************/ void print_matrix() { int i, j; for (i=0; i < m - w + 1; ++i) { for (j=0; j < n - w + 1; ++j) { printf("%i ", matrix[i][j]); } printf("\n"); } }