/* * A simple program to blend two RGBA 5551 layers using MMX/SSE2 instructions * * Copyright (C) 2007 Damien Lespiau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include #include #include #include #include #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) #define TILE_SZ 512 /* * Options */ static gchar *opt_fusion_method = "mmx"; static gchar *opt_bottom_layer = "water.gif"; static gchar *opt_top_layer = "roads.gif"; static gint opt_verbose = FALSE; static GOptionEntry mmx_options[] = { { "bottom-layer", 'b', 0, G_OPTION_ARG_STRING, &opt_bottom_layer, "use specified file as bottom layer (water.gif*)", "file"}, { "top-layer", 't', 0, G_OPTION_ARG_STRING, &opt_top_layer, "use specified file as top layer (roads.gif*)", "file"}, { "fusion-method", 'm', 0, G_OPTION_ARG_STRING, &opt_fusion_method, "select fusion method (mmx*|nommx|sse2)", "type" }, { "verbose", 'v', 0, G_OPTION_ARG_NONE, &opt_verbose, "turn on a few printf", NULL }, { NULL } }; /* * Tiles */ struct tile { guint16 *data; gchar *name; }; static inline struct tile *tile_new(char *name, guint width, guint height) { struct tile *t; t = g_new(struct tile, 1); t->name = g_strdup(name); /* align t->data on a 16 bytes boundary */ t->data = g_new(guint16, width * height + 16); t->data = (guint16*)((GPOINTER_TO_UINT(t->data) + 0xf) & ~0xf); return t; } static inline void tile_free(struct tile *t) { if (!t) return; if (t->name) g_free(t->name); if (t->data) g_free(t->data); g_free(t); } static struct tile *tile_new_from_pixbuf(GdkPixbuf *pixbuf, char *name) { struct tile *t; int width, height, rowstride, n_channels, x ,y; guchar *pixels, *p; g_assert(pixbuf); g_assert(gdk_pixbuf_get_colorspace(pixbuf) == GDK_COLORSPACE_RGB); g_assert(gdk_pixbuf_get_bits_per_sample(pixbuf) == 8); g_assert(gdk_pixbuf_get_has_alpha(pixbuf)); n_channels = gdk_pixbuf_get_n_channels(pixbuf); g_assert(n_channels == 4); width = gdk_pixbuf_get_width(pixbuf); height = gdk_pixbuf_get_height(pixbuf); g_assert(width == TILE_SZ); g_assert(height == TILE_SZ); rowstride = gdk_pixbuf_get_rowstride(pixbuf); pixels = gdk_pixbuf_get_pixels(pixbuf); t = tile_new(name, width, height); /* GdkPixbuf -> RGBA 5551 */ for (y = 0; y < height; y++) for (x = 0; x < width; x++) { /* little endian: * p[0] = R | p[1] = G | p[2] = B | p[3] = A */ p = pixels + y * rowstride + x * n_channels; t->data[TILE_SZ * y + x] = ((p[0] << 8) & 0xf800) | ((p[1] << 3) & 0x07c0) | ((p[2] >> 2) & 0x003e); /* set alpha is needed */ if (p[3]) t->data[TILE_SZ * y + x] |= 0x1; } return t; } static GdkPixbuf *pixbuf_new_from_tile(struct tile *t) { GdkPixbuf *pixbuf; int rowstride, n_channels, x, y; guchar *pixels, *p; pixbuf = gdk_pixbuf_new(GDK_COLORSPACE_RGB, TRUE, 8, TILE_SZ, TILE_SZ); n_channels = gdk_pixbuf_get_n_channels(pixbuf); g_assert(n_channels == 4); rowstride = gdk_pixbuf_get_rowstride(pixbuf); pixels = gdk_pixbuf_get_pixels(pixbuf); for (y = 0; y < TILE_SZ; y++) for (x = 0; x < TILE_SZ; x++) { p = pixels + y * rowstride + x * n_channels; p[0] = (t->data[TILE_SZ * y + x] & 0xf800) >> 8; p[1] = (t->data[TILE_SZ * y + x] & 0x07c0) >> 3; p[2] = (t->data[TILE_SZ * y + x] & 0x003e) << 2; p[3] = (t->data[TILE_SZ * y + x] & 0x0001) * 0xff; } return pixbuf; } /* * Counter */ struct counter { guint32 low, high; }; #ifdef ARCH_X86_64 static inline guint64 counter_read_time(void) { guint64 a, d; __asm__ __volatile__ ( "rdtsc\n\t" : "=a" (a), "=d" (d) #if 0 : /* no input */ : "%rdx", "%rax" #endif ); return (d << 32) | (a & 0xffffffff); } #elif ARCH_X86 static inline guint64 counter_read_time(void) { guint64 l; __asm__ __volatile__ ( "rdtsc\n\t" : "=A" (l) #if 0 : /* no input */ : "%edx", "%eax" #endif ); return l; } #endif static inline guint64 counter_elapsed(guint64 t0, guint64 t1) { return t1 - t0; } /* * Fusion */ struct fusion_ops { guint64 (*fusion)(struct tile *bottom, struct tile *top); }; static guint64 tile_fusion_nommx(struct tile *bottom, struct tile *top) { struct tile *t; guint64 start, end; int x, y, pos; t = tile_new("fused layer", TILE_SZ, TILE_SZ); start = counter_read_time(); for (y = 0; y < TILE_SZ; y++) { pos = TILE_SZ * y; for (x = 0; x < TILE_SZ; x++) { if (unlikely(top->data[pos] & 1)) bottom->data[pos] = top->data[pos]; pos++; } } end = counter_read_time(); return counter_elapsed(start, end); } static struct fusion_ops nommx_fusion = { .fusion = tile_fusion_nommx }; static const guint64 alpha_mask[2] __attribute__ ((aligned(16))) = {0x0001000100010001ULL, 0x0001000100010001ULL}; static guint64 tile_fusion_mmx(struct tile *bottom, struct tile *top) { guint64 start, end; guint64 nb_pixels __attribute__ ((aligned(8))); union { guint64 *p64; guint16 *p16; } nb_pixels_p; guint16 *bottom_p = bottom->data; guint16 *top_p = top->data; int i; start = counter_read_time(); /* mm0: alpha_mask * mm1: top layer pixels * mm2: top layer pixels mask * mm3: bottom layer pixels * mm4: bottom layer pixels mask * mm5: count top layer pixels */ __asm__ __volatile__ ( "movq (%0), %%mm0 \n\t" /* put alpha mask in mm0 */ "pxor %%mm5, %%mm5 \n\t" : /* no output */ : "r"(&alpha_mask) ); for (i = 0; i < TILE_SZ * TILE_SZ / 4; i++) { __asm__ __volatile__ ( /* compute top layer mask */ "movq (%0), %%mm1 \n\t" /* 4 pixels in mm1 */ "movq %%mm1, %%mm2 \n\t" /* copy them in mm2 */ "pand %%mm0, %%mm2 \n\t" /* keep alpha channels */ "paddw %%mm2, %%mm5 \n\t" /* count top layer pixels */ "pcmpeqw %%mm0, %%mm2 \n\t" /* expand 0x0001 to 0xffff */ /* keep the right top pixels */ "pand %%mm2, %%mm1 \n\t" /* compute bottom layer mask */ "movq (%1), %%mm3 \n\t" "movq %%mm3, %%mm4 \n\t" "pand %%mm0, %%mm4 \n\t" "pcmpeqw %%mm0, %%mm4 \n\t" "pandn %%mm4, %%mm2 \n\t" /* keep the right bottom pixels */ "pand %%mm2, %%mm3 \n\t" /* fuse */ "paddw %%mm3, %%mm1 \n\t" /* copy result */ "movq %%mm1, (%1) \n\t" : /* no output */ : "r"(top_p), "r"(bottom_p) ); bottom_p += 4; top_p += 4; } __asm__ __volatile__ ( "movq %%mm5, (%0) \n\t" "emms \n\t" : /* no output */ : "r"(&nb_pixels) ); end = counter_read_time(); if (opt_verbose) { nb_pixels_p.p64 = &nb_pixels; guint16 *p = nb_pixels_p.p16; fprintf(stderr, "top layer has %d pixels\n", p[0] + p[1] + p[2] + p[3]); } return counter_elapsed(start, end); } static struct fusion_ops mmx_fusion = { .fusion = tile_fusion_mmx }; static guint64 tile_fusion_sse2(struct tile *bottom, struct tile *top) { guint64 start, end; guint64 nb_pixels[2] __attribute__ ((aligned(16))); union { guint64 *p64; guint16 *p16; } nb_pixels_p; guint16 *bottom_p = bottom->data; guint16 *top_p = top->data; int i; g_assert(!(GPOINTER_TO_UINT(bottom_p) & 0xF)); g_assert(!(GPOINTER_TO_UINT(top_p) & 0xF)); start = counter_read_time(); /* xmm0: alpha_mask * xmm1: top layer pixels * xmm2: top layer pixels mask * xmm3: bottom layer pixels * xmm4: bottom layer pixels mask * xmm5: count top layer pixels */ __asm__ __volatile__ ( "movdqa (%0), %%xmm0 \n\t" /* put alpha mask in mm0 */ "pxor %%xmm5, %%xmm5 \n\t" : /* no output */ : "r"(&alpha_mask) ); for (i = 0; i < TILE_SZ * TILE_SZ / 8; i++) { __builtin_prefetch(bottom_p + 8, 1, 2); __builtin_prefetch(top_p + 8, 0, 1); __asm__ __volatile__ ( /* compute top layer mask */ "movdqa (%0), %%xmm1 \n\t" /* 8 pixels in xmm1 */ "movdqa %%xmm1, %%xmm2 \n\t" /* copy them in xmm2 */ "pand %%xmm0, %%xmm2 \n\t" /* keep alpha channels */ "paddw %%xmm2, %%xmm5 \n\t" /* count top layer pixels */ "pcmpeqw %%xmm0, %%xmm2 \n\t" /* expand 0x0001 to 0xffff */ /* keep the right top pixels */ "pand %%xmm2, %%xmm1 \n\t" /* compute bottom layer mask */ "movdqa (%1), %%xmm3 \n\t" "movdqa %%xmm3, %%xmm4 \n\t" "pand %%xmm0, %%xmm4 \n\t" "pcmpeqw %%xmm0, %%xmm4 \n\t" "pandn %%xmm4, %%xmm2 \n\t" /* keep the right bottom pixels */ "pand %%xmm2, %%xmm3 \n\t" /* fuse */ "paddw %%xmm3, %%xmm1 \n\t" /* copy result */ "movdqa %%xmm1, (%1) \n\t" : /* no output */ : "r"(top_p), "r"(bottom_p) ); bottom_p += 8; top_p += 8; } __asm__ __volatile__ ( /* with i386-mingw32-gcc, __attribute__((aligned(16))) does * not seem to work with variables on the stack, use unligned * access here */ "movdqu %%xmm5, (%0) \n\t" : /* no output */ : "r"(nb_pixels) ); end = counter_read_time(); if (opt_verbose) { nb_pixels_p.p64 = nb_pixels; guint16 *p = nb_pixels_p.p16; fprintf(stderr, "top layer has %d pixels\n", p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]); } return counter_elapsed(start, end); } static struct fusion_ops sse2_fusion = { .fusion = tile_fusion_sse2 }; static struct fusion_ops *selected_fusion_ops; static void tile_fusion_select_type(struct fusion_ops *ops) { selected_fusion_ops = ops; } static inline guint64 tile_fusion(struct tile *bottom, struct tile *top) { guint64 time; g_assert(selected_fusion_ops); g_assert(bottom); g_assert(top); time = selected_fusion_ops->fusion(bottom, top); if (opt_verbose) fprintf(stderr, "nb cycles: %"G_GUINT64_FORMAT"\n", time); return time; } /* * UI */ #define UI_BOTTOM_LAYER 0x1 #define UI_TOP_LAYER 0x2 #define UI_FUSED_LAYER 0x3 static struct mmx_ui { GtkWidget *window; GtkWidget *hbox; GtkWidget *event_box; GtkWidget *bottom_layer; GtkWidget *top_layer; GtkWidget *fused_layer; GtkWidget *current_layer; /* either bottom, top or fused */ GtkWidget *popup; } app; static void ui_set_current_layer_by_widget(struct mmx_ui *ui, GtkWidget *next) { g_assert(ui); if (next == ui->current_layer) return; if (ui->current_layer) gtk_container_remove(GTK_CONTAINER(ui->event_box), ui->current_layer); gtk_container_add(GTK_CONTAINER(ui->event_box), next); ui->current_layer = next; gtk_widget_show(next); } static void ui_set_current_layer_by_type(struct mmx_ui *ui, gint type) { GtkWidget *next; g_assert(ui); switch(type) { case UI_BOTTOM_LAYER: next = ui->bottom_layer; break; case UI_TOP_LAYER: next = ui->top_layer; break; case UI_FUSED_LAYER: next = ui->fused_layer; break; default: g_assert_not_reached(); } ui_set_current_layer_by_widget(ui, next); } static void ui_destroy(GtkWidget *widget, gpointer data) { gtk_main_quit(); } static gboolean ui_on_popup_clicked(GtkWidget *widget, GdkEventButton *event, gpointer user_data) { ui_set_current_layer_by_widget(&app, user_data); return FALSE; } static void ui_popup_add_layer(GtkWidget *menu, gchar *name, gpointer data) { GtkWidget *w; w = gtk_check_menu_item_new_with_label(name); gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(w), app.current_layer == data); g_signal_connect(w, "button-release-event", G_CALLBACK(ui_on_popup_clicked), data); gtk_menu_shell_append(GTK_MENU_SHELL(menu), w); gtk_widget_show(w); } static void ui_do_popup_menu(GtkWidget *widget, GdkEventButton *event) { GtkWidget *menu; int button, event_time; menu = gtk_menu_new(); g_signal_connect(menu, "deactivate", G_CALLBACK(gtk_widget_destroy), NULL); ui_popup_add_layer(menu, "bottom layer", app.bottom_layer); ui_popup_add_layer(menu, "top layer", app.top_layer); ui_popup_add_layer(menu, "fused layer", app.fused_layer); if (event) { button = event->button; event_time = event->time; } else { button = 0; event_time = gtk_get_current_event_time (); } gtk_menu_attach_to_widget(GTK_MENU(menu), widget, NULL); gtk_menu_popup(GTK_MENU(menu), NULL, NULL, NULL, NULL, button, event_time); } static gboolean ui_clicked(GtkWidget *widget, GdkEventButton *event, gpointer user_data) { if (event->button == 3 && event->type == GDK_BUTTON_PRESS) { ui_do_popup_menu (widget, event); return TRUE; } return FALSE; } static gboolean ui_popup_menu(GtkWidget *widget) { ui_do_popup_menu(widget, NULL); return TRUE; } static void ui_build(struct mmx_ui *ui) { g_return_if_fail(ui); ui->window = gtk_window_new (GTK_WINDOW_TOPLEVEL); g_signal_connect(G_OBJECT(ui->window), "destroy", G_CALLBACK(ui_destroy), NULL); ui->hbox = gtk_hbox_new(TRUE, 0); gtk_container_add(GTK_CONTAINER(ui->window), ui->hbox); gtk_widget_show(ui->hbox); ui->event_box = gtk_event_box_new(); g_signal_connect(G_OBJECT(ui->event_box), "button-press-event", G_CALLBACK(ui_clicked), NULL); g_signal_connect(G_OBJECT(ui->event_box), "popup-menu", G_CALLBACK(ui_popup_menu), NULL); gtk_container_add(GTK_CONTAINER(ui->hbox), ui->event_box); gtk_widget_show(ui->event_box); } static void ui_set_bottom_layer(struct mmx_ui *ui, GtkWidget *w) { ui->bottom_layer = w; g_object_ref(w); } static void ui_set_top_layer(struct mmx_ui *ui, GtkWidget *w) { ui->top_layer = w; g_object_ref(w); } static void ui_set_fused_layer(struct mmx_ui *ui, GtkWidget *w) { ui->fused_layer = w; g_object_ref(w); } static void ui_show(struct mmx_ui *ui) { gtk_widget_show(ui->window); } int main(int argc, char **argv) { GdkPixbuf *bottom, *top, *fused_pixbuf; GtkWidget *bottom_image, *top_image, *fused_image; struct tile *bottom_tile, *top_tile; GError *err = NULL; GOptionContext *opt_ctx; int ret = EXIT_SUCCESS; gtk_init (&argc, &argv); #ifdef G_OS_WIN32 g_setenv("GTK_PATH", "c:\\sagem\\emotion\\lib\\gtk-2.0\\2.10.0", TRUE); #endif /* parse options */ opt_ctx = g_option_context_new("- fuse two RGBA 5551 layers"); g_option_context_add_main_entries(opt_ctx, mmx_options, NULL); g_option_context_add_group(opt_ctx, gtk_get_option_group(TRUE)); g_option_context_parse(opt_ctx, &argc, &argv, &err); /* Load tiles */ bottom = gdk_pixbuf_new_from_file(opt_bottom_layer, &err); if (unlikely(!bottom)) { fprintf(stderr, "Unable to load %s : %s\n", opt_bottom_layer, err->message); ret = EXIT_FAILURE; goto out_bottom; } top = gdk_pixbuf_new_from_file(opt_top_layer, &err); if (unlikely(!top)) { fprintf(stderr, "Unable to load %s : %s\n", opt_top_layer, err->message); ret = EXIT_FAILURE; goto out_road; } bottom_tile = tile_new_from_pixbuf(bottom, "bottom layer"); top_tile = tile_new_from_pixbuf(top, "top layer"); bottom_image = gtk_image_new_from_pixbuf(bottom); top_image = gtk_image_new_from_pixbuf(top); if (strcmp(opt_fusion_method, "nommx") == 0) tile_fusion_select_type(&nommx_fusion); else if (strcmp(opt_fusion_method, "sse2") == 0) tile_fusion_select_type(&sse2_fusion); else tile_fusion_select_type(&mmx_fusion); /* fuuuuuuusiooooon */ tile_fusion(bottom_tile, top_tile); fused_pixbuf = pixbuf_new_from_tile(bottom_tile); fused_image = gtk_image_new_from_pixbuf(fused_pixbuf); /* build UI */ ui_build(&app); ui_set_bottom_layer(&app, bottom_image); ui_set_top_layer(&app, top_image); ui_set_fused_layer(&app, fused_image); ui_set_current_layer_by_type(&app, UI_FUSED_LAYER); ui_show(&app); gtk_main(); g_object_unref(top); out_road: g_object_unref(bottom); out_bottom: return ret; }