| /* GStreamer SAMI subtitle parser |
| * Copyright (c) 2006, 2013 Young-Ho Cha <ganadist at gmail com> |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public |
| * License along with this library; if not, write to the |
| * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| */ |
| |
#include "samiparse.h"

#include <glib.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
| |
/* One-character flags stored in GstSamiContext.state, one per tag that is
 * currently open. CLEAR_TAG is never stored: it is only handed to
 * sami_context_pop_state() to close every open tag at once. */
#define ITALIC_TAG  'i'
#define SPAN_TAG    's'
#define RUBY_TAG    'r'
#define RT_TAG      't'
#define CLEAR_TAG   '0'
| |
/* Forward declarations of the parser structures used throughout this file. */
typedef struct _GstSamiContext GstSamiContext;
typedef struct _HtmlContext HtmlContext;
typedef struct _HtmlParser HtmlParser;
| |
| struct _GstSamiContext |
| { |
| GString *buf; /* buffer to collect content */ |
| GString *rubybuf; /* buffer to collect ruby content */ |
| GString *resultbuf; /* when opening the next 'sync' tag, move |
| * from 'buf' to avoid to append following |
| * content */ |
| GString *state; /* in many sami files there are tags that |
| * are not closed, so for each open tag the |
| * parser will append a tag flag here so |
| * that tags can be closed properly on |
| * 'sync' tags. See _context_push_state() |
| * and _context_pop_state(). */ |
| HtmlContext *htmlctxt; /* html parser context */ |
| gboolean has_result; /* set when ready to push out result */ |
| gboolean in_sync; /* flag to avoid appending anything except the |
| * content of the sync elements to buf */ |
| guint64 time1; /* previous start attribute in sync tag */ |
| guint64 time2; /* current start attribute in sync tag */ |
| }; |
| |
| struct _HtmlParser |
| { |
| void (*start_element) (HtmlContext * ctx, |
| const gchar * name, const gchar ** attr, gpointer user_data); |
| void (*end_element) (HtmlContext * ctx, |
| const gchar * name, gpointer user_data); |
| void (*text) (HtmlContext * ctx, |
| const gchar * text, gsize text_len, gpointer user_data); |
| }; |
| |
| struct _HtmlContext |
| { |
| const HtmlParser *parser; |
| gpointer user_data; |
| GString *buf; |
| }; |
| |
| static HtmlContext * |
| html_context_new (HtmlParser * parser, gpointer user_data) |
| { |
| HtmlContext *ctxt = (HtmlContext *) g_new0 (HtmlContext, 1); |
| ctxt->parser = parser; |
| ctxt->user_data = user_data; |
| ctxt->buf = g_string_new (NULL); |
| return ctxt; |
| } |
| |
| static void |
| html_context_free (HtmlContext * ctxt) |
| { |
| g_string_free (ctxt->buf, TRUE); |
| g_free (ctxt); |
| } |
| |
| struct EntityMap |
| { |
| const gunichar unescaped; |
| const gchar *escaped; |
| }; |
| |
| struct EntityMap XmlEntities[] = { |
| {34, "quot;"}, |
| {38, "amp;"}, |
| {39, "apos;"}, |
| {60, "lt;"}, |
| {62, "gt;"}, |
| {0, NULL}, |
| }; |
| |
| struct EntityMap HtmlEntities[] = { |
| /* nbsp will handle manually |
| { 160, "nbsp;" }, */ |
| {161, "iexcl;"}, |
| {162, "cent;"}, |
| {163, "pound;"}, |
| {164, "curren;"}, |
| {165, "yen;"}, |
| {166, "brvbar;"}, |
| {167, "sect;"}, |
| {168, "uml;"}, |
| {169, "copy;"}, |
| {170, "ordf;"}, |
| {171, "laquo;"}, |
| {172, "not;"}, |
| {173, "shy;"}, |
| {174, "reg;"}, |
| {175, "macr;"}, |
| {176, "deg;"}, |
| {177, "plusmn;"}, |
| {178, "sup2;"}, |
| {179, "sup3;"}, |
| {180, "acute;"}, |
| {181, "micro;"}, |
| {182, "para;"}, |
| {183, "middot;"}, |
| {184, "cedil;"}, |
| {185, "sup1;"}, |
| {186, "ordm;"}, |
| {187, "raquo;"}, |
| {188, "frac14;"}, |
| {189, "frac12;"}, |
| {190, "frac34;"}, |
| {191, "iquest;"}, |
| {192, "Agrave;"}, |
| {193, "Aacute;"}, |
| {194, "Acirc;"}, |
| {195, "Atilde;"}, |
| {196, "Auml;"}, |
| {197, "Aring;"}, |
| {198, "AElig;"}, |
| {199, "Ccedil;"}, |
| {200, "Egrave;"}, |
| {201, "Eacute;"}, |
| {202, "Ecirc;"}, |
| {203, "Euml;"}, |
| {204, "Igrave;"}, |
| {205, "Iacute;"}, |
| {206, "Icirc;"}, |
| {207, "Iuml;"}, |
| {208, "ETH;"}, |
| {209, "Ntilde;"}, |
| {210, "Ograve;"}, |
| {211, "Oacute;"}, |
| {212, "Ocirc;"}, |
| {213, "Otilde;"}, |
| {214, "Ouml;"}, |
| {215, "times;"}, |
| {216, "Oslash;"}, |
| {217, "Ugrave;"}, |
| {218, "Uacute;"}, |
| {219, "Ucirc;"}, |
| {220, "Uuml;"}, |
| {221, "Yacute;"}, |
| {222, "THORN;"}, |
| {223, "szlig;"}, |
| {224, "agrave;"}, |
| {225, "aacute;"}, |
| {226, "acirc;"}, |
| {227, "atilde;"}, |
| {228, "auml;"}, |
| {229, "aring;"}, |
| {230, "aelig;"}, |
| {231, "ccedil;"}, |
| {232, "egrave;"}, |
| {233, "eacute;"}, |
| {234, "ecirc;"}, |
| {235, "euml;"}, |
| {236, "igrave;"}, |
| {237, "iacute;"}, |
| {238, "icirc;"}, |
| {239, "iuml;"}, |
| {240, "eth;"}, |
| {241, "ntilde;"}, |
| {242, "ograve;"}, |
| {243, "oacute;"}, |
| {244, "ocirc;"}, |
| {245, "otilde;"}, |
| {246, "ouml;"}, |
| {247, "divide;"}, |
| {248, "oslash;"}, |
| {249, "ugrave;"}, |
| {250, "uacute;"}, |
| {251, "ucirc;"}, |
| {252, "uuml;"}, |
| {253, "yacute;"}, |
| {254, "thorn;"}, |
| {255, "yuml;"}, |
| {338, "OElig;"}, |
| {339, "oelig;"}, |
| {352, "Scaron;"}, |
| {353, "scaron;"}, |
| {376, "Yuml;"}, |
| {402, "fnof;"}, |
| {710, "circ;"}, |
| {732, "tilde;"}, |
| {913, "Alpha;"}, |
| {914, "Beta;"}, |
| {915, "Gamma;"}, |
| {916, "Delta;"}, |
| {917, "Epsilon;"}, |
| {918, "Zeta;"}, |
| {919, "Eta;"}, |
| {920, "Theta;"}, |
| {921, "Iota;"}, |
| {922, "Kappa;"}, |
| {923, "Lambda;"}, |
| {924, "Mu;"}, |
| {925, "Nu;"}, |
| {926, "Xi;"}, |
| {927, "Omicron;"}, |
| {928, "Pi;"}, |
| {929, "Rho;"}, |
| {931, "Sigma;"}, |
| {932, "Tau;"}, |
| {933, "Upsilon;"}, |
| {934, "Phi;"}, |
| {935, "Chi;"}, |
| {936, "Psi;"}, |
| {937, "Omega;"}, |
| {945, "alpha;"}, |
| {946, "beta;"}, |
| {947, "gamma;"}, |
| {948, "delta;"}, |
| {949, "epsilon;"}, |
| {950, "zeta;"}, |
| {951, "eta;"}, |
| {952, "theta;"}, |
| {953, "iota;"}, |
| {954, "kappa;"}, |
| {955, "lambda;"}, |
| {956, "mu;"}, |
| {957, "nu;"}, |
| {958, "xi;"}, |
| {959, "omicron;"}, |
| {960, "pi;"}, |
| {961, "rho;"}, |
| {962, "sigmaf;"}, |
| {963, "sigma;"}, |
| {964, "tau;"}, |
| {965, "upsilon;"}, |
| {966, "phi;"}, |
| {967, "chi;"}, |
| {968, "psi;"}, |
| {969, "omega;"}, |
| {977, "thetasym;"}, |
| {978, "upsih;"}, |
| {982, "piv;"}, |
| {8194, "ensp;"}, |
| {8195, "emsp;"}, |
| {8201, "thinsp;"}, |
| {8204, "zwnj;"}, |
| {8205, "zwj;"}, |
| {8206, "lrm;"}, |
| {8207, "rlm;"}, |
| {8211, "ndash;"}, |
| {8212, "mdash;"}, |
| {8216, "lsquo;"}, |
| {8217, "rsquo;"}, |
| {8218, "sbquo;"}, |
| {8220, "ldquo;"}, |
| {8221, "rdquo;"}, |
| {8222, "bdquo;"}, |
| {8224, "dagger;"}, |
| {8225, "Dagger;"}, |
| {8226, "bull;"}, |
| {8230, "hellip;"}, |
| {8240, "permil;"}, |
| {8242, "prime;"}, |
| {8243, "Prime;"}, |
| {8249, "lsaquo;"}, |
| {8250, "rsaquo;"}, |
| {8254, "oline;"}, |
| {8260, "frasl;"}, |
| {8364, "euro;"}, |
| {8465, "image;"}, |
| {8472, "weierp;"}, |
| {8476, "real;"}, |
| {8482, "trade;"}, |
| {8501, "alefsym;"}, |
| {8592, "larr;"}, |
| {8593, "uarr;"}, |
| {8594, "rarr;"}, |
| {8595, "darr;"}, |
| {8596, "harr;"}, |
| {8629, "crarr;"}, |
| {8656, "lArr;"}, |
| {8657, "uArr;"}, |
| {8658, "rArr;"}, |
| {8659, "dArr;"}, |
| {8660, "hArr;"}, |
| {8704, "forall;"}, |
| {8706, "part;"}, |
| {8707, "exist;"}, |
| {8709, "empty;"}, |
| {8711, "nabla;"}, |
| {8712, "isin;"}, |
| {8713, "notin;"}, |
| {8715, "ni;"}, |
| {8719, "prod;"}, |
| {8721, "sum;"}, |
| {8722, "minus;"}, |
| {8727, "lowast;"}, |
| {8730, "radic;"}, |
| {8733, "prop;"}, |
| {8734, "infin;"}, |
| {8736, "ang;"}, |
| {8743, "and;"}, |
| {8744, "or;"}, |
| {8745, "cap;"}, |
| {8746, "cup;"}, |
| {8747, "int;"}, |
| {8756, "there4;"}, |
| {8764, "sim;"}, |
| {8773, "cong;"}, |
| {8776, "asymp;"}, |
| {8800, "ne;"}, |
| {8801, "equiv;"}, |
| {8804, "le;"}, |
| {8805, "ge;"}, |
| {8834, "sub;"}, |
| {8835, "sup;"}, |
| {8836, "nsub;"}, |
| {8838, "sube;"}, |
| {8839, "supe;"}, |
| {8853, "oplus;"}, |
| {8855, "otimes;"}, |
| {8869, "perp;"}, |
| {8901, "sdot;"}, |
| {8968, "lceil;"}, |
| {8969, "rceil;"}, |
| {8970, "lfloor;"}, |
| {8971, "rfloor;"}, |
| {9001, "lang;"}, |
| {9002, "rang;"}, |
| {9674, "loz;"}, |
| {9824, "spades;"}, |
| {9827, "clubs;"}, |
| {9829, "hearts;"}, |
| {9830, "diams;"}, |
| {0, NULL}, |
| }; |
| |
| static gchar * |
| unescape_string (const gchar * text) |
| { |
| gint i; |
| GString *unescaped = g_string_new (NULL); |
| |
| while (*text) { |
| if (*text == '&') { |
| text++; |
| |
| /* unescape   and */ |
| if (!g_ascii_strncasecmp (text, "nbsp", 4)) { |
| unescaped = g_string_append_unichar (unescaped, 160); |
| text += 4; |
| if (*text == ';') { |
| text++; |
| } |
| goto next; |
| } |
| |
| /* pass xml entities. these will be processed as pango markup */ |
| for (i = 0; XmlEntities[i].escaped; i++) { |
| gssize len = strlen (XmlEntities[i].escaped); |
| if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) { |
| unescaped = g_string_append_c (unescaped, '&'); |
| unescaped = |
| g_string_append_len (unescaped, XmlEntities[i].escaped, len); |
| text += len; |
| goto next; |
| } |
| } |
| |
| /* convert html entities */ |
| for (i = 0; HtmlEntities[i].escaped; i++) { |
| gssize len = strlen (HtmlEntities[i].escaped); |
| if (!strncmp (text, HtmlEntities[i].escaped, len)) { |
| unescaped = |
| g_string_append_unichar (unescaped, HtmlEntities[i].unescaped); |
| text += len; |
| goto next; |
| } |
| } |
| |
| if (*text == '#') { |
| gboolean is_hex = FALSE; |
| gunichar l; |
| gchar *end = NULL; |
| |
| text++; |
| if (*text == 'x') { |
| is_hex = TRUE; |
| text++; |
| } |
| errno = 0; |
| if (is_hex) { |
| l = strtoul (text, &end, 16); |
| } else { |
| l = strtoul (text, &end, 10); |
| } |
| |
| if (text == end || errno != 0) { |
| /* error occured. pass it */ |
| goto next; |
| } |
| unescaped = g_string_append_unichar (unescaped, l); |
| text = end; |
| |
| if (*text == ';') { |
| text++; |
| } |
| goto next; |
| } |
| |
| /* escape & */ |
| unescaped = g_string_append (unescaped, "&"); |
| |
| next: |
| continue; |
| |
| } else if (g_ascii_isspace (*text)) { |
| unescaped = g_string_append_c (unescaped, ' '); |
| /* strip whitespace */ |
| do { |
| text++; |
| } while ((*text) && g_ascii_isspace (*text)); |
| } else { |
| unescaped = g_string_append_c (unescaped, *text); |
| text++; |
| } |
| } |
| |
| return g_string_free (unescaped, FALSE); |
| } |
| |
| static const gchar * |
| string_token (const gchar * string, const gchar * delimiter, gchar ** first) |
| { |
| gchar *next = strstr (string, delimiter); |
| if (next) { |
| *first = g_strndup (string, next - string); |
| } else { |
| *first = g_strdup (string); |
| } |
| return next; |
| } |
| |
| static void |
| html_context_handle_element (HtmlContext * ctxt, |
| const gchar * string, gboolean must_close) |
| { |
| gchar *name = NULL; |
| gint count = 0, i; |
| gchar **attrs; |
| const gchar *found, *next; |
| |
| /* split element name and attributes */ |
| next = string_token (string, " ", &name); |
| |
| if (next) { |
| /* count attributes */ |
| found = next + 1; |
| while (TRUE) { |
| found = strchr (found, '='); |
| if (!found) |
| break; |
| found++; |
| count++; |
| } |
| } else { |
| count = 0; |
| } |
| |
| attrs = g_new0 (gchar *, (count + 1) * 2); |
| |
| for (i = 0; i < count && next != NULL; i += 2) { |
| gchar *attr_name = NULL, *attr_value = NULL; |
| gsize length; |
| next = string_token (next + 1, "=", &attr_name); |
| next = string_token (next + 1, " ", &attr_value); |
| |
| /* strip " or ' from attribute value */ |
| if (attr_value[0] == '"' || attr_value[0] == '\'') { |
| gchar *tmp = g_strdup (attr_value + 1); |
| g_free (attr_value); |
| attr_value = tmp; |
| } |
| |
| length = strlen (attr_value); |
| if (length > 0 && (attr_value[length - 1] == '"' |
| || attr_value[length - 1] == '\'')) { |
| attr_value[length - 1] = '\0'; |
| } |
| |
| attrs[i] = attr_name; |
| attrs[i + 1] = attr_value; |
| } |
| |
| ctxt->parser->start_element (ctxt, name, |
| (const gchar **) attrs, ctxt->user_data); |
| if (must_close) { |
| ctxt->parser->end_element (ctxt, name, ctxt->user_data); |
| } |
| g_strfreev (attrs); |
| g_free (name); |
| } |
| |
| static void |
| html_context_parse (HtmlContext * ctxt, gchar * text, gsize text_len) |
| { |
| const gchar *next = NULL; |
| ctxt->buf = g_string_append_len (ctxt->buf, text, text_len); |
| next = ctxt->buf->str; |
| while (TRUE) { |
| if (next[0] == '<') { |
| gchar *element = NULL; |
| /* find <blahblah> */ |
| if (!strchr (next, '>')) { |
| /* no tag end point. buffer will be process in next time */ |
| return; |
| } |
| |
| next = string_token (next, ">", &element); |
| next++; |
| if (g_str_has_suffix (next, "/")) { |
| /* handle <blah/> */ |
| element[strlen (element) - 1] = '\0'; |
| html_context_handle_element (ctxt, element + 1, TRUE); |
| } else if (element[1] == '/') { |
| /* handle </blah> */ |
| ctxt->parser->end_element (ctxt, element + 2, ctxt->user_data); |
| } else { |
| /* handle <blah> */ |
| html_context_handle_element (ctxt, element + 1, FALSE); |
| } |
| g_free (element); |
| } else if (strchr (next, '<')) { |
| gchar *text = NULL; |
| gsize length; |
| next = string_token (next, "<", &text); |
| text = g_strstrip (text); |
| length = strlen (text); |
| ctxt->parser->text (ctxt, text, length, ctxt->user_data); |
| g_free (text); |
| |
| } else { |
| gchar *text = (gchar *) next; |
| gsize length; |
| text = g_strstrip (text); |
| length = strlen (text); |
| ctxt->parser->text (ctxt, text, length, ctxt->user_data); |
| ctxt->buf = g_string_assign (ctxt->buf, ""); |
| return; |
| } |
| } |
| |
| ctxt->buf = g_string_assign (ctxt->buf, next); |
| } |
| |
| static gchar * |
| has_tag (GString * str, const gchar tag) |
| { |
| return strrchr (str->str, tag); |
| } |
| |
| static void |
| sami_context_push_state (GstSamiContext * sctx, char state) |
| { |
| GST_LOG ("state %c", state); |
| g_string_append_c (sctx->state, state); |
| } |
| |
| static void |
| sami_context_pop_state (GstSamiContext * sctx, char state) |
| { |
| GString *str = g_string_new (""); |
| GString *context_state = sctx->state; |
| int i; |
| |
| GST_LOG ("state %c", state); |
| for (i = context_state->len - 1; i >= 0; i--) { |
| switch (context_state->str[i]) { |
| case ITALIC_TAG: /* <i> */ |
| { |
| g_string_append (str, "</i>"); |
| break; |
| } |
| case SPAN_TAG: /* <span foreground= > */ |
| { |
| g_string_append (str, "</span>"); |
| break; |
| } |
| case RUBY_TAG: /* <span size= > -- ruby */ |
| { |
| break; |
| } |
| case RT_TAG: /* ruby */ |
| { |
| /* FIXME: support for furigana/ruby once implemented in pango */ |
| g_string_append (sctx->rubybuf, "</span>"); |
| if (has_tag (context_state, ITALIC_TAG)) { |
| g_string_append (sctx->rubybuf, "</i>"); |
| } |
| |
| break; |
| } |
| default: |
| break; |
| } |
| if (context_state->str[i] == state) { |
| g_string_append (sctx->buf, str->str); |
| g_string_free (str, TRUE); |
| g_string_truncate (context_state, i); |
| return; |
| } |
| } |
| if (state == CLEAR_TAG) { |
| g_string_append (sctx->buf, str->str); |
| g_string_truncate (context_state, 0); |
| } |
| g_string_free (str, TRUE); |
| } |
| |
| static void |
| handle_start_sync (GstSamiContext * sctx, const gchar ** atts) |
| { |
| int i; |
| |
| sami_context_pop_state (sctx, CLEAR_TAG); |
| if (atts != NULL) { |
| for (i = 0; (atts[i] != NULL); i += 2) { |
| const gchar *key, *value; |
| |
| key = atts[i]; |
| value = atts[i + 1]; |
| |
| if (!value) |
| continue; |
| if (!g_ascii_strcasecmp ("start", key)) { |
| /* Only set a new start time if we don't have text pending */ |
| if (sctx->resultbuf->len == 0) |
| sctx->time1 = sctx->time2; |
| |
| sctx->time2 = atoi ((const char *) value) * GST_MSECOND; |
| sctx->time2 = MAX (sctx->time2, sctx->time1); |
| g_string_append (sctx->resultbuf, sctx->buf->str); |
| sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE; |
| g_string_truncate (sctx->buf, 0); |
| } |
| } |
| } |
| } |
| |
| static void |
| handle_start_font (GstSamiContext * sctx, const gchar ** atts) |
| { |
| int i; |
| |
| sami_context_pop_state (sctx, SPAN_TAG); |
| if (atts != NULL) { |
| g_string_append (sctx->buf, "<span"); |
| for (i = 0; (atts[i] != NULL); i += 2) { |
| const gchar *key, *value; |
| |
| key = atts[i]; |
| value = atts[i + 1]; |
| |
| if (!value) |
| continue; |
| if (!g_ascii_strcasecmp ("color", key)) { |
| /* |
| * There are invalid color value in many |
| * sami files. |
| * It will fix hex color value that start without '#' |
| */ |
| const gchar *sharp = ""; |
| int len = strlen (value); |
| |
| if (!(*value == '#' && len == 7)) { |
| gchar *r; |
| |
| /* check if it looks like hex */ |
| if (strtol ((const char *) value, &r, 16) >= 0 && |
| ((gchar *) r == (value + 6) && len == 6)) { |
| sharp = "#"; |
| } |
| } |
| /* some colours can be found in many sami files, but X RGB database |
| * doesn't contain a colour by this name, so map explicitly */ |
| if (!g_ascii_strcasecmp ("aqua", value)) { |
| value = "#00ffff"; |
| } else if (!g_ascii_strcasecmp ("crimson", value)) { |
| value = "#dc143c"; |
| } else if (!g_ascii_strcasecmp ("fuchsia", value)) { |
| value = "#ff00ff"; |
| } else if (!g_ascii_strcasecmp ("indigo", value)) { |
| value = "#4b0082"; |
| } else if (!g_ascii_strcasecmp ("lime", value)) { |
| value = "#00ff00"; |
| } else if (!g_ascii_strcasecmp ("olive", value)) { |
| value = "#808000"; |
| } else if (!g_ascii_strcasecmp ("silver", value)) { |
| value = "#c0c0c0"; |
| } else if (!g_ascii_strcasecmp ("teal", value)) { |
| value = "#008080"; |
| } |
| g_string_append_printf (sctx->buf, " foreground=\"%s%s\"", sharp, |
| value); |
| } else if (!g_ascii_strcasecmp ("face", key)) { |
| g_string_append_printf (sctx->buf, " font_family=\"%s\"", value); |
| } |
| } |
| g_string_append_c (sctx->buf, '>'); |
| sami_context_push_state (sctx, SPAN_TAG); |
| } |
| } |
| |
| static void |
| handle_start_element (HtmlContext * ctx, const gchar * name, |
| const char **atts, gpointer user_data) |
| { |
| GstSamiContext *sctx = (GstSamiContext *) user_data; |
| |
| GST_LOG ("name:%s", name); |
| |
| if (!g_ascii_strcasecmp ("sync", name)) { |
| handle_start_sync (sctx, atts); |
| sctx->in_sync = TRUE; |
| } else if (!g_ascii_strcasecmp ("font", name)) { |
| handle_start_font (sctx, atts); |
| } else if (!g_ascii_strcasecmp ("ruby", name)) { |
| sami_context_push_state (sctx, RUBY_TAG); |
| } else if (!g_ascii_strcasecmp ("br", name)) { |
| g_string_append_c (sctx->buf, '\n'); |
| /* FIXME: support for furigana/ruby once implemented in pango */ |
| } else if (!g_ascii_strcasecmp ("rt", name)) { |
| if (has_tag (sctx->state, ITALIC_TAG)) { |
| g_string_append (sctx->rubybuf, "<i>"); |
| } |
| g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>"); |
| sami_context_push_state (sctx, RT_TAG); |
| } else if (!g_ascii_strcasecmp ("i", name)) { |
| g_string_append (sctx->buf, "<i>"); |
| sami_context_push_state (sctx, ITALIC_TAG); |
| } else if (!g_ascii_strcasecmp ("p", name)) { |
| } |
| } |
| |
| static void |
| handle_end_element (HtmlContext * ctx, const char *name, gpointer user_data) |
| { |
| GstSamiContext *sctx = (GstSamiContext *) user_data; |
| |
| GST_LOG ("name:%s", name); |
| |
| if (!g_ascii_strcasecmp ("sync", name)) { |
| sctx->in_sync = FALSE; |
| } else if ((!g_ascii_strcasecmp ("body", name)) || |
| (!g_ascii_strcasecmp ("sami", name))) { |
| /* We will usually have one buffer left when the body is closed |
| * as we need the next sync to actually send it */ |
| if (sctx->buf->len != 0) { |
| /* Only set a new start time if we don't have text pending */ |
| if (sctx->resultbuf->len == 0) |
| sctx->time1 = sctx->time2; |
| |
| sctx->time2 = GST_CLOCK_TIME_NONE; |
| g_string_append (sctx->resultbuf, sctx->buf->str); |
| sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE; |
| g_string_truncate (sctx->buf, 0); |
| } |
| } else if (!g_ascii_strcasecmp ("font", name)) { |
| sami_context_pop_state (sctx, SPAN_TAG); |
| } else if (!g_ascii_strcasecmp ("ruby", name)) { |
| sami_context_pop_state (sctx, RUBY_TAG); |
| } else if (!g_ascii_strcasecmp ("i", name)) { |
| sami_context_pop_state (sctx, ITALIC_TAG); |
| } |
| } |
| |
| static void |
| handle_text (HtmlContext * ctx, const gchar * text, gsize text_len, |
| gpointer user_data) |
| { |
| GstSamiContext *sctx = (GstSamiContext *) user_data; |
| |
| /* Skip everything except content of the sync elements */ |
| if (!sctx->in_sync) |
| return; |
| |
| if (has_tag (sctx->state, RT_TAG)) { |
| g_string_append_c (sctx->rubybuf, ' '); |
| g_string_append (sctx->rubybuf, text); |
| g_string_append_c (sctx->rubybuf, ' '); |
| } else { |
| g_string_append (sctx->buf, text); |
| } |
| } |
| |
| static HtmlParser samiParser = { |
| handle_start_element, /* start_element */ |
| handle_end_element, /* end_element */ |
| handle_text, /* text */ |
| }; |
| |
| void |
| sami_context_init (ParserState * state) |
| { |
| GstSamiContext *context; |
| |
| g_assert (state->user_data == NULL); |
| |
| context = g_new0 (GstSamiContext, 1); |
| |
| context->htmlctxt = html_context_new (&samiParser, context); |
| context->buf = g_string_new (""); |
| context->rubybuf = g_string_new (""); |
| context->resultbuf = g_string_new (""); |
| context->state = g_string_new (""); |
| |
| state->user_data = context; |
| } |
| |
| void |
| sami_context_deinit (ParserState * state) |
| { |
| GstSamiContext *context = (GstSamiContext *) state->user_data; |
| |
| if (context) { |
| html_context_free (context->htmlctxt); |
| context->htmlctxt = NULL; |
| g_string_free (context->buf, TRUE); |
| g_string_free (context->rubybuf, TRUE); |
| g_string_free (context->resultbuf, TRUE); |
| g_string_free (context->state, TRUE); |
| g_free (context); |
| state->user_data = NULL; |
| } |
| } |
| |
| void |
| sami_context_reset (ParserState * state) |
| { |
| GstSamiContext *context = (GstSamiContext *) state->user_data; |
| |
| if (context) { |
| g_string_truncate (context->buf, 0); |
| g_string_truncate (context->rubybuf, 0); |
| g_string_truncate (context->resultbuf, 0); |
| g_string_truncate (context->state, 0); |
| context->has_result = FALSE; |
| context->in_sync = FALSE; |
| context->time1 = 0; |
| context->time2 = 0; |
| } |
| } |
| |
| gchar * |
| parse_sami (ParserState * state, const gchar * line) |
| { |
| gchar *ret = NULL; |
| GstSamiContext *context = (GstSamiContext *) state->user_data; |
| |
| gchar *unescaped = unescape_string (line); |
| html_context_parse (context->htmlctxt, (gchar *) unescaped, |
| strlen (unescaped)); |
| g_free (unescaped); |
| |
| if (context->has_result) { |
| if (context->rubybuf->len) { |
| context->rubybuf = g_string_append_c (context->rubybuf, '\n'); |
| g_string_prepend (context->resultbuf, context->rubybuf->str); |
| context->rubybuf = g_string_truncate (context->rubybuf, 0); |
| } |
| |
| ret = g_string_free (context->resultbuf, FALSE); |
| context->resultbuf = g_string_new (""); |
| state->start_time = context->time1; |
| state->duration = context->time2 - context->time1; |
| context->has_result = FALSE; |
| } |
| return ret; |
| } |