/*
 * Copyright (C) 2008, Nokia
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "config.h"

#include <string.h>

#include <gmodule.h>
#include <gio/gio.h>

#include <libtracker-common/tracker-dbus.h>
#include <libtracker-common/tracker-log.h>
#include <libtracker-client/tracker-sparql-builder.h>

#include <libtracker-extract/tracker-extract.h>

#include "tracker-dbus.h"
#include "tracker-extract.h"
#include "tracker-main.h"
#include "tracker-marshal.h"
#ifdef HAVE_LIBSTREAMANALYZER
#include "tracker-topanalyzer.h"
#endif /* HAVE_STREAMANALYZER */
/* Name of the symbol that load_modules() resolves with g_module_symbol()
 * in every extractor module it opens.
 */
#define EXTRACT_FUNCTION "tracker_extract_get_data"

/* Returns the TrackerExtractPrivate data attached to a TrackerExtract instance. */
#define TRACKER_EXTRACT_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_EXTRACT, TrackerExtractPrivate))

/* Private per-instance state of a TrackerExtract object; filled in by
 * tracker_extract_new().
 */
typedef struct {
	/* Array of ModuleData created by load_modules(); get_file_metadata()
	 * consults it before the generic array.
	 */
	GArray *specific_extractors;
	/* Array of ModuleData created by load_modules(); receives the entries
	 * whose mime string contains a '*' wildcard.
	 */
	GArray *generic_extractors;
	/* When FALSE, tracker_extract_get_metadata() arms alarm (MAX_EXTRACT_TIME)
	 * around an extraction and clears it with alarm (0) afterwards.
	 */
	gboolean disable_shutdown;
	/* When TRUE (and built with HAVE_LIBSTREAMANALYZER), the
	 * libstreamanalyzer pass in get_file_metadata() is skipped.
	 */
	gboolean force_internal_extractors;
} TrackerExtractPrivate;

Martyn Russell's avatar
Martyn Russell committed
typedef struct {
Martyn Russell's avatar
Martyn Russell committed
	const TrackerExtractData *edata;
/* Forward declaration: tracker_extract_class_init() installs this handler
 * before its definition appears.
 */
static void tracker_extract_finalize (GObject *object);

/* Registers TrackerExtract as a GObject subclass; this also provides
 * tracker_extract_parent_class, which the finalize handler chains up to.
 */
G_DEFINE_TYPE(TrackerExtract, tracker_extract, G_TYPE_OBJECT)

/* Class initializer: reserves room for the TrackerExtractPrivate instance
 * data and installs the finalize handler on the GObject class.
 */
static void
tracker_extract_class_init (TrackerExtractClass *klass)
{
	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);

	g_type_class_add_private (gobject_class, sizeof (TrackerExtractPrivate));
	gobject_class->finalize = tracker_extract_finalize;
}

/* Instance initializer. When built with HAVE_LIBSTREAMANALYZER it calls
 * tracker_topanalyzer_init(); otherwise it does nothing. The private
 * fields are filled in later by tracker_extract_new().
 */
static void
tracker_extract_init (TrackerExtract *object)
{
#ifdef HAVE_LIBSTREAMANALYZER
	tracker_topanalyzer_init ();
#endif /* HAVE_LIBSTREAMANALYZER */
}

/* GObject finalize handler for TrackerExtract.
 *
 * Calls tracker_topanalyzer_shutdown() (when built with
 * HAVE_LIBSTREAMANALYZER), frees the two extractor arrays together with
 * their element storage, and chains up to the parent class.
 */
static void
tracker_extract_finalize (GObject *object)
{
	TrackerExtractPrivate *priv;

	priv = TRACKER_EXTRACT_GET_PRIVATE (object);

#ifdef HAVE_LIBSTREAMANALYZER
	tracker_topanalyzer_shutdown ();
#endif /* HAVE_LIBSTREAMANALYZER */

	/* The arrays are only assigned by tracker_extract_new() after the
	 * instance has been constructed, so an instance that never got that
	 * far still holds NULL here. g_array_free() rejects NULL with a
	 * critical warning, hence the guards.
	 */
	if (priv->specific_extractors) {
		g_array_free (priv->specific_extractors, TRUE);
		priv->specific_extractors = NULL;
	}

	if (priv->generic_extractors) {
		g_array_free (priv->generic_extractors, TRUE);
		priv->generic_extractors = NULL;
	}

	G_OBJECT_CLASS (tracker_extract_parent_class)->finalize (object);
}

/* Loads the extractor modules found in MODULESDIR.
 *
 * force_module:        optional module file name. When set, only the
 *                      directory entry equal to it is loaded; the
 *                      "." G_MODULE_SUFFIX suffix is appended first if it
 *                      is missing.
 * specific_extractors: out location that receives a new GArray of
 *                      ModuleData.
 * generic_extractors:  out location that receives a new GArray of
 *                      ModuleData.
 *
 * Every directory entry ending in "." G_MODULE_SUFFIX is opened with
 * g_module_open() and made resident. EXTRACT_FUNCTION is then looked up and
 * called, and the TrackerExtractData table it returns is walked until an
 * entry with a NULL mime is reached. Entries whose mime contains '*' are
 * logged as generic matches and appended to *generic_extractors.
 *
 * Returns FALSE when force_module was given and both arrays ended up
 * empty, TRUE otherwise.
 *
 * NOTE(review): this listing appears to have lost lines (the opening brace,
 * the declarations of "dir" and "mdata", and several closing braces are not
 * visible) and contains interleaved non-code text; restore it from the
 * complete file before relying on it. Also, "force_internal_extractors" is
 * used below but is not among the visible parameters -- confirm where it is
 * declared.
 */
static gboolean
load_modules (const gchar  *force_module,
              GArray      **specific_extractors,
              GArray      **generic_extractors)
	GError *error = NULL;
	const gchar *name;
	gchar *force_module_checked;
	gboolean success;

	dir = g_dir_open (MODULESDIR, 0, &error);

	if (!dir) {
		g_error ("Error opening modules directory: %s", error->message);
		g_error_free (error);
	if (G_UNLIKELY (force_module)) {
		if (!g_str_has_suffix (force_module, "." G_MODULE_SUFFIX)) {
			force_module_checked = g_strdup_printf ("%s.%s", 
			                                        force_module, 
			                                        G_MODULE_SUFFIX);
		} else {
			force_module_checked = g_strdup (force_module);
		}
	} else {
		force_module_checked = NULL;
	}

	*specific_extractors = g_array_new (FALSE,
	                                    TRUE,
	                                    sizeof (ModuleData));

	*generic_extractors = g_array_new (FALSE,
	                                   TRUE,
	                                   sizeof (ModuleData));

#ifdef HAVE_LIBSTREAMANALYZER
	if (!force_internal_extractors) {
		g_message ("Adding extractor for libstreamanalyzer");
		g_message ("  Generic  match for ALL (tried first before our module)");
		g_message ("  Specific match for NONE (fallback to our modules)");
	} else {
		g_message ("Not using libstreamanalyzer");
		g_message ("  It is available but disabled by command line");
	}
#endif /* HAVE_STREAMANALYZER */
	while ((name = g_dir_read_name (dir)) != NULL) {
		TrackerExtractDataFunc func;
		GModule *module;
		gchar *module_path;

		if (!g_str_has_suffix (name, "." G_MODULE_SUFFIX)) {
			continue;
		}

		if (force_module_checked && strcmp (name, force_module_checked) != 0) {
			continue;
		}

		module_path = g_build_filename (MODULESDIR, name, NULL);

		module = g_module_open (module_path, G_MODULE_BIND_LOCAL);

		if (!module) {
Martyn Russell's avatar
Martyn Russell committed
			g_warning ("Could not load module '%s': %s",
			           name,
			           g_module_error ());
			g_free (module_path);
			continue;
		}

		g_module_make_resident (module);

		if (g_module_symbol (module, EXTRACT_FUNCTION, (gpointer *) &func)) {
			mdata.module = module;
			mdata.edata = (func) ();
			g_message ("Adding extractor:'%s' with:",
Martyn Russell's avatar
Martyn Russell committed
			           g_module_name ((GModule*) mdata.module));

			for (; mdata.edata->mime; mdata.edata++) {
				if (G_UNLIKELY (strchr (mdata.edata->mime, '*') != NULL)) {
					g_message ("  Generic  match for mime:'%s'",
Martyn Russell's avatar
Martyn Russell committed
					           mdata.edata->mime);
					g_array_append_val (*generic_extractors, mdata);
					g_message ("  Specific match for mime:'%s'",
Martyn Russell's avatar
Martyn Russell committed
					           mdata.edata->mime);
					g_array_append_val (*specific_extractors, mdata);
		} else {
			g_warning ("Could not load module '%s': Function %s() was not found, is it exported?", 
			           name, EXTRACT_FUNCTION);
	if (G_UNLIKELY (force_module) && 
	    (!*specific_extractors || (*specific_extractors)->len < 1) && 
	    (!*generic_extractors || (*generic_extractors)->len < 1)) {
		g_warning ("Could not force module '%s', it was not found", force_module_checked);
		success = FALSE;
	} else {
		success = TRUE;
	}

	g_free (force_module_checked);
	return success;
}

/* Creates a TrackerExtract instance with its extractor modules loaded.
 *
 * disable_shutdown:          stored in the private data as-is.
 * force_internal_extractors: stored in the private data as-is.
 * force_module:              passed through to load_modules().
 *
 * Reports an error through g_error() when g_module_supported() is false,
 * and returns NULL when load_modules() fails. Otherwise the object is
 * created and the arrays filled by load_modules() are handed over to its
 * private data.
 *
 * NOTE(review): the listing stops before the end of this function (the
 * final return and closing brace are not visible) -- restore from the
 * complete file.
 */
TrackerExtract *
tracker_extract_new (gboolean     disable_shutdown,
                     gboolean     force_internal_extractors,
                     const gchar *force_module)
{
	TrackerExtract *object;
	TrackerExtractPrivate *priv;
	GArray *specific_extractors;
	GArray *generic_extractors;

	if (!g_module_supported ()) {
		g_error ("Modules are not supported for this platform");
		return NULL;
	}

	if (!load_modules (force_module, &specific_extractors, &generic_extractors)) {
		return NULL;
	}

	/* Set extractors */
	object = g_object_new (TRACKER_TYPE_EXTRACT, NULL);

	priv = TRACKER_EXTRACT_GET_PRIVATE (object);

	priv->disable_shutdown = disable_shutdown;
	priv->force_internal_extractors = force_internal_extractors;
	priv->specific_extractors = specific_extractors;
	priv->generic_extractors = generic_extractors;
/* Extracts metadata for one URI into two SPARQL builders.
 *
 * extract:        instance whose private extractor arrays are used.
 * request_id:     id passed to the tracker_dbus_request_*() logging calls.
 * context:        D-Bus invocation (callers in this file also pass NULL).
 * uri:            URI of the file to inspect.
 * mime:           mime type, or NULL/empty to have it determined here.
 * preupdate_out:  receives the builder made with
 *                 tracker_sparql_builder_new_update().
 * statements_out: receives the builder made with
 *                 tracker_sparql_builder_new_embedded_insert().
 *
 * Visible flow:
 *  - both out pointers are reset to NULL and the two builders are created;
 *  - with HAVE_LIBSTREAMANALYZER and force_internal_extractors unset,
 *    tracker_topanalyzer_extract() is tried first;
 *  - the mime type comes from the argument (copied and stripped), else from
 *    the content type reported by libstreamanalyzer, else from a GIO query
 *    of G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE;
 *  - the specific extractors are tried in array order, then the generic
 *    ones; the first entry whose pattern matches the mime
 *    (g_pattern_match_simple) is invoked and the function returns TRUE with
 *    the builders handed back through the out parameters;
 *  - when nothing matches, the builders are still handed back and TRUE is
 *    returned.
 *
 * NOTE(review): this listing has lost lines (opening brace, several
 * declarations, closing braces and else branches) and contains interleaved
 * non-code text; restore it from the complete file. Two things in the
 * visible code are worth confirming once it is restored: the
 * specific-extractor success path returns without g_free (mime_used)
 * although the generic path frees it, and g_error_free (error) is reached
 * under "error || !info", i.e. possibly with error == NULL.
 */
static gboolean
get_file_metadata (TrackerExtract         *extract,
                   guint                   request_id,
                   DBusGMethodInvocation  *context,
                   const gchar            *uri,
                   const gchar            *mime,
		   TrackerSparqlBuilder  **preupdate_out,
		   TrackerSparqlBuilder  **statements_out)
	TrackerSparqlBuilder *statements, *preupdate;
	gchar *mime_used = NULL;
	gchar *content_type = NULL;
	priv = TRACKER_EXTRACT_GET_PRIVATE (extract);

	*preupdate_out = NULL;
	*statements_out = NULL;

	/* Create sparql builders to send back */
	preupdate = tracker_sparql_builder_new_update ();
	statements = tracker_sparql_builder_new_embedded_insert ();
#ifdef HAVE_LIBSTREAMANALYZER
	if (!priv->force_internal_extractors) {
		tracker_dbus_request_comment (request_id, context,
Martyn Russell's avatar
Martyn Russell committed
		                              "  Extracting with libstreamanalyzer...");

		tracker_topanalyzer_extract (uri, statements, &content_type);
		if (tracker_sparql_builder_get_length (statements) > 0) {
			tracker_sparql_builder_insert_close (statements);

			*preupdate_out = preupdate;
			*statements_out = statements;
		tracker_dbus_request_comment (request_id, context,
Martyn Russell's avatar
Martyn Russell committed
		                              "  Extracting with internal extractors ONLY...");
#endif /* HAVE_LIBSTREAMANALYZER */
	if (mime && *mime) {
		/* We know the mime */
		mime_used = g_strdup (mime);
		g_strstrip (mime_used);
	} else if (content_type && *content_type) {
		/* We know the mime from LSA */
		mime_used = content_type;
		g_strstrip (mime_used);
		GFile *file;
		GFileInfo *info;
		GError *error = NULL;

		file = g_file_new_for_uri (uri);
		if (!file) {
			g_warning ("Could not create GFile for uri:'%s'",
Martyn Russell's avatar
Martyn Russell committed
			           uri);
			g_object_unref (statements);
Carlos Garnacho's avatar
Carlos Garnacho committed
			g_object_unref (preupdate);
		info = g_file_query_info (file,
Martyn Russell's avatar
Martyn Russell committed
		                          G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE,
		                          G_FILE_QUERY_INFO_NONE,
		                          NULL,
		                          &error);

		if (error || !info) {
			tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
			                              "  Could not create GFileInfo for file size check, %s",
			                              error ? error->message : "no error given");
			g_error_free (error);
			if (info) {
				g_object_unref (info);
			}
			g_object_unref (file);
			g_object_unref (statements);
			g_object_unref (preupdate);

		mime_used = g_strdup (g_file_info_get_content_type (info));

		tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
		                              "  Guessing mime type as '%s' for uri:'%s'",
		                              mime_used,
		                              uri);
		g_object_unref (info);
		g_object_unref (file);
	}

	/* Now we have sanity checked everything, actually get the
	 * data we need from the extractors.
	 */
	if (mime_used) {
		guint i;

		for (i = 0; i < priv->specific_extractors->len; i++) {
			const TrackerExtractData *edata;
			ModuleData mdata;

			mdata = g_array_index (priv->specific_extractors, ModuleData, i);
			edata = mdata.edata;

			if (g_pattern_match_simple (edata->mime, mime_used)) {
				gint items;

				tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
				                              "  Extracting with module:'%s'",
				                              g_module_name ((GModule*) mdata.module));
				(*edata->func) (uri, preupdate, statements);
				items = tracker_sparql_builder_get_length (statements);

				tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
				                              "  Found %d metadata items",
				                              items);
				tracker_sparql_builder_insert_close (statements);

				*preupdate_out = preupdate;
				*statements_out = statements;

				return TRUE;
			}
		}

		for (i = 0; i < priv->generic_extractors->len; i++) {
			const TrackerExtractData *edata;
			ModuleData mdata;

			mdata = g_array_index (priv->generic_extractors, ModuleData, i);
			edata = mdata.edata;

			if (g_pattern_match_simple (edata->mime, mime_used)) {
				gint items;

				tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
				                              "  Extracting with module:'%s'",
				                              g_module_name ((GModule*) mdata.module));

				(*edata->func) (uri, preupdate, statements);
				items = tracker_sparql_builder_get_length (statements);
				tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
				                              "  Found %d metadata items",
				                              items);
				tracker_sparql_builder_insert_close (statements);

				g_free (mime_used);
				*preupdate_out = preupdate;
				*statements_out = statements;

				return TRUE;
		tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
		                              "  Could not find any extractors to handle metadata type");
	} else {
		tracker_dbus_request_comment (request_id,
Martyn Russell's avatar
Martyn Russell committed
		                              "  No mime available, not extracting data");
	if (tracker_sparql_builder_get_length (statements) > 0) {
		tracker_sparql_builder_insert_close (statements);
	}
	*preupdate_out = preupdate;
	*statements_out = statements;

	return TRUE;
}

/* Runs an extraction for a URI given on the command line and logs the
 * result instead of returning it over D-Bus.
 *
 * object: the TrackerExtract instance to use.
 * uri:    URI to extract from; must not be NULL (g_return_if_fail).
 * mime:   mime type, forwarded unchanged to get_file_metadata().
 *
 * A request id is obtained and the request logged, then get_file_metadata()
 * is called with a NULL D-Bus context. On success the preupdate and
 * statements strings (or "" when the respective builder is empty) are
 * logged with tracker_dbus_request_info(), both builders are unreferenced,
 * and the request is marked successful. The shutdown alarm is deliberately
 * not touched here.
 *
 * NOTE(review): this listing has lost lines (opening brace, the request_id
 * declaration, closing braces) and contains interleaved non-code text;
 * restore it from the complete file.
 */
void
tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
Martyn Russell's avatar
Martyn Russell committed
                                         const gchar    *uri,
                                         const gchar    *mime)
	TrackerSparqlBuilder *statements, *preupdate;

	request_id = tracker_dbus_get_next_request_id ();

	g_return_if_fail (uri != NULL);
	tracker_dbus_request_new (request_id,
	                          NULL,
	                          "%s(uri:'%s', mime:%s)",
	                          __FUNCTION__,
Martyn Russell's avatar
Martyn Russell committed
	                          uri,
	                          mime);
	/* NOTE: Don't reset the timeout to shutdown here */

	if (get_file_metadata (object, request_id,
			       NULL, uri, mime,
			       &preupdate, &statements)) {
		const gchar *preupdate_str, *statements_str;

		preupdate_str = statements_str = NULL;

		if (tracker_sparql_builder_get_length (statements) > 0) {
			statements_str = tracker_sparql_builder_get_result (statements);
		}

		if (tracker_sparql_builder_get_length (preupdate) > 0) {
			preupdate_str = tracker_sparql_builder_get_result (preupdate);
		}

		tracker_dbus_request_info (request_id, NULL, "%s",
					   preupdate_str ? preupdate_str : "");
		tracker_dbus_request_info (request_id, NULL, "%s",
					   statements_str ? statements_str : "");

		g_object_unref (statements);
Carlos Garnacho's avatar
Carlos Garnacho committed
		g_object_unref (preupdate);
	tracker_dbus_request_success (request_id, NULL);
/* D-Bus method handler that obtains this process's id with getpid() and
 * marks the request as successful.
 *
 * NOTE(review): most of this function is missing from the listing (return
 * type, remaining parameters, the arguments of the logging calls and the
 * code that hands the value back) -- restore it from the complete file
 * before documenting the contract further.
 */
tracker_extract_get_pid (TrackerExtract         *object,
Martyn Russell's avatar
Martyn Russell committed
                         DBusGMethodInvocation  *context,
{
	guint request_id;
	pid_t value;

	request_id = tracker_dbus_get_next_request_id ();

	tracker_dbus_request_new (request_id,
	value = getpid ();
	tracker_dbus_request_debug (request_id,
	tracker_dbus_request_success (request_id, context);
Martyn Russell's avatar
Martyn Russell committed
/* D-Bus method handler that extracts metadata for a URI and replies with
 * two strings: the preupdate SPARQL and the embedded-insert SPARQL.
 *
 * object:  the TrackerExtract instance.
 * uri:     URI to extract from; a NULL value is rejected through
 *          tracker_dbus_async_return_if_fail().
 * mime:    mime type, forwarded unchanged to get_file_metadata().
 * context: D-Bus invocation used for the reply.
 * error:   not referenced in the visible code.
 *
 * Unless disable_shutdown is set, alarm (MAX_EXTRACT_TIME) is armed before
 * the extraction and cleared with alarm (0) afterwards so the process is
 * not terminated while idle. When the sparql builder is non-empty the reply
 * carries the preupdate string (or "") and the sparql result; the visible
 * code also contains a ("", "") reply. On failure an error is built with
 * tracker_dbus_request_failed() and returned with
 * dbus_g_method_return_error().
 *
 * NOTE(review): this listing has lost lines (return type, opening brace,
 * the "if (extracted)" test, else branches and closing braces) and contains
 * interleaved non-code text; restore it from the complete file.
 */
tracker_extract_get_metadata (TrackerExtract         *object,
                              const gchar            *uri,
                              const gchar            *mime,
                              DBusGMethodInvocation  *context,
                              GError                **error)
	guint request_id;
	TrackerExtractPrivate *priv;
	TrackerSparqlBuilder *sparql, *preupdate;
	gboolean extracted = FALSE;

	request_id = tracker_dbus_get_next_request_id ();

	tracker_dbus_async_return_if_fail (uri != NULL, context);

	tracker_dbus_request_new (request_id,
	                          context,
	                          "%s(uri:'%s', mime:%s)",
	                          __FUNCTION__,
Martyn Russell's avatar
Martyn Russell committed
	                          uri,
	                          mime);
	tracker_dbus_request_debug (request_id,
Martyn Russell's avatar
Martyn Russell committed
	                            "  Resetting shutdown timeout");

	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
	if (!priv->disable_shutdown) {
		alarm (MAX_EXTRACT_TIME);
	}
	extracted = get_file_metadata (object, request_id, context, uri, mime, &preupdate, &sparql);
		tracker_dbus_request_success (request_id, context);

		if (tracker_sparql_builder_get_length (sparql) > 0) {
			const gchar *preupdate_str = NULL;
			if (tracker_sparql_builder_get_length (preupdate) > 0) {
				preupdate_str = tracker_sparql_builder_get_result (preupdate);
			}

			dbus_g_method_return (context,
Carlos Garnacho's avatar
Carlos Garnacho committed
			                      preupdate_str ? preupdate_str : "",
			                      tracker_sparql_builder_get_result (sparql));
			dbus_g_method_return (context, "", "");
		g_object_unref (sparql);
		g_object_unref (preupdate);
	} else {
		GError *actual_error = NULL;

		tracker_dbus_request_failed (request_id,
Martyn Russell's avatar
Martyn Russell committed
		                             &actual_error,
		                             "Could not get any metadata for uri:'%s' and mime:'%s'",
		                             uri,
		                             mime);
		dbus_g_method_return_error (context, actual_error);
		g_error_free (actual_error);
	if (!priv->disable_shutdown) {
		/* Unset alarm so the extractor doesn't die when it's idle */
		alarm (0);
	}
-