Commit 4d15fafc authored by Laurent Wouters
Browse files

Refactoring the extension

parent ef6bafcc57c1
......@@ -20,22 +20,17 @@
import { definition } from "@logilab/libview";
import {
NO_DATA,
MIME,
Tag,
DataSourceLinked,
DataSourcePage,
Link,
DataSource,
tryNegotiateData,
MimeInfo,
refersToPrimaryTopic,
refersToData,
ObservedResourceRegistry,
resolveObservationsForTab,
selectDataSource,
hasDetectedData,
ResourceObservedData
ResourceObservedData,
detectTopicOnlinks,
detectDataOnLinks,
fetchObservableAt,
ObservableContent,
observeContent
} from "../common/data";
import { Message } from "../common/messages";
/// <reference path="./fallback.d.ts"/>
......@@ -79,42 +74,6 @@ function onObservedTabUpdated(tabId: number): void {
}
}
/**
 * Extracts the `name = 'value'` / `name = "value"` pairs found in a link description.
 * Only quoted values are recognised; the surrounding quotes are removed from the result.
 * @param content The link description
 * @returns The tags in their order of appearance (empty when none is found)
 */
function parseLinkTags(content: string): Tag[] {
  const pattern = RegExp("([a-zA-Z_0-9]+)\\s*=\\s*('[^']*'|\"[^\"]*\")", "g");
  const result: Tag[] = [];
  for (
    let found = pattern.exec(content);
    found !== null;
    found = pattern.exec(content)
  ) {
    const quoted = found[2];
    // the captured value always carries its two delimiting quotes
    result.push({ name: found[1], value: quoted.slice(1, -1) });
  }
  return result;
}
/**
 * Splits the value of a Link header into its individual links.
 * Each `<url>` target, together with the quoted parameters that directly follow it,
 * yields one Link whose tags are obtained through parseLinkTags.
 * @param content The content to parse
 * @returns The links in their order of appearance
 */
function parseLinks(content: string): Link[] {
  const pattern = RegExp(
    "<([^>]*)>(?:\\s*;\\s*([a-zA-Z_0-9]+)\\s*=\\s*('[^']*'|\"[^\"]*\"))*",
    "g"
  );
  const found: Link[] = [];
  for (
    let hit = pattern.exec(content);
    hit !== null;
    hit = pattern.exec(content)
  ) {
    // hit[0] is the whole link (target + parameters), hit[1] the bare target
    found.push(new Link(hit[1], parseLinkTags(hit[0])));
  }
  return found;
}
/**
* Get the value of a header
* @param headers The headers
......@@ -132,109 +91,6 @@ function getHeader(
return null;
}
/**
 * Sets the value of a header, in place.
 * Every existing header with the given name is overwritten (header names are
 * compared case-insensitively, as HTTP header names are not case-sensitive).
 * The header is appended only when no header with that name exists yet, so that
 * the list never ends up with a stale duplicate.
 * @param headers The headers (modified in place)
 * @param name The name of a header
 * @param value The value for the header
 */
function setHeader(
  headers: chrome.webRequest.HttpHeader[],
  name: string,
  value: string
): void {
  const wanted = name.toLowerCase();
  let found = false;
  for (const header of headers) {
    if (header.name.toLowerCase() === wanted) {
      header.value = value;
      found = true;
    }
  }
  // only add a new entry when there was nothing to update
  if (!found) headers.push({ name: name, value: value });
}
/**
 * Reads the "Link" header and parses it into links.
 * @param headers The headers
 * @returns The parsed links, or an empty array when there is no "Link" header
 */
function getAllLinks(headers: chrome.webRequest.HttpHeader[]): Link[] {
  const raw = getHeader(headers, "Link");
  return raw == null ? [] : parseLinks(raw);
}
/**
 * Looks for the primary topic among the links.
 * @param links The links in the header
 * @returns The URL of the first link that refers to the primary topic,
 *          or null when no link does
 */
function detectTopicOnlinks(links: Link[]): string {
  const candidates = links.filter((link: Link) => refersToPrimaryTopic(link));
  let topic: string = null;
  for (const candidate of candidates) {
    // keep the first URL found, later candidates are ignored
    if (topic == null) topic = candidate.url;
  }
  return topic;
}
/**
 * Builds the linked data sources advertised by the links.
 * @param links The links in the header
 * @returns One DataSourceLinked per link that refers to data, in the same order
 */
function detectDataOnLinks(links: Link[]): DataSourceLinked[] {
  const dataLinks = links.filter((link: Link) => refersToData(link));
  const sources: DataSourceLinked[] = [];
  for (const dataLink of dataLinks) {
    sources.push(new DataSourceLinked(dataLink));
  }
  return sources;
}
/**
 * Checks whether the response itself is linked data, based on its Content-Type.
 * @param details The details (provides the URL of the response)
 * @param headers The HTTP headers
 * @returns A DataSourcePage for the response URL when its content type is a key of MIME,
 *          NO_DATA otherwise (including when there is no Content-Type header)
 */
function detectDataOnContent(
  details: chrome.webRequest.WebResponseHeadersDetails,
  headers: chrome.webRequest.HttpHeader[]
): DataSource {
  const header = getHeader(headers, "Content-Type");
  if (header == null) return NO_DATA;
  // drop the parameters (e.g. charset) that follow the media type
  const mediaType = header.split(";")[0];
  if (!MIME.hasOwnProperty(mediaType)) return NO_DATA;
  return new DataSourcePage(details.url, mediaType);
}
/**
 * Determines whether the content of a response should be pre-empted.
 * A known MIME type matches when it equals the response's content type
 * (parameters stripped) or when the URL ends with its file extension.
 * @param details The details of an incoming request
 * @returns The first matching entry of MIME, or null when none matches
 */
function shouldPreempt(
  details: chrome.webRequest.WebResponseHeadersDetails
): MimeInfo {
  const header = getHeader(details.responseHeaders, "Content-Type");
  const contentType = header != null ? header.split(";")[0] : null;
  for (const key of Object.keys(MIME)) {
    const candidate = MIME[key];
    const sameType = candidate.mime == contentType;
    if (sameType || details.url.endsWith(candidate.fileExtension)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Pre-empts the content of a response: its Content-Type is forced to "text/plain"
 * and the response URL is recorded as a page data source in the tab's observations.
 * @param details The details of an incoming request
 * @param mime The captured MIME type
 * @returns The blocking response carrying the modified headers
 */
function doPreempt(
  details: chrome.webRequest.WebResponseHeadersDetails,
  mime: MimeInfo
): chrome.webRequest.BlockingResponse {
  const responseHeaders = details.responseHeaders;
  setHeader(responseHeaders, "Content-Type", "text/plain");
  const pageSource = resolveObservationsForTab(allObservations, details.tabId);
  pageSource.sources.push(new DataSourcePage(details.url, mime.mime));
  return { responseHeaders };
}
/**
* When headers are received for a tab
* @param details The details
......@@ -264,23 +120,17 @@ function onHeadersReceived(
};
}
let observable: ObservableContent = {
content: null,
contentType: getHeader(details.responseHeaders, "Content-Type"),
linkHeader: getHeader(details.responseHeaders, "Link"),
url: details.url
};
let obs = observeContent(observable);
let observation = resolveObservationsForTab(allObservations, details.tabId);
observation.url = details.url;
// try to detect the primary topic in HTTP headers' links
let links = getAllLinks(details.responseHeaders);
if (observation.primaryTopic == "")
observation.primaryTopic = detectTopicOnlinks(links);
// determine if we shall preempt the content due to its type (raw RDF data)
/*let mime = shouldPreempt(details);
if (mime != null) {
// preempts this request
return doPreempt(details, mime);
}*/
// try to detect alternate sources of content
observation.sources.push(
detectDataOnContent(details, details.responseHeaders)
);
observation.sources = observation.sources.concat(detectDataOnLinks(links));
observation.url = obs.url;
observation.primaryTopic = obs.primaryTopic;
observation.sources = obs.sources;
// if there are still nothing, try to probe with HTTP content negotiation
if (!hasDetectedData(observation)) {
tryNegotiateData(details.url)
......@@ -394,6 +244,15 @@ chrome.runtime.onMessage.addListener(
console.log("Failed to fetch the resource: " + reason);
});
return true;
} else if (request.requestType == "FetchObservable") {
fetchObservableAt(request.payload)
.then((content: ObservableContent) => {
sendResponse({ ok: content, error: null });
})
.catch((reason: any) => {
sendResponse({ ok: null, error: reason });
});
return true;
}
}
);
......
......@@ -108,6 +108,192 @@ export function refersToPrimaryTopic(link: Link): boolean {
);
}
/**
 * Collects the tags (`name = 'value'` or `name = "value"`) of a link description.
 * Values must be quoted; the quotes are stripped from the returned values.
 * @param content The link description
 * @returns The tags in their order of appearance (empty when none is found)
 */
export function parseLinkTags(content: string): Tag[] {
  const tagPattern = RegExp(
    "([a-zA-Z_0-9]+)\\s*=\\s*('[^']*'|\"[^\"]*\")",
    "g"
  );
  const collected: Tag[] = [];
  let hit = tagPattern.exec(content);
  while (hit !== null) {
    const [, name, quoted] = hit;
    // drop the leading and trailing quote of the captured value
    collected.push({ name: name, value: quoted.slice(1, -1) });
    hit = tagPattern.exec(content);
  }
  return collected;
}
/**
 * Parses the value of a Link header into links.
 * Each `<url>` target and the quoted parameters directly following it produce
 * one Link; the parameters are extracted with parseLinkTags.
 * @param content The content to parse (may be null or undefined)
 * @returns The links in their order of appearance, empty when there is no content
 */
export function parseLinks(content: string): Link[] {
  if (content == null) return [];
  const linkPattern = RegExp(
    "<([^>]*)>(?:\\s*;\\s*([a-zA-Z_0-9]+)\\s*=\\s*('[^']*'|\"[^\"]*\"))*",
    "g"
  );
  const parsed: Link[] = [];
  for (
    let hit = linkPattern.exec(content);
    hit !== null;
    hit = linkPattern.exec(content)
  ) {
    // hit[0] spans the target and its parameters, hit[1] is the target alone
    parsed.push(new Link(hit[1], parseLinkTags(hit[0])));
  }
  return parsed;
}
/**
 * Looks for the primary topic among the links.
 * @param links The links in the header
 * @returns The URL of the first link that refers to the primary topic,
 *          or null when no link does
 */
export function detectTopicOnlinks(links: Link[]): string {
  const candidates = links.filter((link: Link) => refersToPrimaryTopic(link));
  let topic: string = null;
  for (const candidate of candidates) {
    // the first URL wins, later candidates are ignored
    if (topic == null) topic = candidate.url;
  }
  return topic;
}
/**
 * Builds the linked data sources advertised by the links.
 * @param links The links in the header
 * @returns One DataSourceLinked per link that refers to data, in the same order
 */
export function detectDataOnLinks(links: Link[]): DataSourceLinked[] {
  const dataLinks = links.filter((link: Link) => refersToData(link));
  const sources: DataSourceLinked[] = [];
  for (const dataLink of dataLinks) {
    sources.push(new DataSourceLinked(dataLink));
  }
  return sources;
}
/**
 * Finds the relevant links in an HTML document.
 *
 * Two places are inspected:
 * - `<link>` elements of the head: `rel="meta"` is reported as an alternate
 *   representation typed with the element's `type`, `rel="bookmark"` as a bookmark;
 * - `<a>` elements of the body whose target starts with the resource URI and ends
 *   with the file extension of a known MIME type, reported as alternate
 *   representations of that type.
 *
 * A document without a head and/or body is tolerated: the missing part simply
 * contributes no link.
 * @param document The document to inspect
 * @param url The document URL
 * @param primaryTopic The detected primary topic, if any
 * @returns The links found, head links first
 */
export function findLinksInDocument(
  document: HTMLDocument,
  url: string,
  primaryTopic: string
): Link[] {
  const links: Link[] = [];
  // anchors are matched against the primary topic when known, the page URL otherwise
  const resource =
    primaryTopic == null || primaryTopic == "" ? url : primaryTopic;

  if (document.head != null) {
    const elementsLink = document.head.getElementsByTagName("link");
    for (let i = 0; i != elementsLink.length; i++) {
      const element = elementsLink.item(i);
      const rel = element.getAttribute("rel");
      if (rel == "meta") {
        links.push(
          new Link(element.href, [
            { name: "type", value: element.type },
            { name: "rel", value: "alternate" }
          ])
        );
      } else if (rel == "bookmark") {
        links.push(new Link(element.href, [{ name: "rel", value: "bookmark" }]));
      }
    }
  }

  if (document.body != null) {
    const elementsA = document.body.getElementsByTagName("a");
    // the table of known MIME types does not depend on the anchor: build it once
    const mimes = Object.keys(MIME).map((key: string) => MIME[key]);
    for (let i = 0; i != elementsA.length; i++) {
      const href = elementsA.item(i).href;
      if (href == null || !href.startsWith(resource)) continue;
      // only the first MIME type whose extension matches is reported
      for (let j = 0; j != mimes.length; j++) {
        if (href.endsWith(mimes[j].fileExtension)) {
          links.push(
            new Link(href, [
              { name: "type", value: mimes[j].mime },
              { name: "rel", value: "alternate" }
            ])
          );
          break;
        }
      }
    }
  }
  return links;
}
/**
* A piece of raw (textual) content together with its MIME type.
* This is the shape produced when the content of a data source is fetched.
*/
export interface RawContent {
/**
* The MIME type of the content
*/
contentType: string;
/**
* The raw content, as text
*/
content: string;
}
/**
* Content that can be observed for the detection of linked data.
* It extends the raw content with the information needed by the detection:
* the URL the content comes from and the value of its Link HTTP header.
* The inherited `content` may be null when only the response headers are known.
*/
export interface ObservableContent extends RawContent {
/**
* The URL of the content
*/
url: string;
/**
* The value for the Link HTTP header
* (may be null or undefined when the response has no such header)
*/
linkHeader: string;
}
/**
 * Gets some observable content at a target URI.
 *
 * The request advertises every known linked-data MIME type in its Accept header,
 * ordered by ascending `priority` value: the first type has no explicit quality,
 * the following ones get decreasing q-values, and text/html is accepted last.
 * @param target The target URI
 * @returns A promise resolved with the observable content (content type stripped
 *          of its parameters, or null when the response has no Content-Type),
 *          or rejected with an "HTTP error: <status>" message when the status
 *          is not in the 2xx range
 */
export function fetchObservableAt(target: string): Promise<ObservableContent> {
  const accept = Object.keys(MIME)
    .map((key: string) => MIME[key])
    .sort((x: MimeInfo, y: MimeInfo) => x.priority - y.priority)
    .map((mime: MimeInfo, index: number) =>
      index == 0
        ? mime.mime
        : // q-values are limited to a few decimals and must stay positive:
          // format with one decimal and never go below 0.1
          mime.mime + ";q=" + Math.max(0.1, 1 - 0.1 * index).toFixed(1)
    )
    .concat(["text/html;q=0.1"])
    .join(", ");
  return new Promise<ObservableContent>(
    (
      resolve: (result: ObservableContent) => void,
      reject: (reason: any) => void
    ) => {
      const xmlHttp = new XMLHttpRequest();
      xmlHttp.onreadystatechange = function() {
        if (xmlHttp.readyState != 4) return;
        // anything outside of 2xx is a failure (this includes status 0)
        if (xmlHttp.status < 200 || xmlHttp.status >= 300)
          return reject("HTTP error: " + xmlHttp.status);
        const rawType = xmlHttp.getResponseHeader("Content-Type");
        // the header may be absent; when present, drop its parameters (e.g. charset)
        const contentType =
          rawType == null ? null : rawType.split(";")[0].trim();
        resolve({
          contentType: contentType,
          content: xmlHttp.responseText,
          url: target,
          linkHeader: xmlHttp.getResponseHeader("Link")
        });
      };
      // NOTE(review): a HEAD request normally carries no body, so `content` is
      // presumably always empty here — confirm that this is intended.
      xmlHttp.open("HEAD", target, true);
      xmlHttp.setRequestHeader("Accept", accept);
      xmlHttp.send();
    }
  );
}
/**
* Map of known MIME types to badge names
*/
......@@ -126,20 +312,6 @@ export const MIME: { [mime: string]: MimeInfo } = {
"application/ld+json": new MimeInfo("application/ld+json", "LD", 11, ".json")
};
/**
* A piece of raw (textual) content together with its MIME type
*/
export class RawContent {
/**
* The MIME type of the content
*/
contentType: string;
/**
* The raw content, as text
*/
content: string;
}
/**
* A source of data
*/
......@@ -184,6 +356,29 @@ export class DataSourceNone implements DataSource {
priority: number;
}
/**
 * A data source whose content is already at hand (carried inline),
 * so that no further request is needed to obtain it.
 */
export class DataSourceInline implements DataSource {
  /** Discriminator identifying this kind of data source */
  sourceType: string = "DataSourceInline";
  /** Display name of the source */
  name: string = "Inline content";
  /** The URL the content comes from */
  url: string;
  /** The MIME type of the content */
  contentType: string;
  /** The priority of the source, taken from its MIME type */
  priority: number;
  /**
   * The inline content
   */
  content: string;

  /**
   * @param url The URL the content comes from
   * @param mime The description of the content's MIME type
   * @param content The inline content
   */
  constructor(url: string, mime: MimeInfo, content: string) {
    this.url = url;
    this.contentType = mime.mime;
    this.priority = mime.priority;
    this.content = content;
  }
}
/**
* The data about a page
*/
......@@ -268,7 +463,7 @@ function doFetchSourceLink(link: DataSourceLinked): Promise<RawContent> {
let xmlHttp = new XMLHttpRequest();
xmlHttp.onreadystatechange = function() {
if (xmlHttp.readyState == 4) {
if (xmlHttp.status != 200) {
if (xmlHttp.status < 200 && xmlHttp.status >= 300) {
reject("HTTP error: " + xmlHttp.status);
}
let ct = xmlHttp.getResponseHeader("Content-Type");
......@@ -292,6 +487,11 @@ export function fetchSource(source: DataSource): Promise<RawContent> {
return new Promise((resolve, reject) => reject("No data"));
} else if (source.sourceType == "DataSourceNone") {
return new Promise((resolve, reject) => reject("No data"));
} else if (source.sourceType == "DataSourceInline") {
let inline = source as DataSourceInline;
return new Promise((resolve, reject) =>
resolve({ contentType: inline.contentType, content: inline.content })
);
} else if (source.sourceType == "DataSourcePage") {
return doFetchSourcePage(source as DataSourcePage);
} else if (source.sourceType == "DataSourceLinked") {
......@@ -337,6 +537,69 @@ export class ResourceObservedData {
primaryTopic: string;
}
/**
 * Observes the content of an observable to get the observed data.
 *
 * - When the content type is a known linked-data MIME type, the observable is
 *   itself the single data source: inline when its content is available,
 *   otherwise a link back to its URL.
 * - Otherwise the data sources are the ones advertised by the Link header and,
 *   for non-empty HTML content, by the links found in the document.
 *
 * The primary topic is taken from the Link header, then (for HTML) from the
 * document links; it is the empty string when none is detected.
 * @param observable An observable
 * @returns The observed data for the observable's URL
 */
export function observeContent(
  observable: ObservableContent
): ResourceObservedData {
  // The content type may be a raw header value such as "text/html; charset=utf-8":
  // only the media type itself is significant for the comparisons below.
  const contentType =
    observable.contentType == null
      ? null
      : observable.contentType
          .split(";")[0]
          .trim()
          .toLowerCase();
  let links = parseLinks(observable.linkHeader);
  let primary = detectTopicOnlinks(links);

  if (contentType != null && MIME.hasOwnProperty(contentType)) {
    // Linked-data content at this URI
    const source =
      observable.content != null
        ? new DataSourceInline(
            observable.url,
            MIME[contentType],
            observable.content
          )
        : new DataSourceLinked(
            new Link(observable.url, [
              { name: "rel", value: "alternate" },
              { name: "type", value: contentType }
            ])
          );
    return {
      primaryTopic: primary == null ? "" : primary,
      url: observable.url,
      sources: [source]
    };
  }

  // no inline data, inspect the content
  if (
    contentType == "text/html" &&
    observable.content != null &&
    observable.content.length > 0
  ) {
    // Use the HTML parser to get a complete document (with head and body);
    // the parsed document is inert: its scripts are not executed.
    // NOTE(review): relative hrefs are presumably resolved against the URL of the
    // page running this code rather than observable.url — confirm.
    const doc = new DOMParser().parseFromString(
      observable.content,
      "text/html"
    );
    links = links.concat(findLinksInDocument(doc, observable.url, primary));
    if (primary == null || primary == "") primary = detectTopicOnlinks(links);
  }
  return {
    primaryTopic: primary == null ? "" : primary,
    url: observable.url,
    sources: detectDataOnLinks(links)
  };
}
/**
* Gets whether data has been detected
* @param observation The current observation
......
......@@ -18,7 +18,7 @@
* with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
import { ResourceObservedData } from "./data";
import { ResourceObservedData, ObservableContent } from "./data";
import { definition } from "@logilab/libview";
import "chrome";
......@@ -176,3 +176,28 @@ export function getObservationsFor(
}
);
}
/**
 * Gets some content at a target URI.
 * The request is delegated through a "FetchObservable" runtime message whose
 * payload is the target URI; the reply is expected to have the shape
 * `{ ok, error }`.
 * @param target The target URI
 * @returns A promise resolved with the reply's `ok` value, or rejected when
 *          the message could not be delivered, when no reply is received,
 *          or when the reply carries an error
 */
export function fetchObservableAt(target: string): Promise<ObservableContent> {
  return new Promise<ObservableContent>(
    (
      resolve: (observable: ObservableContent) => void,
      reject: (reason: any) => void
    ) => {
      chrome.runtime.sendMessage(
        {
          requestType: "FetchObservable",
          payload: target
        },
        (result: { ok: ObservableContent; error: any }) => {
          // a failed delivery invokes the callback without a result and
          // reports the cause through runtime.lastError
          const lastError = chrome.runtime.lastError;
          if (lastError != null) {
            return reject(
              lastError.message != null
                ? lastError.message
                : "Failed to send the FetchObservable message"
            );
          }
          if (result == null) {
            return reject("No response to the FetchObservable message");
          }
          // the error is not necessarily a string: anything but null/undefined
          // and the empty string is a failure
          if (result.error != null && result.error !== "") {
            return reject(result.error);
          }
          resolve(result.ok);
        }
      );
    }
  );
}
......@@ -18,7 +18,7 @@
* with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/