51Degrees Device Detection C/C++ 4.4

A device detection library that is used natively or by 51Degrees products

Hash/OfflineProcessing.c

Provides an example of processing a YAML file containing evidence for device detection. There are 20,000 examples in the supplied file of evidence representing HTTP Headers. For example:

header.user-agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
header.sec-ch-ua: '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"'
header.sec-ch-ua-full-version: '"98.0.4758.87"'
header.sec-ch-ua-mobile: '?0'
header.sec-ch-ua-platform: '"Android"'

We create a resource manager and results hash to read the data and find out about the associated device, then write this data to a YAML-formatted output stream.

As well as explaining the basic operation of offline processing using the defaults, for advanced operation this example can be used to experiment with tuning device detection for performance and predictive power using Performance Profile, Graph and Difference and Drift settings.

This example is available in full on GitHub

/* *********************************************************************
* This Original Work is copyright of 51 Degrees Mobile Experts Limited.
* Copyright 2023 51 Degrees Mobile Experts Limited, Davidson House,
* Forbury Square, Reading, Berkshire, United Kingdom RG1 3EU.
*
* This Original Work is licensed under the European Union Public Licence
* (EUPL) v.1.2 and is subject to its terms as set out below.
*
* If a copy of the EUPL was not distributed with this file, You can obtain
* one at https://opensource.org/licenses/EUPL-1.2.
*
* The 'Compatible Licences' set out in the Appendix to the EUPL (as may be
* amended by the European Commission) shall be deemed incompatible for
* the purposes of the Work and the provisions of the compatibility
* clause in Article 5 of the EUPL shall not apply.
*
* If using the Work as, or as part of, a network application, by
* including the attribution notice(s) required under Article 5 of the EUPL
* in the end user terms of the application under an appropriate heading,
* such notice(s) shall fulfill the requirements of that article.
* ********************************************************************* */
// Include ExampleBase.h before others as it includes Windows 'crtdbg.h',
// which must be included before 'malloc.h'.
#include "ExampleBase.h"
#include "../../../src/hash/hash.h"
#include "../../../src/common-cxx/textfile.h"
#include "../../../src/hash/fiftyone.h"
// Directory name handed to FileGetPath in main() when the data file or the
// evidence file is not given on the command line.
static const char *dataDir = "device-detection-data";
// In this example, by default, the 51Degrees "Lite" file needs to be somewhere in the
// project space, or you may specify another file as a command line parameter.
//
// Note that the Lite data file is only used for illustration, and has limited accuracy
// and capabilities. Find out about the Enterprise data file on our pricing page:
// https://51degrees.com/pricing
static const char *dataFileName = "51Degrees-LiteV4.1.hash";
// This file contains the 20,000 most commonly seen combinations of header values
// that are relevant to device detection. For example, User-Agent and UA-CH headers.
static const char *evidenceFileName = "20000 Evidence Records.yml";
// Scratch buffer shared by every call to outputValue(): it receives either the
// property's value text or the "Unknown (...)" message before being printed.
static char valueBuffer[1024] = "";
// Number of elements in valueBuffer, passed wherever its capacity is required.
static const size_t valueBufferLength = sizeof(valueBuffer) / sizeof(valueBuffer[0]);
// We use the low memory profile as its performance is sufficient for this
// example. See the documentation for more detail on this and other
// configuration options:
// http://51degrees.com/documentation/_device_detection__features__performance_options.html
// http://51degrees.com/documentation/_features__automatic_datafile_updates.html
// http://51degrees.com/documentation/_features__usage_sharing.html
//
// CONFIG initialises the ConfigHash created in main(). Exactly one of the
// definitions below should be active; the others are kept as alternatives
// to experiment with.
// #define CONFIG fiftyoneDegreesHashInMemoryConfig
// #define CONFIG fiftyoneDegreesHashHighPerformanceConfig
#define CONFIG fiftyoneDegreesHashLowMemoryConfig
// #define CONFIG fiftyoneDegreesHashBalancedConfig
// #define CONFIG fiftyoneDegreesHashBalancedTempConfig
// Bundle of the streams and results instance used while processing evidence
// records. Presumably this is the type of the `state` variable that run()
// fills in and passes by address as the callback state — TODO confirm.
// NOTE(review): the closing `}` and the typedef name are not present in this
// listing; confirm them against the full source file.
typedef struct t_offline_processing_state {
// Stream the YAML results are written to; run() opens it with fopen(..., "w")
// and closes it once processing has finished.
FILE *outputFile;
// Stream for progress and error messages; run() sets it to its `output` argument.
FILE *outputLog;
// Results instance reused for every record; run() obtains it from ResultsHashCreate.
ResultsHash *results;
/*
 * Writes a single "<name>: <device id>" line to the output stream.
 *
 * results - results instance the device id is read from.
 * name    - label printed before the colon.
 * output  - stream the line is written to.
 *
 * NOTE(review): `exception` is not declared inside this function as listed;
 * presumably a declaration precedes its use in the full source - confirm.
 */
static void outputDeviceId(
    ResultsHash *results,
    const char *name,
    FILE *output) {
    char deviceId[50];

    // Fill the local buffer with the textual device id for these results.
    HashGetDeviceIdFromResults(results, deviceId, sizeof(deviceId), exception);

    fprintf(output, "%s: %s\n", name, deviceId);
}
// Writes a single "<name>: <value>" line to the output stream for one property.
//
// results      - results instance the property value is read from.
// name         - label printed before the colon.
// propertyName - name of the property whose value is wanted.
// output       - stream the line is written to.
//
// The text is staged in the file-scope valueBuffer, so each call overwrites
// whatever the previous call left there.
static void outputValue(
ResultsHash *results,
const char *name,
const char* propertyName,
FILE *output) {
DataSetHash* dataset = (DataSetHash*)results->b.b.dataSet;
// NOTE(review): the beginning of the statement that the next two lines close
// is not present in this listing. Presumably it derives the `requiredIndex`
// used below from the data set's available properties and propertyName -
// confirm against the full source.
dataset->b.b.available,
propertyName);
// If a value has not been set then trying to access the value will
// result in an exception.
// NOTE(review): the opening of the `if (...)` condition and the name of the
// call that fills valueBuffer (comma-separated, per the "," argument) are
// also missing from this listing.
results, requiredIndex, exception)) {
results,
propertyName,
valueBuffer,
valueBufferLength,
",",
exception);
}
else {
// A no value message can also be obtained. This message describes why
// the value has not been set.
// NOTE(review): `reason`, used on the following line, has no visible
// declaration; presumably it receives the return value of this call.
ResultsHashGetNoValueReason(results, requiredIndex, exception);
snprintf(valueBuffer, valueBufferLength, "Unknown (%s)", ResultsHashGetNoValueReasonMessage(reason));
}
// Either branch leaves the text to print in valueBuffer.
fprintf(output, "%s: %s\n", name, valueBuffer);
}
// Runs detection for one record of evidence and writes one YAML document to
// outputFile: a "---" marker, the evidence entries, five device properties
// and finally the device id.
//
// results    - results instance populated by ResultsHashFromEvidence.
// outputFile - stream the YAML document is written to.
//
// NOTE(review): the body uses `evidence` and the caller in process() passes
// three arguments, but only two parameters appear in this listing; an
// evidence parameter is presumably missing between them - confirm.
static void analyse(
ResultsHash* results,
FILE* outputFile) {
// Information required for detection is called "evidence"
// and usually consists of a number of HTTP Header field
// values, in this case represented by a collection of
// header name/value entries.
// list the evidence
fprintf(outputFile, "---\n");
for (uint32_t i = 0; i < evidence->count; i++) {
// NOTE(review): the three arguments for the "%s%s: %s\n" format are not
// present in this listing.
fprintf(outputFile,
"%s%s: %s\n",
}
// Perform the detection; the results are then read by the output helpers below.
ResultsHashFromEvidence(results, evidence, exception);
outputValue(results, "device.ismobile", "IsMobile", outputFile);
outputValue(results, "device.platformname", "PlatformName", outputFile);
outputValue(results, "device.platformversion", "PlatformVersion", outputFile);
outputValue(results, "device.browsername", "BrowserName", outputFile);
outputValue(results, "device.browserversion", "BrowserVersion", outputFile);
// DeviceId is a unique identifier for the combination of hardware, operating
// system, browser and crawler that has been detected.
// Our device detection solution uses machine learning to find the optimal
// way to identify devices based on the real-world evidence values that we
// observe each day.
// As this changes over time, the result of detection can potentially change
// as well. By storing the device id, we can use this as a lookup in future
// rather than performing detection with the original evidence again.
// Do this by passing an evidence entry with:
// key = query.51D_ProfileIds
// value = [the device id]
// This is much faster and avoids the potential for getting a different
// result.
outputDeviceId(results, "device.deviceid", outputFile);
}
// Callback invoked with the key/value pairs of one evidence record: builds an
// evidence collection from the pairs, analyses it and frees the collection.
//
// pairs - key/value pairs read for the record.
// size  - number of entries in pairs.
// state - opaque pointer supplied by the caller.
//
// NOTE(review): `offline` is used below but has no visible declaration;
// presumably it is `state` cast to the processing-state structure - confirm.
static void process(KeyValuePair *pairs, uint16_t size, void *state) {
// Create an evidence collection and add the evidence to the collection
EvidenceKeyValuePairArray* evidenceArray = EvidenceCreate(size);
for (uint32_t i = 0; i < size; i++) {
// Get prefix
EvidencePrefixMap *prefixMap = EvidenceMapPrefix(pairs[i].key);
// Add the evidence as string
// NOTE(review): the name of the function being called is missing from this
// listing. The key is passed with its first prefixLength characters skipped,
// alongside the prefix enum value.
evidenceArray,
prefixMap->prefixEnum,
pairs[i].key + prefixMap->prefixLength,
pairs[i].value);
}
analyse(
offline->results,
evidenceArray,
offline->outputFile);
// Ensure the evidence collection is freed after used
EvidenceFree(evidenceArray);
}
// Processes every record in the evidence file and writes the detection
// results to a freshly created output file.
//
// manager          - resource manager the results instance is created from.
// evidenceFilePath - path of the YAML evidence file to read.
// outputFilePath   - path of the file to write; any existing file is deleted
//                    first.
// output           - stream for status and error messages.
//
// Returns early, after printing a message to output, if the output file
// cannot be opened for writing.
//
// NOTE(review): `state` is used throughout but has no visible declaration in
// this listing; presumably it is a local instance of the processing-state
// structure - confirm.
void run(
ResourceManager *manager,
const char *evidenceFilePath,
const char *outputFilePath,
FILE *output) {
// Working buffer handed to the file-processing call below.
char buffer[1000];
// Open a fresh data file for writing the output to.
FileDelete(outputFilePath);
state.outputFile = fopen(outputFilePath, "w");
if (state.outputFile == NULL) {
fprintf(output, "Could not open file %s for write\n", outputFilePath);
return;
}
state.outputLog = output;
// Get the results and data set from the manager. Use a higher closest
// number of signatures than the default because performance is less
// important for offline processing and expanding the number of
// alternatives evaluated can lead to a better result.
state.results = ResultsHashCreate(manager, 10, 10);
// Storage for up to 10 key/value pairs per record: each key has 500 bytes
// and each value 1000 bytes of backing storage.
KeyValuePair pair[10];
char key[10][500];
char value[10][1000];
for (int i = 0; i < 10; i++) {
pair[i].key = key[i];
pair[i].keyLength = 500;
pair[i].value = value[i];
pair[i].valueLength = 1000;
}
// Perform offline processing.
// NOTE(review): the name of the function being called is missing from this
// listing; it receives the evidence path, the working buffer, the pair
// storage, the state and the process() callback.
evidenceFilePath,
buffer,
sizeof(buffer),
pair,
10,
&state,
process
);
// Terminate the YAML stream and close the output file.
fprintf(state.outputFile, "...\n");
fclose(state.outputFile);
fprintf(output, "Output Written to %s\n", outputFilePath);
DataSetHash* dataset = (DataSetHash *)state.results->b.b.dataSet;
fiftyoneDegreesExampleCheckDataFile(dataset);
// Free the memory used by the results instance.
// NOTE(review): no statement follows the comment above in this listing, so
// the release of state.results is not visible here - confirm it exists in
// the full source.
}
/*
 * Reports a status code to the user by printing its message on stdout.
 *
 * status   - status code to describe.
 * fileName - file name passed to StatusGetMessage for use in the message.
 *
 * The message obtained from StatusGetMessage is released with Free before
 * returning. Previously the message was fetched and freed without ever being
 * shown, so a failure produced no explanation at all.
 */
static void reportStatus(
    StatusCode status,
    const char* fileName) {
    const char *message = StatusGetMessage(status, fileName);

    // Only print when a message was actually produced; passing a null
    // pointer for "%s" is undefined behaviour.
    if (message != NULL) {
        printf("%s\n", message);
    }

    Free((void*)message);
}
// Initialises a resource manager from the data file and, on success, processes
// the evidence file into the output file via run().
//
// dataFilePath       - path of the device detection data file.
// evidenceFilePath   - path of the YAML evidence file to process.
// outputFilePath     - path of the YAML file the results are written to.
// requiredProperties - properties string assigned to properties.string.
// config             - configuration, received by value so the adjustments
//                      made here do not affect the caller's copy.
// output             - stream for status messages.
//
// NOTE(review): `properties`, `status` and `exception` are used below but
// have no visible declarations in this listing - confirm against the full
// source.
void fiftyoneDegreesOfflineProcessingRun(
const char *dataFilePath,
const char *evidenceFilePath,
const char *outputFilePath,
const char *requiredProperties,
ConfigHash config,
FILE* output) {
// Set concurrency to ensure sufficient shared resources available.
// Chained assignment: both fields are set to 1.
config.nodes.concurrency =
config.strings.concurrency = 1;
// If time can be sacrificed for a more thorough analysis of the different
// options then increase the closestSignatures value. More permutations
// will be considered the higher the number.
// config.closestSignatures = 50000;
// Set the required properties for the output file.
properties.string = requiredProperties;
// Set the User-Agent update so that we can output the sub strings found
// and not the entire User-Agent.
config.b.updateMatchedUserAgent = true;
ResourceManager manager;
// NOTE(review): the start of the initialisation call is missing from this
// listing; presumably its return value is stored in the `status` tested
// below.
&manager,
&config,
&properties,
dataFilePath,
exception);
printf("%s\n", dataFilePath);
if (status != SUCCESS) {
reportStatus(status, dataFilePath);
}
else {
// Process the evidence writing the results to the output path.
run(&manager, evidenceFilePath, outputFilePath, output);
// Free the memory used by the data set.
// NOTE(review): no statement releasing the manager follows the comment above
// in this listing - confirm it exists in the full source.
fprintf(output, "Processing complete. See results in: '%s'", outputFilePath);
}
}
/*
 * Adapter taking a single ExampleParameters pointer, which is the form main()
 * hands to fiftyoneDegreesExampleMemCheck. It unpacks the parameters and
 * forwards them to fiftyoneDegreesOfflineProcessingRun.
 *
 * params - paths, properties string, configuration and output stream for
 *          the run.
 */
void fiftyoneDegreesExampleCOfflineProcessingRun(ExampleParameters *params) {
    // The configuration is passed on by value, so take a copy of the one the
    // parameters point at.
    ConfigHash config = *params->config;

    fiftyoneDegreesOfflineProcessingRun(
        params->dataFilePath, params->evidenceFilePath,
        params->outputFilePath, params->propertiesString,
        config, params->output);
}
#ifndef TEST
/*
 * Finds the final component of a path.
 *
 * path - NUL-terminated path using '/' or '\\' as separators.
 *
 * Returns a pointer into path just after the last separator, path itself
 * when it contains no separator (including the empty string), or NULL when
 * the path ends with a separator and so has no base name.
 *
 * The scan uses a one-past index (i runs from length down to 1) so the
 * unsigned counter never has to go below zero. The previous `i >= 0` test on
 * a size_t was always true and read before the start of the string whenever
 * no separator was present.
 */
static const char* getBaseName(const char* path) {
    const size_t length = strlen(path);

    for (size_t i = length; i > 0; i--) {
        const char c = path[i - 1];
        if (c == '\\' || c == '/') {
            // A trailing separator means there is no base name.
            if (i == length) {
                return NULL;
            }
            return path + i;
        }
    }

    return path;
}
/*
 * Copies a command line path into a fixed-size buffer.
 * Returns 1 on success, or 0 if the path does not fit, in which case the
 * destination must not be used.
 */
static int copyPathArgument(char *dest, size_t destSize, const char *source) {
    int written = snprintf(dest, destSize, "%s", source);
    return written >= 0 && (size_t)written < destSize;
}

/*
 * Entry point for the offline processing example.
 *
 * Optional command line arguments:
 *   argv[1] - data file path (otherwise found via FileGetPath).
 *   argv[2] - evidence file path (otherwise found via FileGetPath).
 *   argv[3] - output file path (otherwise "offline-processing-output.yml"
 *             next to the evidence file).
 *   argv[4] - required properties string (otherwise empty).
 *
 * Returns 0 after running the example, or 1 if a path could not be resolved
 * or is too long for its buffer. The program waits for a character on stdin
 * before exiting in either case.
 *
 * All paths are copied with bounded writes; an over-long argument is reported
 * and rejected rather than overflowing the FILE_MAX_PATH buffers.
 */
int main(int argc, char* argv[]) {
    StatusCode status = SUCCESS;
    char dataFilePath[FILE_MAX_PATH];
    char evidenceFilePath[FILE_MAX_PATH];
    char outputFilePath[FILE_MAX_PATH];

    // Set data file path
    if (argc > 1) {
        if (!copyPathArgument(dataFilePath, sizeof(dataFilePath), argv[1])) {
            printf("Data file path is too long.\n");
            fgetc(stdin);
            return 1;
        }
    }
    else {
        status = FileGetPath(
            dataDir,
            dataFileName,
            dataFilePath,
            sizeof(dataFilePath));
        if (status != SUCCESS) {
            printf("Failed to find a device detection "
                "data file. Make sure the device-detection-data "
                "submodule has been updated by running "
                "`git submodule update --recursive`");
            fgetc(stdin);
            return 1;
        }
    }

    // Set evidence file path
    if (argc > 2) {
        if (!copyPathArgument(
            evidenceFilePath, sizeof(evidenceFilePath), argv[2])) {
            printf("Evidence file path is too long.\n");
            fgetc(stdin);
            return 1;
        }
    }
    else {
        status = FileGetPath(
            dataDir,
            evidenceFileName,
            evidenceFilePath,
            sizeof(evidenceFilePath));
        if (status != SUCCESS) {
            reportStatus(status, evidenceFilePath);
            fgetc(stdin);
            return 1;
        }
    }

    // Set output file path
    if (argc > 3) {
        if (!copyPathArgument(
            outputFilePath, sizeof(outputFilePath), argv[3])) {
            printf("Output file path is too long.\n");
            fgetc(stdin);
            return 1;
        }
    }
    else {
        const char* baseName = getBaseName(evidenceFilePath);
        if (baseName == NULL) {
            printf("Invalid evidence file path.\n");
            fgetc(stdin);
            return 1;
        }
        // Reuse the directory part of the evidence path (everything before
        // its base name) and append the default output file name. The
        // directory length is below FILE_MAX_PATH, so the int cast is safe.
        size_t directoryLength = (size_t)(baseName - evidenceFilePath);
        int written = snprintf(
            outputFilePath,
            sizeof(outputFilePath),
            "%.*s%s",
            (int)directoryLength,
            evidenceFilePath,
            "offline-processing-output.yml");
        if (written < 0 || (size_t)written >= sizeof(outputFilePath)) {
            printf("Output file path is too long.\n");
            fgetc(stdin);
            return 1;
        }
    }

    ConfigHash config = CONFIG;
    ExampleParameters params;
    params.dataFilePath = dataFilePath;
    params.evidenceFilePath = evidenceFilePath;
    params.outputFilePath = outputFilePath;
    params.propertiesString = argc > 4 ? argv[4] : "";
    params.config = &config;
    params.output = stdout;

    // Run the example
    fiftyoneDegreesExampleMemCheck(
        &params,
        fiftyoneDegreesExampleCOfflineProcessingRun);

    // Wait for a character to be pressed.
    fgetc(stdin);

    return 0;
}
#endif