Skip to content

Commit b494f47

Browse files
committed
C++ version of tag.sh
1 parent a66b494 commit b494f47

File tree

1 file changed

+117
-0
lines changed
  • extended-attributes-and-tags

1 file changed

+117
-0
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// tag.cpp
2+
//
3+
// program to interpret binary @SynoEAStream files and extract the tags (labels) from the com.apple.metadata:_kMDItemUserTags bplist structure
4+
// input:
5+
// usage: tag file
6+
// the parameter is either a @SynoEAStream file or the (mother) file to which the @SynoEAStream file belongs.
7+
// the script assumes that the com.apple.metadata:_kMDItemUserTags extended attribute is present in the file, so it is wise to grep first before calling this script
8+
// a very efficient way of doing this is with:
9+
// grep -rlF "com.apple.metadata:_kMDItemUserTags" <path> --include='*@SynoEAStream' | while read f ; do tag "$f" ; done
10+
// or look at the listtags script.
11+
// if you know which tag you're looking for (e.g. "Red"), a very efficient way is to further prefilter the list with:
12+
// grep -rlF "com.apple.metadata:_kMDItemUserTags" <path> --include='*@SynoEAStream' | xargs -d'\n' grep -alF <tag> | while read f ; do tag "$f" ; done
13+
// or look at the mk_tag_links script.
14+
// output:
15+
// prints the Finder tags (user tags and Finder labels) associated with file, each on a separate line,
16+
// effectively implementing the equivalent of 'tag -l -N -g' from the jdberry Python script version (--list --no-name --garrulous).
17+
// if the file does not contain tags (empty com.apple.metadata:_kMDItemUserTags bplist), the script prints nothing.
18+
// prints a msg on stderr when the input is not according to expectation (parse error).
19+
// note:
20+
// the formatting of the com.apple.metadata:_kMDItemUserTags varies considerably, depending on which application wrote the extended attributes (tag, Finder) or
21+
// whether the list is empty or not (no com.apple.metadata:_kMDItemUserTags at all or empty bplist).
22+
// the bplist format itself is perfectly explained in https://medium.com/@karaiskc/understanding-apples-binary-property-list-format-281e6da00dbd
23+
24+
#include "get_attr.h"
25+
26+
#include <locale>
27+
#include <codecvt>
28+
29+
// string (utf8) -> u16string -> wstring
//
// converts a UTF-8 encoded byte string into UTF-16 code units, returned one code unit per wchar_t.
// characters outside the Basic Multilingual Plane come back as a surrogate pair (two elements).
// note: std::wstring_convert::from_bytes throws std::range_error when the input is not valid UTF-8.
static std::wstring utf8_to_utf16(const std::string& utf8)
{
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
    const std::u16string utf16 = convert.from_bytes(utf8);
    // widen every 16-bit code unit to wchar_t (no re-encoding takes place here)
    return std::wstring(utf16.begin(), utf16.end());
}
37+
38+
// wstring -> u16string -> string (utf8)
//
// converts a sequence of UTF-16 code units (stored one per wchar_t) into a UTF-8 encoded byte string.
// every element is narrowed to char16_t first, so each wchar_t is expected to hold a single 16-bit code unit.
// note: std::wstring_convert::to_bytes throws std::range_error when the code units are not valid UTF-16.
static std::string utf16_to_utf8(const std::wstring& utf16)
{
    const std::u16string u16str(utf16.begin(), utf16.end());
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
    return convert.to_bytes(u16str);
}
46+
47+
// consumes the first n characters of str and returns them.
//   str : buffer to read from; the returned characters are removed from its front
//   n   : number of characters wanted; zero or a negative value reads nothing
// when fewer than n characters are left, whatever remains is returned and str ends up empty
// (no exception is raised), so callers can detect a short read by the length of the result.
static std::string read(std::string& str, int n)
{
    if (n <= 0) return std::string();
    const std::string::size_type count = static_cast<std::string::size_type>(n);
    std::string result = str.substr(0, count);  // substr clamps the count to the available length
    str.erase(0, count);                        // erase clamps as well and drops the prefix in place
    return result;
}
53+
54+
int main(int argc, char** argv)
55+
{
56+
std::string file = argv[1];
57+
std::string hex;
58+
if (! get_attr("com.apple.metadata:_kMDItemUserTags", file, true, true, hex)) return 1;
59+
60+
// (try to) read "bplist" - this SHOULD be at this position in the file or the offset referencing didn't work
61+
std::string bplist = read(hex, 12);
62+
if (bplist != tohex("bplist")) { std::cerr << "tag: error in " << file << ":bplist not found (found 0x" << bplist << ")" << LF ; return 1; }
63+
64+
std::string x,k,l,t;
65+
x = read(hex, 4); // read 4 hex digits (2 bytes) - this is the version number, usually "00" (but "14" and "18" has also been reported)
66+
// at this point, we're at the array
67+
x = read(hex, 1); // this should be the '0xAk' (array marker) - com.apple.metadata:_kMDItemUserTags is encoded as a bplist array of strings
68+
if ( x != "a" ) { std::cerr << "tag: error in " << file << ": array marker not found (found '" << x << "')" << LF; return 1; }
69+
k = read(hex, 1); // this should be the number of elements
70+
if ( k == "f" ) {
71+
// multi-byte array length: 0x1t kk [kk ...]
72+
t = read(hex, 1); if ( t != "1" ) { std::cerr << "tag: error in " << file << ": unexpected data in multi-byte array length parameter (" << t << ")" << LF; return 1; }
73+
t = read(hex, 1);
74+
k = read(hex, 2*(1<<fromhex(t))); // the 4 bits after '1' defines how may bytes we need to describe the length: 2^t bytes
75+
}
76+
int kk = fromhex(k); // hex to decimal
77+
//std::cerr << "- '" << x << "' " << kk << LF;
78+
x = read(hex, 2*kk); // skip the object refs
79+
// at this point, we're at the actual tag strings. these are preceded by 0x5l or 0x6l length byte(s)
80+
while ( kk>0 ) {
81+
x = read(hex, 1);
82+
l = read(hex, 1);
83+
//std::cerr << "-- " << x << l << LF;
84+
if (! ( x == "5" || x == "6" ) ) { std::cerr << "tag: error in " << file << ": string marker not found (found 0x" << x << l << ")" << LF; return 1; }
85+
if ( l == "f" ) {
86+
// multi-byte string length: 0x1t kk [kk ...]
87+
t = read(hex, 1) ; if ( t != "1" ) { std::cerr << "tag: error in " << file << ": unexpected data in multi-byte string length parameter (" << t << ")"; return 1; }
88+
t = read(hex, 1);
89+
//std::cerr << "--- 1" << t << LF;
90+
l = read(hex, 2*(1<<fromhex(t))); // the 4 bits after '1' defines how may bytes we need to describe the length: 2^t bytes
91+
}
92+
//std::cerr << "---- " << l << LF;
93+
int ll=fromhex(l); // hex to decimal
94+
if ( x == "5" ) { // regular ASCII string. note that as soon as you use a UTF-8 character, the string becomes UTF-16 ($x is 6)
95+
while ( ll>0 ) {
96+
// read the string but ignore the "\n<digit>" at the end, if found
97+
x = read(hex, 2); if ( x == "0a" && ll == 2 ) { x = read(hex, 2) ; break; }
98+
std::cout << static_cast<char>(fromhex(x));
99+
--ll;
100+
}
101+
std::cout << LF;
102+
}
103+
else if ( x == "6" ) { // UTF-16 string (output as UTF-8)
104+
std::wstring out;
105+
while ( ll>0 ) {
106+
// read the string but ignore the "\n<digit>" at the end, if found
107+
x = read(hex, 4); if ( x == "000a" && ll == 2 ) { x = read(hex, 4) ; break; }
108+
out += static_cast<wchar_t>(fromhex(x));
109+
--ll;
110+
}
111+
std::cout << utf16_to_utf8(out) << LF;
112+
}
113+
--kk;
114+
}
115+
}
116+
117+
//EOF

0 commit comments

Comments
 (0)